DataBase.py

# -*- coding:utf-8 -*-
from contextlib import closing
import json
import time
import warnings

import pymongo
import pymysql

from DataMaker import DataMaker

dm = DataMaker({})


class CouldDB:
    def __init__(self):
        # Connection settings for the remote MySQL question bank.
        self.DB_HOST = '10.19.48.64'
        self.DB_USER = 'zsy'
        self.DB_PASSWORD = 'Hbt3sZNxepnZQNPU'
        self.DB_NAME = 'zsy_tk2'

    def __call__(self, sql):
        '''Run a read query (with a 2s connect timeout) and return every row as a dict.'''
        with closing(pymysql.connect(host=self.DB_HOST,
                                     user=self.DB_USER,
                                     port=3307,
                                     password=self.DB_PASSWORD,
                                     db=self.DB_NAME,
                                     charset='utf8',
                                     connect_timeout=2,
                                     cursorclass=pymysql.cursors.DictCursor
                                     )) as connection:
            with connection.cursor() as cursor:
                cursor.execute(sql)
                result = cursor.fetchall()
                return result

    def execute(self, sql):
        '''Run a read query and return every row as a dict.'''
        with closing(pymysql.connect(host=self.DB_HOST,
                                     user=self.DB_USER,
                                     port=3307,
                                     password=self.DB_PASSWORD,
                                     db=self.DB_NAME,
                                     charset='utf8',
                                     cursorclass=pymysql.cursors.DictCursor
                                     )) as connection:
            with connection.cursor() as cursor:
                cursor.execute(sql)
                result = cursor.fetchall()
                return result

    def execute_commit(self, sql):
        '''Run a write statement and commit it.'''
        with closing(pymysql.connect(host=self.DB_HOST,
                                     user=self.DB_USER,
                                     port=3307,
                                     password=self.DB_PASSWORD,
                                     db=self.DB_NAME,
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor
                                     )) as connection:
            with connection.cursor() as cursor:
                cursor.execute(sql)
                connection.commit()
                return 1
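
    # Usage sketch (the queries below are illustrative, not taken from production code):
    #   cdb = CouldDB()
    #   rows = cdb('SELECT topic_id FROM topic LIMIT 5;')        # __call__: read with 2s connect timeout
    #   rows = cdb.execute('SELECT essay_id FROM essay;')        # read without the timeout
    #   cdb.execute_commit("UPDATE topic SET is_audit=1 WHERE topic_id=1;")  # write, then commit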

    def get_topic(self, topic_no):
        topic = {}
        topic['all_content'] = []
        topic_info = self.execute(
            'SELECT topic_id,content,answer,topic_type_id,status,source_type,grade,subject_id,is_audit FROM topic WHERE topic_id=%d;' % (
                int(topic_no)))
        slaves = self.execute(
            'SELECT content,answer,topic_slave_id FROM topic_slave WHERE topic_id=%d;' % (int(topic_no)))
        if len(topic_info) == 1:
            topic = topic_info[0]
            topic['all_content'] = []
            topic['all_content'].append(topic['content'])
        if len(topic_info) == 1 and len(slaves) == 0:
            # option = self.execute(
            #     'SELECT content FROM topic_option WHERE topic_id=%d and is_true=1;' % (int(topic_no)))
            option = self.execute(
                'SELECT content FROM topic_option WHERE topic_id=%d;' % (int(topic_no)))
            topic['all_content'].extend([i['content'] for i in option])
            topic['option'] = [i for i in option]
        if len(topic_info) == 1 and len(slaves):
            topic['slave'] = []
            for slave in slaves:
                topic_slave_id = slave['topic_slave_id']
                if 'content' in slave:
                    topic['all_content'].append(slave['content'])
                slave['option'] = self.execute(
                    'SELECT content,is_true FROM topic_option WHERE topic_slave_id=%d;' % (int(topic_slave_id)))
                topic['all_content'].extend([i['content'] for i in slave['option']])
                topic['slave'].append(slave)
        dm = DataMaker(topic)
        # print(dm.get_all_content())
        topic['all_content'] = ' '.join([dm._replacer(i) for i in topic['all_content'] if i != ''])
        topic['checked'] = 0
        topic['type'] = 0
        return topic
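
    # Note: get_topic() (like get_essay() and get_work_topic() below) returns the dict that
    # LocalDB.update() stores: the raw row fields plus 'all_content' (stem, options and
    # sub-question text joined into one string), 'checked' = 0 and 'type'
    # (0 = topic, 1 = work_topic, 2 = essay).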

    def get_essay(self, topic_no):
        topic = {}
        topic_info = self.execute(
            'SELECT essay_id,content,title FROM essay WHERE essay_id=%d;' % (
                int(topic_no)))
        if len(topic_info) == 1:
            topic_info[0]['is_essay'] = 1
            topic_info[0]['topic_id'] = topic_info[0]['essay_id']
            topic_info[0]['status'] = 1
            topic_info[0]['topic_type_id'] = 'essay'
            topic_info[0]['type'] = 2
            topic_info[0]['checked'] = 0
            topic_info[0]['all_content'] = dm._replacer(topic_info[0]['content'])
            return topic_info[0]
        return topic

    def get_work_topic(self, topic_no):
        # Topic type names mapped to their numeric ids (not used below; kept for reference).
        dic = {'单项填空': 1, '完形填空': 2, '阅读理解': 3, '任务型阅读': 4, '七选五': 5, '语法填空': 6, '短文改错': 7, '单词拼写': 8, '选词填空': 9,
               '课文填空': 10, '句子翻译': 11, '完成句子': 12, '书面表达': 13, '阅读表达': 14, '考试听力': 15, '概要写作': 16, '读后续写': 17,
               '单句改错': 18, '单句语法填空': 19, '单句翻译': 20}
        topic = {}
        topic_info = self.execute(
            'SELECT topic_id,content,status FROM work_topic WHERE id=%d and status=1 and topic_id=0;' % (
                int(topic_no)))
        if len(topic_info) == 1:
            topic = {}
            topic_1 = topic_info[0]
            # status, source_type, grade, subject_id, is_audit
            topic_info = json.loads(topic_1['content'])
            topic_info = topic_info['mjson']['items'][0]
            topic['content'] = topic_info['content']
            all_content = []
            all_content.append(topic_info['content'])
            if 'option' in topic_info:
                all_content.extend(topic_info['option'])
            if 'slave' in topic_info:
                for slave in topic_info['slave']:
                    if 'content' in slave:
                        all_content.append(slave['content'])
                    if 'option' in slave:
                        all_content.extend(slave['option'])
            # dm = DataMaker(topic)
            topic['all_content'] = all_content
            topic['all_content'] = ' '.join([i for i in topic['all_content'] if i != ''])
            topic['checked'] = 0
            topic['topic_id'] = topic_no
            topic['topic_type_id'] = topic_info['topic_type_id']
            topic['status'] = topic_info['status']
            topic['type'] = 1
            return topic
        return {}
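
# The work_topic.content column parsed by get_work_topic() is JSON; the fields the code
# reads imply a shape roughly like the following (real payloads may carry more keys):
#   {"mjson": {"items": [{"content": "...", "topic_type_id": ..., "status": ...,
#                         "option": ["..."], "slave": [{"content": "...", "option": ["..."]}]}]}}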


class LocalDB:
    def __init__(self):
        self.db_name = "chachong_en_content"
        self._initDB()

    def Executor(self):
        return self.en_col

    def _initDB(self):
        self.client = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
        self.en_db = self.client[self.db_name]
        self.en_col = self.en_db["site2"]

    def update(self, could_data: dict, exis='unknown'):
        if 'topic_id' in could_data:
            myquery = {"topic_id": could_data['topic_id']}
        elif 'id' in could_data:
            myquery = {"topic_id": could_data['id']}
        else:
            warnings.warn(str(could_data))
            return
        if exis == 'exis':
            # Document is known to exist: overwrite the given fields.
            newvalues = {"$set": could_data}
            self.en_col.update_one(myquery, newvalues)
        elif exis == 'not':
            # Document is known to be new: insert it, but only if it has content.
            if 'content' in could_data:
                self.en_col.insert_one(could_data)
        else:
            # Unknown: update if a matching document exists, otherwise insert.
            if self.en_col.count_documents(myquery):
                newvalues = {"$set": could_data}
                self.en_col.update_one(myquery, newvalues)
            elif 'content' in could_data:
                self.en_col.insert_one(could_data)
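
    # Usage sketch for update() (document values are illustrative):
    #   ldb = LocalDB()
    #   ldb.update({'topic_id': 1, 'content': '...'}, exis='not')   # known new: insert only
    #   ldb.update({'topic_id': 1, 'checked': 1}, exis='exis')      # known existing: $set the fields
    #   ldb.update({'topic_id': 1, 'content': '...'})               # default: update if found, else insert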

    def finder(self, filter):
        return self.en_col.find(filter)

    def deleter(self, *args):
        return self.en_col.delete_many(*args)

    def backups(self, db_name):
        # Copy the working collection into a backup database before a destructive refresh.
        self.en_db_bak = self.client[db_name]
        self.en_col_bak = self.en_db_bak["site2"]
        self.en_col_bak.delete_many({})
        for i in self.en_col.find({}):
            self.en_col_bak.insert_one(i)


class DataBase:
    def __init__(self, source: CouldDB, saver: LocalDB):
        self.source = source
        self.saver = saver

    def update(self, topic_id, essay=False, work=False, exis='unknown'):
        if essay:
            self.saver.update(self.source.get_essay(topic_id), exis=exis)
        elif work:
            self.saver.update(self.source.get_work_topic(topic_id), exis=exis)
        else:
            self.saver.update(self.source.get_topic(topic_id), exis=exis)

    def update_all(self, bakdel=False, essay=False, work_topic=False, topic=False, exis='unknown'):
        from tqdm import tqdm
        if bakdel:
            # Back up the current collection into a dated database, then clear it.
            self.saver.backups('eng_bak' + time.strftime('%Y-%m-%d', time.localtime(time.time())))
            self.saver.deleter({})
        if essay:
            # TODO: refresh all essays (美文)
            all_topic_id = self.source.execute('SELECT essay_id FROM essay;')
            for i in tqdm(all_topic_id):
                i['topic_id'] = i['essay_id']
                self.update(i['topic_id'], essay=True, exis=exis)
        if work_topic:
            # TODO: refresh the work_topic staging area (工作区)
            all_topic_id = self.source.execute('SELECT id FROM work_topic;')
            for i in tqdm(all_topic_id):
                try:
                    # Log each processed id so an interrupted run can be inspected.
                    with open('1.txt', 'a') as f:
                        f.write(str(i) + '\n')
                    time.sleep(0.05)
                    self.update(i['id'], work=True)
                except Exception as e:
                    print(e)
        if topic:
            # Skip topic ids already written to the progress log from an earlier run.
            allready = []
            with open('2.txt', 'r') as f:
                for i in f:
                    allready.append(i.strip())
            # TODO: refresh all topics
            all_topic_id = self.source.execute('SELECT topic_id FROM topic;')
            for i in tqdm(all_topic_id):
                try:
                    if str(i) not in allready:
                        with open('2.txt', 'a') as f:
                            f.write(str(i) + '\n')
                        self.update(i['topic_id'])
                except Exception:
                    pass


if __name__ == '__main__':
    cdb = CouldDB()
    ldb = LocalDB()
    # for i in ldb.finder({}).limit(10):
    #     print(i)
    db = DataBase(cdb, ldb)
    db.update_all(topic=True, exis='not')
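
    # Other entry points (commented out; the ids are illustrative):
    #   db.update(1)                           # sync a single topic
    #   db.update(1, essay=True)               # sync a single essay
    #   db.update_all(essay=True, exis='not')  # sync every essay
    #   db.update_all(work_topic=True)         # sync the work_topic staging area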