123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289 |
- # -*- coding:utf-8 -*-
- from contextlib import closing
- import pymysql
- import pymongo
- from DataMaker import DataMaker
- import json
- import time
- import warnings
# Shared module-level DataMaker built from an empty dict; CouldDB.get_essay
# calls its `_replacer` to clean essay text.
dm = DataMaker({})
class CouldDB:
    """Query helper for the remote MySQL question bank.

    Every query opens a fresh pymysql connection (DictCursor, so rows come
    back as dicts) and closes it again before returning.

    NOTE(review): the default credentials are hard-coded in source; they are
    kept as defaults only for backward compatibility and should be moved to
    configuration / environment.
    """

    # Work-topic type names and their numeric ids. Nothing in this class reads
    # the table; it used to be rebuilt (and ignored) on every get_work_topic
    # call. NOTE(review): presumably these are the values of `topic_type_id`
    # -- confirm before relying on it.
    _WORK_TOPIC_TYPE_IDS = {
        '单项填空': 1, '完形填空': 2, '阅读理解': 3, '任务型阅读': 4, '七选五': 5,
        '语法填空': 6, '短文改错': 7, '单词拼写': 8, '选词填空': 9, '课文填空': 10,
        '句子翻译': 11, '完成句子': 12, '书面表达': 13, '阅读表达': 14, '考试听力': 15,
        '概要写作': 16, '读后续写': 17, '单句改错': 18, '单句语法填空': 19, '单句翻译': 20,
    }

    def __init__(self, host='10.19.48.64', user='zsy', password='Hbt3sZNxepnZQNPU',
                 db_name='zsy_tk2', port=3307):
        """Store the connection settings; no connection is opened here."""
        self.DB_HOST = host
        self.DB_USER = user
        self.DB_PASSWORD = password
        self.DB_NAME = db_name
        self.DB_PORT = port

    def _connect(self, charset='utf8', connect_timeout=None):
        """Return a `closing(...)` context manager around a new connection."""
        options = dict(
            host=self.DB_HOST,
            user=self.DB_USER,
            port=self.DB_PORT,
            password=self.DB_PASSWORD,
            db=self.DB_NAME,
            charset=charset,
            cursorclass=pymysql.cursors.DictCursor,
        )
        # Only override pymysql's own default when a timeout was requested.
        if connect_timeout is not None:
            options['connect_timeout'] = connect_timeout
        return closing(pymysql.connect(**options))

    def _fetch_all(self, sql, args=None, connect_timeout=None):
        """Run one statement on a fresh connection and return every row."""
        with self._connect('utf8', connect_timeout) as connection:
            with connection.cursor() as cursor:
                cursor.execute(sql, args)
                return cursor.fetchall()

    def __call__(self, sql, args=None):
        """Same as `execute`, but gives up connecting after 2 seconds."""
        return self._fetch_all(sql, args, connect_timeout=2)

    def execute(self, sql, args=None):
        """Run `sql` and return all result rows (each row is a dict).

        `args`, when given, is handed to pymysql for escaping so callers can
        use `%s` placeholders instead of formatting values into the SQL text.
        """
        return self._fetch_all(sql, args)

    def execute_commit(self, sql, args=None):
        """Run a writing statement over a utf8mb4 connection, commit, return 1."""
        with self._connect('utf8mb4') as connection:
            with connection.cursor() as cursor:
                cursor.execute(sql, args)
            connection.commit()
        return 1

    def get_topic(self, topic_no):
        """Load one topic with its options / sub-questions.

        Returns the `topic` row extended with:
          * 'option' (topic without sub-questions) or 'slave' (list of
            sub-question rows, each carrying its own 'option' list),
          * 'all_content': the topic text plus every sub-question and option
            text, each passed through DataMaker._replacer and joined by spaces,
          * 'checked' = 0 and 'type' = 0.
        When the id does not match exactly one row, only 'all_content' (''),
        'checked' and 'type' are present.

        Raises ValueError/TypeError if `topic_no` cannot be converted by int().
        """
        topic_id = int(topic_no)
        topic_rows = self.execute(
            'SELECT topic_id,content,answer,topic_type_id,status,source_type,grade,subject_id,is_audit FROM topic WHERE topic_id=%s;',
            (topic_id,))
        slaves = self.execute(
            'SELECT content,answer,topic_slave_id FROM topic_slave WHERE topic_id=%s;', (topic_id,))

        topic = {}
        fragments = []
        if len(topic_rows) == 1:
            topic = topic_rows[0]
            fragments.append(topic['content'])
            if not slaves:
                # Stand-alone question: options hang directly off the topic.
                options = self.execute(
                    'SELECT content FROM topic_option WHERE topic_id=%s;', (topic_id,))
                fragments.extend(row['content'] for row in options)
                topic['option'] = list(options)
            else:
                # Compound question: options hang off each sub-question.
                topic['slave'] = []
                for slave in slaves:
                    if 'content' in slave:
                        fragments.append(slave['content'])
                    slave['option'] = self.execute(
                        'SELECT content,is_true FROM topic_option WHERE topic_slave_id=%s;',
                        (int(slave['topic_slave_id']),))
                    fragments.extend(row['content'] for row in slave['option'])
                    topic['slave'].append(slave)

        # DataMaker receives the topic while 'all_content' is still a list.
        topic['all_content'] = fragments
        maker = DataMaker(topic)
        # Skip empty strings and NULL columns so _replacer only sees real text.
        topic['all_content'] = ' '.join(
            [maker._replacer(part) for part in fragments if part is not None and part != ''])
        topic['checked'] = 0
        topic['type'] = 0
        return topic

    def get_essay(self, topic_no):
        """Load one essay, shaped like a topic record (type 2).

        Returns {} unless exactly one `essay` row matches. Otherwise the row
        gains 'is_essay', 'topic_id' (copy of essay_id), 'status',
        'topic_type_id' ('essay'), 'type', 'checked' and 'all_content'
        (the content passed through the module-level `dm._replacer`).
        """
        rows = self.execute(
            'SELECT essay_id,content,title FROM essay WHERE essay_id=%s;', (int(topic_no),))
        if len(rows) != 1:
            return {}
        essay = rows[0]
        essay['is_essay'] = 1
        essay['topic_id'] = essay['essay_id']
        essay['status'] = 1
        essay['topic_type_id'] = 'essay'
        essay['type'] = 2
        essay['checked'] = 0
        essay['all_content'] = dm._replacer(essay['content'])
        return essay

    def get_work_topic(self, topic_no):
        """Load one work-area topic (type 1) from its JSON payload.

        Only rows with status=1 and topic_id=0 are considered. The row's
        `content` column is JSON; the first entry of ['mjson']['items'] is
        used. Returns {} when no single row matches, otherwise a dict with
        'content', 'all_content' (question, options and sub-question texts
        joined by spaces), 'checked', 'topic_id' (the `topic_no` argument as
        given), 'topic_type_id', 'status' and 'type'.

        Raises KeyError/IndexError/ValueError when the payload does not have
        the expected shape.
        """
        rows = self.execute(
            'SELECT topic_id,content,status FROM work_topic WHERE id=%s and status=1 and topic_id=0;',
            (int(topic_no),))
        if len(rows) != 1:
            return {}

        item = json.loads(rows[0]['content'])['mjson']['items'][0]

        fragments = [item['content']]
        if 'option' in item:
            fragments.extend(item['option'])
        for slave in item.get('slave', ()):
            if 'content' in slave:
                fragments.append(slave['content'])
            if 'option' in slave:
                fragments.extend(slave['option'])

        return {
            'content': item['content'],
            # None entries would make str.join raise, so drop them with ''.
            'all_content': ' '.join(
                [part for part in fragments if part is not None and part != '']),
            'checked': 0,
            'topic_id': topic_no,
            'topic_type_id': item['topic_type_id'],
            'status': item['status'],
            'type': 1,
        }
class LocalDB:
    """Wrapper around the local MongoDB collection that stores fetched topics.

    Documents live in collection "site2" of database "chachong_en_content"
    on the MongoDB server at 127.0.0.1:27017.
    """

    def __init__(self):
        self.db_name = "chachong_en_content"
        self._initDB()

    def Executor(self):
        """Return the underlying pymongo collection."""
        return self.en_col

    def _initDB(self, ):
        """Create the client and bind the working database / collection."""
        self.client = pymongo.MongoClient("mongodb://127.0.0.1:27017/")
        self.en_db = self.client[self.db_name]
        self.en_col = self.en_db["site2"]

    def update(self, could_data: dict, exis='unkonw'):
        """Store one record fetched from the remote database.

        The record is matched on "topic_id", taken from could_data['topic_id']
        or, failing that, could_data['id']. With neither key present a warning
        is issued and nothing is written.

        exis:
          'exis' -- the document is known to exist: update it with $set.
          'not'  -- the document is known to be absent: insert it.
          anything else -- look the document up first, then update or insert.
        A record is only ever inserted when it has a 'content' key.
        """
        if 'topic_id' in could_data:
            myquery = {"topic_id": could_data['topic_id']}
        elif 'id' in could_data:
            myquery = {"topic_id": could_data['id']}
        else:
            warnings.warn(str(could_data))
            return

        if exis == 'exis':
            already_stored = True
        elif exis == 'not':
            already_stored = False
        else:
            # Collection.count() was removed in PyMongo 4; one match is enough.
            already_stored = bool(self.en_col.count_documents(myquery, limit=1))

        if already_stored:
            self.en_col.update_one(myquery, {"$set": could_data})
        elif 'content' in could_data:
            self.en_col.insert_one(could_data)

    def finder(self, filter):
        """Return a cursor over the documents matching `filter`."""
        return self.en_col.find(filter)

    def deleter(self, *args):
        """Forward to Collection.delete_many and return its result."""
        return self.en_col.delete_many(*args)

    def backups(self, db_name):
        """Copy every document of the working collection into `db_name`.

        The copy goes to collection "site2" of database `db_name`; whatever
        that collection held before is removed first. The working collection
        itself is only read.
        """
        self.en_db_bak = self.client[db_name]
        # Must be the backup database: binding this to self.en_db would make
        # the delete below wipe the live collection.
        self.en_col_bak = self.en_db_bak["site2"]
        self.en_col_bak.delete_many({})
        for document in self.en_col.find({}):
            self.en_col_bak.insert_one(document)
class DataBase:
    """Copies records from the remote source (CouldDB) into the local saver (LocalDB)."""

    def __init__(self, source: 'CouldDB', saver: 'LocalDB'):
        self.source = source
        self.saver = saver

    def update(self, tipic_id, essay=False, work=False, exis='unknow'):
        """Fetch one record from the source and store it through the saver.

        `essay` selects source.get_essay, otherwise `work` selects
        source.get_work_topic, otherwise source.get_topic is used.
        `exis` is forwarded unchanged to saver.update.
        """
        if essay:
            record = self.source.get_essay(tipic_id)
        elif work:
            record = self.source.get_work_topic(tipic_id)
        else:
            record = self.source.get_topic(tipic_id)
        self.saver.update(record, exis=exis)

    def update_all(self, bakdel=False, essay=False, work_topic=False, topic=False, exis='unknow'):
        """Bulk-copy whole tables from the source into the saver.

        bakdel     -- first back the local collection up into a database named
                      'eng_bak<YYYY-MM-DD>' and then empty the local collection.
        essay      -- copy every row of `essay`.
        work_topic -- copy every row of `work_topic`; each id is appended to
                      '1.txt' before it is processed and failures are printed.
        topic      -- copy every row of `topic`, skipping rows already listed
                      in the checkpoint file '2.txt'; failures are printed.
        exis       -- forwarded to every per-record update (see LocalDB.update).
        """
        # The progress bar is cosmetic; fall back to plain iteration without it.
        try:
            from tqdm import tqdm
        except ImportError:
            def tqdm(rows):
                return rows

        if bakdel:
            self.saver.backups('eng_bak' + time.strftime('%Y-%m-%d', time.localtime(time.time())))
            self.saver.deleter({})

        if essay:
            for row in tqdm(self.source.execute('SELECT essay_id FROM essay;')):
                self.update(row['essay_id'], essay=True, exis=exis)

        if work_topic:
            for row in tqdm(self.source.execute('SELECT id FROM work_topic;')):
                try:
                    with open('1.txt', 'a') as log:
                        log.write(str(row) + '\n')
                    time.sleep(0.05)
                    self.update(row['id'], work=True, exis=exis)
                except Exception as e:
                    print(e)

        if topic:
            # Checkpoint lines are str(row) of rows handled by earlier runs.
            # A set keeps the membership test O(1) per row.
            finished = set()
            try:
                with open('2.txt', 'r') as checkpoint:
                    for line in checkpoint:
                        finished.add(line.strip())
            except FileNotFoundError:
                # First run: nothing has been processed yet.
                pass

            with open('2.txt', 'a') as checkpoint:
                for row in tqdm(self.source.execute('SELECT topic_id FROM topic;')):
                    marker = str(row)
                    if marker in finished:
                        continue
                    try:
                        # The row is recorded before it is processed, so a row
                        # whose update fails is not retried on the next run.
                        checkpoint.write(marker + '\n')
                        checkpoint.flush()
                        self.update(row['topic_id'], exis=exis)
                    except Exception as e:
                        print(e)
if __name__ == '__main__':
    # Script entry: copy every `topic` row from the remote MySQL source into
    # the local MongoDB collection.
    DataBase(CouldDB(), LocalDB()).update_all(topic=True, exis='not')
|