12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- import time
- from config import mongo_coll_school, log_msg
- from mysql_operate import mysql_operate
class School_Dup_Logic:
    """Pick the duplicate candidate to report for each retrieved topic.

    For every topic the cloud question-bank candidates are checked first; if
    none of them is marked/owned, the school question-bank candidates are
    checked. The instance is callable, see ``__call__``.
    """

    # School candidates whose ``save_time`` lies within this many seconds of
    # "now" are treated as recent; anything older is only reported when it is
    # marked/owned.
    RECENT_WINDOW_SECONDS = 7 * 24 * 3600

    def __init__(self, logger):
        """Store the logger; only its ``info`` method is used by this class."""
        self.logger = logger

    def __call__(self, hnsw_process, retrieve_list):
        """Run the duplicate check and build the per-topic result.

        Args:
            hnsw_process: callable invoked as
                ``hnsw_process(retrieve_list, hnsw_index=1)``. It must return
                a pair ``(cloud_list, school_list)``; each is indexed in step
                with ``retrieve_list`` and holds, per topic, a sequence of
                candidates where ``candidate[0]`` is a topic id and
                ``candidate[1]`` a similarity score.
            retrieve_list: sequence of dicts with at least the keys
                ``"topic_id"`` and ``"school_id"``.

        Returns:
            dict mapping each ``topic_id`` to
            ``{"school_id", "similar_topic_id", "similar_score"}``. The two
            ``similar_*`` values are empty strings when nothing is found.
        """
        # Cloud question bank is searched first, then the school question bank.
        cloud_list, school_list = hnsw_process(retrieve_list, hnsw_index=1)

        res_dict = {}
        for i, data in enumerate(retrieve_list):
            topic_id = data["topic_id"]
            entry = {"school_id": data["school_id"]}
            res_dict[topic_id] = entry

            # 1) A marked/owned cloud candidate wins outright.
            #    len() is used instead of truthiness so that array-like
            #    candidates keep working.
            hit = self.math_mark_judge(cloud_list[i])
            if len(hit) > 0:
                entry["similar_topic_id"] = hit[0]
                entry["similar_score"] = hit[1]
                self._log_marked(topic_id, "云题库{}已标注".format(topic_id))
                continue

            # 2) Otherwise split the school candidates by age and look for a
            #    marked/owned one among the older candidates.
            older, recent = self._split_by_age(school_list[i])
            hit = self.math_mark_judge(older)
            if len(hit) > 0:
                entry["similar_topic_id"] = hit[0]
                entry["similar_score"] = hit[1]
                self._log_marked(topic_id, "校本题库{}已标注".format(topic_id))
            elif len(recent) > 0:
                # 3) Nothing marked: report the first recent candidate
                #    (presumably the most similar one; this relies on the
                #    order produced by hnsw_process).
                entry["similar_topic_id"] = recent[0][0]
                entry["similar_score"] = recent[0][1]
            else:
                # 4) No usable candidate at all.
                entry["similar_topic_id"] = ''
                entry["similar_score"] = ''
        return res_dict

    def _log_marked(self, topic_id, message):
        """Emit one info log line in the shared ``log_msg`` format."""
        self.logger.info(log_msg.format(id=topic_id,
                                        type="chc查重",
                                        message=message))

    def _find_school_doc(self, topic_id):
        """Return the school-bank Mongo document for ``topic_id`` or None."""
        return mongo_coll_school.find_one({"topic_id": topic_id})

    def _split_by_age(self, candidates):
        """Partition school candidates into ``(older, recent)`` by save_time.

        A candidate is "older" when its document's ``save_time`` is at or
        before ``now - RECENT_WINDOW_SECONDS`` and "recent" otherwise. As soon
        as one candidate's age cannot be determined, age filtering is given up
        and the full candidate sequence is returned for both halves.
        """
        # One cutoff for the whole list: computing it separately per
        # comparison could leave a candidate in neither half.
        cutoff = time.time() - self.RECENT_WINDOW_SECONDS
        older, recent = [], []
        for candidate in candidates:
            doc = self._find_school_doc(candidate[0])
            # find_one yields None when no document matches; that is handled
            # like a document without "save_time" instead of raising.
            # NOTE(review): confirm that a missing document should disable
            # the age filter rather than drop the candidate.
            if doc is None or "save_time" not in doc:
                return candidates, candidates
            if doc["save_time"] <= cutoff:
                older.append(candidate)
            else:
                recent.append(candidate)
        return older, recent

    def mysql_fetch(self, topic_id):
        """Query ``is_owned`` / ``is_mark`` for a topic from MySQL.

        Returns whatever ``mysql_operate`` returns for the query, or None when
        anything fails (including a ``topic_id`` that is not convertible to
        int). Failures are deliberately swallowed so one bad lookup does not
        abort the whole duplicate check.
        """
        try:
            # int() guarantees that only a number is interpolated into the SQL.
            fetch_sql = "select is_owned,is_mark from task_teacher_topic where topic_id=%d" % (int(topic_id))
            return mysql_operate(fetch_sql)
        except Exception:
            return None

    def math_mark_judge(self, sim_list):
        """Return the first candidate that is marked or owned.

        Args:
            sim_list: iterable of candidates; ``candidate[0]`` is the topic id
                passed to ``mysql_fetch``.

        Returns:
            The first candidate whose fetched row has ``is_mark == 1`` or
            ``is_owned == 1``; an empty list when there is none.
        """
        for candidate in sim_list:
            row = self.mysql_fetch(candidate[0])
            # Failed or empty lookups cannot confirm anything; skip them.
            if not row:
                continue
            if row.get("is_mark", 0) == 1 or row.get("is_owned", 0) == 1:
                return candidate
        return []
|