import time

from config import mongo_coll_school, log_msg
from mysql_operate import mysql_operate


class School_Dup_Logic():
    """Duplicate-check decision logic for the school question bank.

    For every retrieved topic the cloud-bank candidates are examined first;
    only when none of them is flagged in MySQL are the school-bank
    candidates examined.
    """

    # Age threshold (7 days, in seconds) used to split school-bank candidates
    # into "older" and "recent" by their ``save_time``.
    RECENT_WINDOW_SECONDS = 7 * 24 * 3600

    def __init__(self, logger):
        """Store the logger used for the "already marked" and error messages."""
        self.logger = logger

    def __call__(self, hnsw_process, retrieve_list):
        """Decide the similar topic for every entry of ``retrieve_list``.

        Args:
            hnsw_process: callable invoked as
                ``hnsw_process(retrieve_list, hnsw_index=1)``; it must return
                ``(cloud_list, school_list)``, both indexable by the position
                of the topic in ``retrieve_list``. Each candidate is indexed
                as ``candidate[0]`` (topic id) and ``candidate[1]`` (score).
            retrieve_list: iterable of mappings with the keys ``"topic_id"``
                and ``"school_id"``.

        Returns:
            dict mapping each ``topic_id`` to a dict with the keys
            ``"school_id"``, ``"similar_topic_id"`` and ``"similar_score"``.
            The last two are empty strings when no candidate was selected.
        """
        # Cloud-bank and school-bank lookups are both produced by one call.
        cloud_list, school_list = hnsw_process(retrieve_list, hnsw_index=1)

        # Computed once so every candidate is compared against the same
        # instant; calling time.time() per comparison could let a record fall
        # between the "older" and "recent" tests and be dropped.
        cutoff = time.time() - self.RECENT_WINDOW_SECONDS

        res_dict = {}
        for i, data in enumerate(retrieve_list):
            topic_id = data["topic_id"]
            entry = {'school_id': data['school_id']}
            res_dict[topic_id] = entry

            # 1) Cloud bank: first candidate that is flagged in MySQL.
            marked = self.math_mark_judge(cloud_list[i])
            if len(marked) > 0:
                self._set_similar(entry, marked[0], marked[1])
                self.logger.info(log_msg.format(id=topic_id, type="chc查重", message="云题库{}已标注".format(topic_id)))
                continue

            # 2) School bank: split candidates by age, then look for a
            #    flagged one among the older candidates.
            older, recent = self._split_school_candidates(school_list[i], cutoff)
            marked = self.math_mark_judge(older)
            if len(marked) > 0:
                self._set_similar(entry, marked[0], marked[1])
                self.logger.info(log_msg.format(id=topic_id, type="chc查重", message="校本题库{}已标注".format(topic_id)))
            elif len(recent) > 0:
                # Nothing flagged: fall back to the first recent candidate
                # (presumably the most similar one - the list order comes
                # from hnsw_process; confirm there).
                self._set_similar(entry, recent[0][0], recent[0][1])
            else:
                # No usable candidate at all: report empty strings.
                self._set_similar(entry, '', '')
        return res_dict

    @staticmethod
    def _set_similar(entry, similar_topic_id, similar_score):
        """Write the selected candidate into a result entry."""
        entry["similar_topic_id"] = similar_topic_id
        entry["similar_score"] = similar_score

    def _split_school_candidates(self, candidates, cutoff):
        """Partition school-bank candidates by their Mongo ``save_time``.

        Returns ``(older, recent)`` where ``older`` holds the candidates with
        ``save_time <= cutoff`` and ``recent`` the remaining ones. If any
        candidate has no Mongo document or no ``save_time`` field, the split
        is abandoned and the full candidate list is returned for both halves.
        """
        older, recent = [], []
        for ele in candidates:
            school_data = mongo_coll_school.find_one({"topic_id": ele[0]})
            # find_one returns None when nothing matches; treat that like a
            # document without "save_time" instead of raising TypeError.
            if school_data is None or "save_time" not in school_data:
                return candidates, candidates
            if school_data["save_time"] <= cutoff:
                older.append(ele)
            else:
                recent.append(ele)
        return older, recent

    def mysql_fetch(self, topic_id):
        """Fetch ``is_owned`` and ``is_mark`` for ``topic_id`` from MySQL.

        Returns whatever ``mysql_operate`` returns for the query, or ``None``
        when the id cannot be converted to ``int`` or the query raises. The
        failure is logged rather than propagated, so a broken lookup only
        makes the candidate count as "not marked".
        """
        try:
            # int() guarantees that only a number is interpolated into the SQL.
            fetch_sql = "select is_owned,is_mark from task_teacher_topic where topic_id=%d" % (int(topic_id))
            return mysql_operate(fetch_sql)
        except Exception as e:
            self.logger.exception(log_msg.format(id=topic_id, type="chc查重", message="mysql query failed: {}".format(e)))
            return None

    def math_mark_judge(self, sim_list):
        """Return the first candidate of ``sim_list`` that is flagged in MySQL.

        A candidate counts as flagged when its row has ``is_mark == 1`` or
        ``is_owned == 1``. Candidates whose lookup fails or returns nothing
        are skipped. Returns ``[]`` when no candidate qualifies.
        """
        for ele_list in sim_list:
            fetch_dict = self.mysql_fetch(ele_list[0])
            # Skip failed (None) and empty lookups.
            if fetch_dict is None or len(fetch_dict) == 0:
                continue
            if fetch_dict.get("is_mark", 0) == 1 or fetch_dict.get("is_owned", 0) == 1:
                return ele_list
        return []