import json
import logging
from difflib import SequenceMatcher

try:
    from fuzzywuzzy import fuzz
except ImportError:
    # Keep the module importable without the optional third-party package;
    # _string_similarity() then falls back to the standard library.
    fuzz = None
    logging.getLogger(__name__).warning(
        "fuzzywuzzy is not installed; falling back to difflib for string similarity"
    )


def _string_similarity(first, second):
    """Return the similarity of two strings as an integer percentage (0-100).

    Delegates to ``fuzz.ratio`` when fuzzywuzzy is importable.  Otherwise a
    ``difflib.SequenceMatcher`` based approximation on the same 0-100 scale
    is used.
    """
    if fuzz is not None:
        return fuzz.ratio(first, second)
    if first is None or second is None:
        return 0
    if first == second:
        return 100
    if not first or not second:
        return 0
    return int(round(100 * SequenceMatcher(None, first, second).ratio()))


class Comprehensive_Score():
    """Weighted similarity score between a query item and a reference item.

    Five partial scores are computed (question type, knowledge points,
    solving type, difficulty, physical quantities) and combined with
    caller-supplied weights.
    """

    # String similarity is reported on a 0-100 scale, so the bonus cut-off
    # is 40 (i.e. 40 %), not 0.4.
    FUZZ_THRESHOLD = 40
    # Upper bound for the related-keyword score of non-identical sets.
    MAX_RELATE_SCORE = 0.85

    def __init__(self, dev_mode):
        """Load the keyword-to-id mappings from ``model_data/keyword_mapping.json``.

        Args:
            dev_mode: When equal to ``"ksy"`` the knowledge-id distance
                thresholds are squared (power 2); any other value keeps
                them as they are (power 1).
        """
        with open("model_data/keyword_mapping.json", 'r', encoding="utf8") as f:
            keyword_mapping = json.load(f)
        # Exponent applied to the id-distance thresholds, chosen by dev_mode.
        self.power = 2 if dev_mode == "ksy" else 1
        self.knowledge2id = keyword_mapping["knowledge2id"]
        self.quantity2id = keyword_mapping["quantity2id"]
        self.init_id2max_id = keyword_mapping["init_id2max_id"]

    def __call__(self, query, refer, scale):
        """Compute the overall score and the per-dimension scores.

        Args:
            query: Mapping with the keys ``quesType``, ``knowledge``,
                ``solving_type``, ``difficulty`` and ``physical_quantity``.
            refer: Mapping with the same keys, except that the question type
                is read from ``refer["quesType"]["quesType"]``.
            scale: Mapping from each of the five dimension names to its weight.

        Returns:
            A ``(sum_score, score_dict)`` tuple: the weighted sum truncated
            to two decimals and capped at 1.0, and a dict of the five
            partial scores.
        """
        score_dict = dict()
        quesType = self.compute_quesType(query["quesType"], refer["quesType"]["quesType"])
        knowledge = self.compute_knowledge(query["knowledge"], refer["knowledge"])
        solving_type = self.compute_solving_type(query["solving_type"], refer["solving_type"])
        difficulty = self.compute_difficulty(query["difficulty"], refer["difficulty"])
        physical_quantity = self.compute_physical_quantity(
            query["physical_quantity"], refer["physical_quantity"]
        )
        # image_semantics = self.compute_image_semantics(query["image_semantics"], refer["image_semantics"])

        sum_score = quesType * scale["quesType"] + knowledge * scale["knowledge"] + \
            solving_type * scale["solving_type"] + difficulty * scale["difficulty"] + \
            physical_quantity * scale["physical_quantity"]
        sum_score = self._truncate(sum_score)
        sum_score = min(sum_score, 1.0)

        score_dict["quesType"] = quesType
        score_dict["knowledge"] = knowledge
        score_dict["solving_type"] = solving_type
        score_dict["difficulty"] = difficulty
        score_dict["physical_quantity"] = physical_quantity
        # score_dict["image_semantics"] = image_semantics

        return sum_score, score_dict

    @staticmethod
    def _truncate(value):
        """Truncate ``value`` towards zero to two decimal places."""
        return int(value * 100) / 100

    def compute_relate_score(self, query_list, refer_list, keyword2id, mode):
        """Score how closely two keyword collections are related.

        Used for knowledge points (``mode=1``) and physical quantities
        (``mode=2``).

        Args:
            query_list: Keywords of the query item.
            refer_list: Keywords of the reference item.
            keyword2id: Mapping from keyword to numeric id; keywords missing
                from the mapping are given id 0.
            mode: 1 uses two distance tiers scaled by ``self.power``
                (+0.3 / +0.2); 2 uses a single tier (distance < 100, +0.2).

        Returns:
            1.0 when both collections contain the same keywords; otherwise a
            score capped at ``MAX_RELATE_SCORE`` (or the raw per-keyword score
            when the larger collection has exactly one element).
        """
        query_set, refer_set = set(query_list), set(refer_list)
        if query_set == refer_set:
            return 1.0
        # Always iterate over the smaller set and compare against the larger.
        if len(query_set) > len(refer_set):
            query_set, refer_set = refer_set, query_set

        accumulate_score = 0
        max_length = len(refer_set)
        # Nested loops: score every keyword pair.
        for query in query_set:
            query_score = 0
            query_id = keyword2id.get(query, 0)
            for refer in refer_set:
                refer_id = keyword2id.get(refer, 0)
                # NOTE(review): two different keywords that are both missing
                # from the mapping share id 0 and count as an exact match
                # here - confirm this is intended.
                if query_id == refer_id:
                    query_score += 1
                    continue

                distance = abs(query_id - refer_id)
                if mode == 1:
                    # Knowledge points: two proximity tiers.
                    if distance < 10 ** self.power:
                        query_score += 0.3
                    elif distance < 100 ** self.power:
                        query_score += 0.2
                    else:
                        continue
                elif mode == 2:
                    if distance < 100:
                        query_score += 0.2
                    else:
                        continue

                # Small bonus for textually similar keywords (0-100 scale).
                if _string_similarity(query, refer) >= self.FUZZ_THRESHOLD:
                    query_score += 0.1

            # Special case: the larger set has a single element.
            if max_length == 1:
                return query_score

            # Cap the contribution of a single keyword.
            # NOTE(review): the conditional binds to the whole expression
            # "1 / max_length + 1" - confirm that cap is the intended one.
            if query in refer_set:
                max_score = 1 / max_length + 1
            else:
                max_score = (max_length - 1) / max_length
            accumulate_score += min(query_score, max_score)

        return min(accumulate_score / max_length, self.MAX_RELATE_SCORE)

    def compute_quesType(self, query, refer):
        """Question-type similarity: 1.0 when equal, otherwise 0.0."""
        return 1.0 if query == refer else 0.0

    def compute_knowledge(self, query_list, refer_list):
        """Knowledge-point similarity, truncated to two decimals."""
        score = self.compute_relate_score(query_list, refer_list, self.knowledge2id, mode=1)
        return self._truncate(score)

    def compute_solving_type(self, query_list, refer_list):
        """Solving-type similarity: shared entries divided by the larger set size.

        Returns 1.0 when both collections are empty (nothing differs) instead
        of raising ``ZeroDivisionError``.
        """
        query_set, refer_set = set(query_list), set(refer_list)
        if len(query_set) > len(refer_set):
            query_set, refer_set = refer_set, query_set
        # refer_set is the larger set, so it is empty only if both are empty.
        if not refer_set:
            return 1.0
        same_count = len(query_set & refer_set)
        score = same_count / len(refer_set)
        return self._truncate(score)

    def compute_difficulty(self, query, refer):
        """Difficulty similarity: 1 minus the absolute difference, truncated."""
        score = 1 - abs(float(query) - float(refer))
        return self._truncate(score)

    def compute_physical_quantity(self, query_list, refer_list):
        """Physical-quantity similarity, truncated to two decimals."""
        score = self.compute_relate_score(query_list, refer_list, self.quantity2id, mode=2)
        return self._truncate(score)

    # # Image-semantics similarity (currently disabled)
    # def compute_image_semantics(self, query_list, refer_list):
    #     query_set, refer_set = set(query_list), set(refer_list)
    #     if len(query_set) == 0 and len(refer_set) == 0:
    #         return 1
    #     elif len(query_set) == 0 or len(refer_set) == 0:
    #         return 0
    #     elif len(query_set) > len(refer_set):
    #         query_set, refer_set = refer_set, query_set
    #     same_count = sum([1 for ele in query_set if ele in refer_set])
    #     score = same_count / len(refer_set)
    #     return int(score * 100) / 100