|
@@ -0,0 +1,131 @@
|
|
|
+import json
|
|
|
+from fuzzywuzzy import fuzz
|
|
|
+
|
|
|
+
|
|
|
class Comprehensive_Score:
    """Weighted similarity scorer for a pair of annotated questions.

    Each aspect (question type, knowledge points, physical scene, solving
    type, difficulty, physical quantities) is scored separately and the
    aspect scores are combined with caller-supplied weights.

    Keyword-based aspects rely on keyword -> integer id tables loaded from a
    JSON file; ids that are numerically close are treated as related.
    """

    # fuzz.ratio returns an integer percentage in [0, 100], so the threshold
    # must be expressed on that scale (40 means 40 % string similarity).
    FUZZ_MATCH_THRESHOLD = 40

    def __init__(self, mapping_path="model_data/keyword_mapping.json"):
        """Load the keyword -> id lookup tables.

        Args:
            mapping_path: Path of a UTF-8 JSON file containing the keys
                "scene2id", "knowledge2id", "quantity2id" and
                "init_id2max_id". Defaults to the path the class has always
                read from.

        Raises:
            OSError: If the file cannot be opened.
            KeyError: If one of the expected top-level keys is missing.
        """
        with open(mapping_path, 'r', encoding="utf8") as f:
            keyword_mapping = json.load(f)
        self.scene2id = keyword_mapping["scene2id"]
        self.knowledge2id = keyword_mapping["knowledge2id"]
        self.quantity2id = keyword_mapping["quantity2id"]
        # Loaded for callers; not read by any method of this class.
        self.init_id2max_id = keyword_mapping["init_id2max_id"]

    def __call__(self, query, refer, scale):
        """Score how similar ``query`` is to ``refer``.

        Args:
            query: Mapping with the keys "quesType", "knowledge",
                "physical_scene", "solving_type", "difficulty" and
                "physical_quantity".
            refer: Mapping with the same keys, except that
                ``refer["quesType"]`` is itself a mapping whose "quesType"
                entry holds the value to compare.
            scale: Mapping from each of the six aspect names to its weight.

        Returns:
            A ``(sum_score, score_dict)`` tuple. ``sum_score`` is the
            weighted sum truncated to two decimals and capped at 1.0;
            ``score_dict`` maps each aspect name to its individual score.
        """
        # Insertion order matters: it fixes the order of the weighted sum.
        # Image semantics (compute_image_semantics) is not part of the total.
        score_dict = {
            "quesType": self.compute_quesType(
                query["quesType"], refer["quesType"]["quesType"]),
            "knowledge": self.compute_knowledge(
                query["knowledge"], refer["knowledge"]),
            "physical_scene": self.compute_physical_scene(
                query["physical_scene"], refer["physical_scene"]),
            "solving_type": self.compute_solving_type(
                query["solving_type"], refer["solving_type"]),
            "difficulty": self.compute_difficulty(
                query["difficulty"], refer["difficulty"]),
            "physical_quantity": self.compute_physical_quantity(
                query["physical_quantity"], refer["physical_quantity"]),
        }

        sum_score = sum(score_dict[name] * scale[name] for name in score_dict)
        sum_score = min(self._truncate_2dp(sum_score), 1.0)
        return sum_score, score_dict

    @staticmethod
    def _truncate_2dp(score):
        """Truncate ``score`` toward zero to two decimal places."""
        return int(score * 100) / 100

    @staticmethod
    def _overlap_score(query_list, refer_list):
        """Return shared-element count divided by the size of the larger set.

        Two empty inputs count as a perfect match (1.0); exactly one empty
        input yields 0.0.
        """
        query_set, refer_set = set(query_list), set(refer_list)
        if not query_set and not refer_set:
            return 1.0
        larger = max(len(query_set), len(refer_set))
        return len(query_set & refer_set) / larger

    def compute_relate_score(self, query_list, refer_list, keyword2id, mode=0):
        """Relatedness score for knowledge points, scenes or quantities.

        The smaller keyword set is compared pairwise against the larger one.
        A pair with equal ids adds 1. Otherwise an id-distance bonus is added
        according to ``mode``, and pairs that earn such a bonus (or any pair
        when ``mode`` is not one of the known values) may earn a further 0.1
        when their strings are at least ``FUZZ_MATCH_THRESHOLD`` percent
        similar.

        Args:
            query_list: Keywords of the query question.
            refer_list: Keywords of the reference question.
            keyword2id: Mapping from keyword to integer id.
            mode: 0 for knowledge points (distance < 10 adds 0.3,
                distance < 100 adds 0.2); 1 for physical scenes
                (distance < 10 adds 0.5); 2 for physical quantities
                (distance < 100 adds 0.2). 3 is accepted as an alias of 2.

        Returns:
            1.0 when both keyword sets are equal. When the larger set has a
            single element, the raw pair score. Otherwise the per-keyword
            scores (each capped) averaged over the larger set's size and
            capped at 0.85.
        """
        query_set, refer_set = set(query_list), set(refer_list)
        if query_set == refer_set:
            return 1.0
        # Always iterate the smaller set against the larger one.
        if len(query_set) > len(refer_set):
            query_set, refer_set = refer_set, query_set

        max_length = len(refer_set)
        accumulate_score = 0
        for query in query_set:
            # NOTE(review): unknown keywords all map to id 0, so two different
            # unmapped keywords are treated as an exact match -- confirm.
            query_id = keyword2id.get(query, 0)
            query_score = 0
            for refer in refer_set:
                refer_id = keyword2id.get(refer, 0)
                if query_id == refer_id:
                    query_score += 1
                    continue

                distance = abs(query_id - refer_id)
                if mode == 0:
                    if distance < 10:
                        query_score += 0.3
                    elif distance < 100:
                        query_score += 0.2
                    else:
                        continue
                elif mode == 1:
                    if distance < 10:
                        query_score += 0.5
                    else:
                        continue
                elif mode in (2, 3):
                    if distance < 100:
                        query_score += 0.2
                    else:
                        continue

                # Small extra bonus when the keyword strings look alike.
                if fuzz.ratio(query, refer) >= self.FUZZ_MATCH_THRESHOLD:
                    query_score += 0.1

            # With a single reference keyword there is exactly one pair:
            # return its raw score without capping or averaging.
            if max_length == 1:
                return query_score

            # Cap what one query keyword can contribute.
            if query in refer_set:
                max_score = 1 / max_length + 1
            else:
                max_score = (max_length - 1) / max_length
            accumulate_score += min(query_score, max_score)

        return min(accumulate_score / max_length, 0.85)

    def compute_quesType(self, query, refer):
        """Question-type similarity: 1.0 when equal, otherwise 0.0."""
        return 1.0 if query == refer else 0.0

    def compute_knowledge(self, query_list, refer_list):
        """Knowledge-point similarity, truncated to two decimals."""
        score = self.compute_relate_score(
            query_list, refer_list, self.knowledge2id, mode=0)
        return self._truncate_2dp(score)

    def compute_physical_scene(self, query_list, refer_list):
        """Physical-scene similarity, truncated to two decimals."""
        score = self.compute_relate_score(
            query_list, refer_list, self.scene2id, mode=1)
        return self._truncate_2dp(score)

    def compute_solving_type(self, query_list, refer_list):
        """Solving-type similarity: shared types over the larger set's size.

        Two empty lists score 1.0 instead of raising ZeroDivisionError,
        matching compute_image_semantics.
        """
        return self._truncate_2dp(self._overlap_score(query_list, refer_list))

    def compute_difficulty(self, query, refer):
        """Difficulty similarity: ``1 - |query - refer|``, truncated.

        Both arguments are converted with ``float``. The result is negative
        when the two values differ by more than 1.
        """
        score = 1 - abs(float(query) - float(refer))
        return self._truncate_2dp(score)

    def compute_physical_quantity(self, query_list, refer_list):
        """Physical-quantity similarity, truncated to two decimals."""
        score = self.compute_relate_score(
            query_list, refer_list, self.quantity2id, mode=2)
        return self._truncate_2dp(score)

    def compute_image_semantics(self, query_list, refer_list):
        """Image-semantics similarity: shared labels over the larger set's size.

        Two empty lists score 1.0; exactly one empty list scores 0.0.
        """
        return self._truncate_2dp(self._overlap_score(query_list, refer_list))
|