123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- import json
- from fuzzywuzzy import fuzz
class Comprehensive_Score():
    """Weighted similarity score between a query question and a reference question.

    Five per-attribute similarities are computed (question type, knowledge
    points, solving type, difficulty, physical quantities) and combined with
    caller-supplied weights. Keyword-to-id tables are loaded from
    ``model_data/keyword_mapping.json`` at construction time.
    """

    # Minimum ``fuzz.ratio`` result that earns the textual-similarity bonus.
    # ``fuzz.ratio`` reports an integer on a 0-100 scale, so 40 means 40 %.
    _FUZZ_BONUS_THRESHOLD = 40

    def __init__(self, dev_mode):
        """Load the keyword mappings and pick the id-distance exponent.

        Args:
            dev_mode: Mode name. ``"ksy"`` selects exponent 2; any other value
                (the original comment names ``"fxb"``) selects exponent 1.

        Raises:
            OSError: If ``model_data/keyword_mapping.json`` cannot be opened.
            KeyError: If the JSON lacks one of the expected top-level keys.
        """
        with open("model_data/keyword_mapping.json", 'r', encoding="utf8") as f:
            keyword_mapping = json.load(f)
        # Exponent applied to the id-distance thresholds used for knowledge points.
        self.power = 2 if dev_mode == "ksy" else 1
        self.knowledge2id = keyword_mapping["knowledge2id"]
        self.quantity2id = keyword_mapping["quantity2id"]
        self.init_id2max_id = keyword_mapping["init_id2max_id"]

    def __call__(self, query, refer, scale):
        """Score ``query`` against ``refer``.

        Args:
            query: Mapping with keys ``quesType``, ``knowledge``,
                ``solving_type``, ``difficulty`` and ``physical_quantity``.
            refer: Mapping with the same keys, except that the question type
                is read from ``refer["quesType"]["quesType"]``.
            scale: Mapping from each of the five attribute names to its weight.

        Returns:
            A ``(sum_score, score_dict)`` tuple. ``sum_score`` is the weighted
            sum truncated to two decimals and capped at 1.0; ``score_dict``
            maps each attribute name to its individual score.
        """
        quesType = self.compute_quesType(query["quesType"], refer["quesType"]["quesType"])
        knowledge = self.compute_knowledge(query["knowledge"], refer["knowledge"])
        solving_type = self.compute_solving_type(query["solving_type"], refer["solving_type"])
        difficulty = self.compute_difficulty(query["difficulty"], refer["difficulty"])
        physical_quantity = self.compute_physical_quantity(
            query["physical_quantity"], refer["physical_quantity"]
        )
        # image_semantics = self.compute_image_semantics(query["image_semantics"], refer["image_semantics"])

        sum_score = (
            quesType * scale["quesType"]
            + knowledge * scale["knowledge"]
            + solving_type * scale["solving_type"]
            + difficulty * scale["difficulty"]
            + physical_quantity * scale["physical_quantity"]
        )
        # Truncate (not round) to two decimals, then cap at 1.0.
        sum_score = int(sum_score * 100) / 100
        sum_score = min(sum_score, 1.0)

        score_dict = {
            "quesType": quesType,
            "knowledge": knowledge,
            "solving_type": solving_type,
            "difficulty": difficulty,
            "physical_quantity": physical_quantity,
            # "image_semantics": image_semantics,
        }
        return sum_score, score_dict

    def _score_against_refer_set(self, query, refer_set, keyword2id, mode):
        """Sum the relatedness of one keyword against every keyword in ``refer_set``."""
        # NOTE(review): keywords missing from the mapping all default to id 0,
        # so two different unknown keywords count as an exact match - confirm
        # this is intended.
        query_id = keyword2id.get(query, 0)
        query_score = 0
        for refer in refer_set:
            refer_id = keyword2id.get(refer, 0)
            if query_id == refer_id:
                query_score += 1
                continue
            distance = abs(query_id - refer_id)
            if mode == 1:
                # Knowledge points: two proximity tiers, widened by self.power.
                if distance < 10 ** self.power:
                    query_score += 0.3
                elif distance < 100 ** self.power:
                    query_score += 0.2
                else:
                    continue
            elif mode == 2:
                if distance < 100:
                    query_score += 0.2
                else:
                    continue
            # Small extra bonus when the two strings are also textually similar.
            if fuzz.ratio(query, refer) >= self._FUZZ_BONUS_THRESHOLD:
                query_score += 0.1
        return query_score

    def compute_relate_score(self, query_list, refer_list, keyword2id, mode):
        """Relatedness score between two keyword collections.

        Used for knowledge points and physical quantities (the original
        comment also mentions physical scenarios).

        Args:
            query_list: Keywords of the query question.
            refer_list: Keywords of the reference question.
            keyword2id: Mapping from keyword to numeric id; unknown keywords
                are treated as id 0.
            mode: 1 applies the knowledge-point distance tiers (scaled by
                ``self.power``); 2 applies the single fixed tier.

        Returns:
            1.0 when both collections hold the same set of keywords. Otherwise
            the accumulated per-keyword score divided by the size of the
            larger set and capped at 0.85. When the larger set has exactly one
            element, the raw score of the single compared keyword is returned.
        """
        query_set, refer_set = set(query_list), set(refer_list)
        if query_set == refer_set:
            return 1.0
        # Iterate over the smaller set and compare against the larger one.
        if len(query_set) > len(refer_set):
            query_set, refer_set = refer_set, query_set
        max_length = len(refer_set)
        accumulate_score = 0
        for query in query_set:
            query_score = self._score_against_refer_set(query, refer_set, keyword2id, mode)
            # Special case: the larger set has a single element.
            if max_length == 1:
                return query_score
            # Cap the contribution of each keyword.
            if query in refer_set:
                max_score = 1 / max_length + 1
            else:
                max_score = (max_length - 1) / max_length
            accumulate_score += min(query_score, max_score)
        return min(accumulate_score / max_length, 0.85)

    def compute_quesType(self, query, refer):
        """Question-type similarity: 1.0 when equal, otherwise 0.0."""
        return 1.0 if query == refer else 0.0

    def compute_knowledge(self, query_list, refer_list):
        """Knowledge-point similarity, truncated to two decimals."""
        score = self.compute_relate_score(query_list, refer_list, self.knowledge2id, mode=1)
        return int(score * 100) / 100

    def compute_solving_type(self, query_list, refer_list):
        """Solving-type similarity: shared items divided by the larger set's size.

        Returns 1.0 when both collections are empty (nothing differs) instead
        of dividing by zero. The result is truncated to two decimals.
        """
        query_set, refer_set = set(query_list), set(refer_list)
        if len(query_set) > len(refer_set):
            query_set, refer_set = refer_set, query_set
        # refer_set is now the larger set; if it is empty, both are empty.
        if not refer_set:
            return 1.0
        same_count = len(query_set & refer_set)
        score = same_count / len(refer_set)
        return int(score * 100) / 100

    def compute_difficulty(self, query, refer):
        """Difficulty similarity: ``1 - |query - refer|``, truncated to two decimals.

        Both arguments are converted with ``float``. The result is negative
        when the two values differ by more than 1.
        """
        score = 1 - abs(float(query) - float(refer))
        return int(score * 100) / 100

    def compute_physical_quantity(self, query_list, refer_list):
        """Physical-quantity similarity, truncated to two decimals."""
        score = self.compute_relate_score(query_list, refer_list, self.quantity2id, mode=2)
        return int(score * 100) / 100

    # Image-semantics similarity score (currently disabled).
    # def compute_image_semantics(self, query_list, refer_list):
    #     query_set, refer_set = set(query_list), set(refer_list)
    #     if len(query_set) == 0 and len(refer_set) == 0:
    #         return 1
    #     elif len(query_set) == 0 or len(refer_set) == 0:
    #         return 0
    #     elif len(query_set) > len(refer_set):
    #         query_set, refer_set = refer_set, query_set
    #     same_count = sum([1 for ele in query_set if ele in refer_set])
    #     score = same_count / len(refer_set)
    #     return int(score * 100) / 100
|