comprehensive_score.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. import json
  2. from fuzzywuzzy import fuzz
  3. class Comprehensive_Score():
  4. def __init__(self):
  5. with open("model_data/keyword_mapping.json", 'r', encoding="utf8") as f:
  6. keyword_mapping = json.load(f)
  7. self.scene2id = keyword_mapping["scene2id"]
  8. self.knowledge2id = keyword_mapping["knowledge2id"]
  9. self.quantity2id = keyword_mapping["quantity2id"]
  10. self.init_id2max_id = keyword_mapping["init_id2max_id"]
  11. def __call__(self, query, refer, scale):
  12. score_dict = dict()
  13. quesType = self.compute_quesType(query["quesType"], refer["quesType"]["quesType"])
  14. knowledge = self.compute_knowledge(query["knowledge"], refer["knowledge"])
  15. physical_scene = self.compute_physical_scene(query["physical_scene"], refer["physical_scene"])
  16. solving_type = self.compute_solving_type(query["solving_type"], refer["solving_type"])
  17. difficulty = self.compute_difficulty(query["difficulty"], refer["difficulty"])
  18. physical_quantity = self.compute_physical_quantity(query["physical_quantity"], refer["physical_quantity"])
  19. # image_semantics = self.compute_image_semantics(query["image_semantics"], refer["image_semantics"])
  20. sum_score = quesType * scale["quesType"] + knowledge * scale["knowledge"] + physical_scene * scale["physical_scene"] + \
  21. solving_type * scale["solving_type"] + difficulty * scale["difficulty"] + \
  22. physical_quantity * scale["physical_quantity"]# + image_semantics * scale["image_semantics"]
  23. sum_score = int(sum_score * 100) / 100
  24. sum_score = min(sum_score, 1.0)
  25. score_dict["quesType"] = quesType
  26. score_dict["knowledge"] = knowledge
  27. score_dict["physical_scene"] = physical_scene
  28. score_dict["solving_type"] = solving_type
  29. score_dict["difficulty"] = difficulty
  30. score_dict["physical_quantity"] = physical_quantity
  31. # score_dict["image_semantics"] = image_semantics
  32. return sum_score, score_dict
  33. # 知识点/物理场景/物理量相互关联得分计算
  34. def compute_relate_score(self, query_list, refer_list, keyword2id, mode=0):
  35. query_set, refer_set = set(query_list), set(refer_list)
  36. if query_set == refer_set:
  37. return 1.0
  38. if len(query_set) > len(refer_set):
  39. query_set, refer_set = refer_set, query_set
  40. accumulate_score = 0
  41. max_length = len(refer_set)
  42. # 双层循环计算知识点之间关联得分
  43. for query in query_set:
  44. query_score = 0
  45. query_id = keyword2id.get(query, 0)
  46. for refer in refer_set:
  47. refer_id = keyword2id.get(refer, 0)
  48. if query_id == refer_id:
  49. query_score += 1
  50. continue
  51. # 知识点
  52. if mode == 0:
  53. if abs(query_id - refer_id) < 10: query_score += 0.3
  54. elif abs(query_id - refer_id) < 100: query_score += 0.2
  55. else: continue
  56. elif mode == 1:
  57. if abs(query_id - refer_id) < 10: query_score += 0.5
  58. else: continue
  59. elif mode == 3:
  60. if abs(query_id - refer_id) < 100: query_score += 0.2
  61. else: continue
  62. fuzz_score = fuzz.ratio(query, refer)
  63. if fuzz_score >= 0.4:
  64. query_score += 0.1
  65. # refer长度为1特殊处理
  66. if max_length == 1:
  67. return query_score
  68. # 限定关联得分上限
  69. max_score = 1 / max_length + 1 if query in refer_set else (max_length - 1) / max_length
  70. if query_score > max_score:
  71. accumulate_score += max_score
  72. else:
  73. accumulate_score += query_score
  74. return min(accumulate_score / max_length, 0.85)
  75. # 题型相似度评分
  76. def compute_quesType(self, query, refer):
  77. score = 0.0
  78. if query == refer:
  79. score = 1.0
  80. return score
  81. # 知识点相似度评分
  82. def compute_knowledge(self, query_list, refer_list):
  83. score = self.compute_relate_score(query_list, refer_list, self.knowledge2id, mode=0)
  84. return int(score * 100) / 100
  85. # 物理场景相似度评分
  86. def compute_physical_scene(self, query_list, refer_list):
  87. score = self.compute_relate_score(query_list, refer_list, self.scene2id, mode=1)
  88. return int(score * 100) / 100
  89. # 试题求解类型相似度评分
  90. def compute_solving_type(self, query_list, refer_list):
  91. query_set, refer_set = set(query_list), set(refer_list)
  92. if len(query_set) > len(refer_set):
  93. query_set, refer_set = refer_set, query_set
  94. same_count = sum([1 for ele in query_set if ele in refer_set])
  95. score = same_count / len(refer_set)
  96. return int(score * 100) / 100
  97. # 难度相似度评分
  98. def compute_difficulty(self, query, refer):
  99. score = 1 - abs(float(query) - float(refer))
  100. return int(score * 100) / 100
  101. # 物理量相似度评分
  102. def compute_physical_quantity(self, query_list, refer_list):
  103. score = self.compute_relate_score(query_list, refer_list, self.quantity2id, mode=2)
  104. return int(score * 100) / 100
  105. # 图片语义相似度评分
  106. def compute_image_semantics(self, query_list, refer_list):
  107. query_set, refer_set = set(query_list), set(refer_list)
  108. if len(query_set) == 0 and len(refer_set) == 0:
  109. return 1
  110. elif len(query_set) == 0 or len(refer_set) == 0:
  111. return 0
  112. elif len(query_set) > len(refer_set):
  113. query_set, refer_set = refer_set, query_set
  114. same_count = sum([1 for ele in query_set if ele in refer_set])
  115. score = same_count / len(refer_set)
  116. return int(score * 100) / 100