# answer_match.py

import re
  2. def get_ans_match(item_res, all_ans, ans_no):
  3. """
  4. 根据切分后的答案及其题号,与前面试题进行匹配更新,一般情况all_ans和ans_no的个数应该是相同的
  5. :param item_res:
  6. :param all_ans:
  7. :param ans_no:
  8. :return:
  9. """
  10. for k, one_ans in enumerate(all_ans):
  11. temp_id = "no"
  12. # 从试题的res寻找与当前答案题号相同的题目的位置
  13. temp_id_list = [i for i, v in enumerate(item_res) if k<len(ans_no) and v["item_id"] == ans_no[k]]
  14. if len(temp_id_list) == 1:
  15. temp_id = temp_id_list[0]
  16. elif len(temp_id_list) > 1:
  17. for j in temp_id_list:
  18. if "key" not in item_res[j].keys():
  19. temp_id = j
  20. if temp_id != 'no': # 找到题目和答案相同的题目的序号时,没找到就先不要答案了
  21. res_con = item_res[temp_id]['stem']
  22. if k<= len(item_res)-1:
  23. simp_res = only_parse_split(one_ans, item_res[temp_id]["type"], res_con)
  24. item_res[temp_id].update(simp_res)
  25. else:
  26. item_res[temp_id].update({'key': "", 'parse': ""})
  27. return item_res
  28. def only_parse_split(one_item_ans, item_type, res_con, reparse_n=1):
  29. """
  30. 拆分出答案和解析,主要针对答案页中的每个题的答案进行拆分
  31. :one_item_ans: 单道题的答案解析部分,
  32. :reparse_n == 1:表示再解析
  33. :return:{'key': ,"parse": }
  34. """
  35. # one_item_ans = re.sub(r"[1-9][0-9]?.{,3}[((].*?\d+分[))]|(\[.*?\])?\(.*?\d+分\)", "", one_item_ans[:20]) + one_item_ans[20:]
  36. one_item_ans = re.sub("\n\s*(化学|物理|生物|和|与)+\s*【答案】\s*$", '', one_item_ans)
  37. dd = {'parse': one_item_ans, 'key': ""}
  38. if "选修" in one_item_ans.replace(" ", "")[:10] or \
  39. re.search("[((][12][))]\s【(解析|答案)】", one_item_ans.replace(" ", "")): # 2021-5-24
  40. return dd
  41. temp_ans = one_item_ans
  42. one_item_ans = one_item_ans.split("【答案】", maxsplit=1) # 答案关键字可能在后面
  43. if len(one_item_ans) == 2 and "【解析】" in one_item_ans[0]:
  44. one_item_ans = temp_ans
  45. else:
  46. one_item_ans = one_item_ans[-1]
  47. simp_item = re.sub(r"(【([解分][析答]|详解|点[评睛])】|答案|解析|详解)\s*[::]?", "", one_item_ans)
  48. simp_item = re.sub("[^\u4e00-\u9fa5∵∴]", "", simp_item)
  49. # deng_num = re.findall(r"((?!(src|width|height|style)).)+?([==]).+?", one_item_ans, re.S)
  50. tempitem = re.sub("(src|width|height|style)[==]", "", one_item_ans)
  51. deng_num = re.findall(r"([==]).+?", tempitem, re.S)
  52. huanheng_num = re.findall("\n+", one_item_ans, re.S)
  53. if len(simp_item) < 10 and re.search("因为?|因此|所以|根据|依据|若|假设", simp_item) is None and len(deng_num) < 2:
  54. dd['parse'] = ""
  55. if len(huanheng_num) > 1:
  56. dd['parse'] = one_item_ans
  57. sim_parse = dd['parse'] # 去掉点评后用于找答案
  58. if re.search(r"【(解析|解答|分析|详解|点评|点睛)】\n?|(解析|解答|分析|(?<!联)详?解|点评|点睛)\s*[::]", one_item_ans):
  59. dd1 = dict(zip(["key", "parse_title", "parse"],
  60. re.split(r"【(解析|解答|分析|详解|点评|点睛)】\n?", one_item_ans, maxsplit=1)))
  61. if len(dd1)==1:
  62. dd1 = dict(zip(["key", "parse_title", "parse"],
  63. re.split(r"(解)\s*[::]", one_item_ans, maxsplit=1)))
  64. if "【答案】" in temp_ans:
  65. dd["key"] = dd1["key"].strip()
  66. if not dd["key"] and dd1["parse"].strip():
  67. dd["key"] = "见解析"
  68. if len(dd1) >= 3:
  69. dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
  70. del dd1["parse_title"]
  71. return dd
  72. if len(dd1) >= 3:
  73. dd["key"] = dd1["key"].strip()
  74. rest_parse = ""
  75. if re.search("^<img .+?/>$", dd["key"]):
  76. dd["key"] = "见解析"
  77. rest_parse = dd1["key"].strip()
  78. if dd1["parse_title"] == "解":
  79. dd["parse"] = "解:" + dd1["parse"]
  80. else:
  81. dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
  82. if rest_parse:
  83. dd["parse"] = rest_parse + "\n" + dd["parse"]
  84. if not dd["key"] and (dd1["parse"].strip() or rest_parse):
  85. dd["key"] = "见解析"
  86. del dd1["parse_title"]
  87. return dd
  88. sim_parse = re.split("【点评】|【点睛】", dd["parse"])[0].strip()
  89. # 将解析中末尾出现的图片去掉
  90. while re.search('\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$', sim_parse):
  91. sim_parse = re.sub('\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$', "", sim_parse)
  92. if item_type.replace("题", "") in ["单选", "多选", "选择", "不定选择"]:
  93. ans = re.search(r'故选择?\s*[::]?\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>'
  94. r'|故选择?\s*[::]?\s*([A-Z;;和与、、\s]+)', dd["parse"].replace("$", ""))
  95. ans1 = re.search(r'故答案[为是有]\s*[::]\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>'
  96. r'|故答案[为是有]\s*[::]?\s*([A-Z;;和与、、\s]+)', dd["parse"].replace("$", ""))
  97. if ans:
  98. dd["key"] = ans.group(1) if ans.group(1) is not None else ans.group(2)
  99. if ans1:
  100. dd["key"] = ans1.group(1) if ans1.group(1) is not None else ans1.group(2)
  101. elif not dd['key']:
  102. dd['key'] = one_item_ans.strip()
  103. if dd['parse']:
  104. dd['key'] = "见解析"
  105. dd['key'] = re.sub(r"[.;;.]\s*$", "", dd['key'])
  106. elif re.search("证明|求证", res_con):
  107. dd['key'] = "见解析"
  108. elif item_type: # 把所有的图片能先提前替换比较好,后面匹配的话会容易些
  109. ans0 = re.search(r'故选\s*[::]?\s*([A-Z;;和与、、\s]+)[..;;。]?$', sim_parse) # 试验题中可能还有选择题
  110. ans01 = re.search(r'故选\s*[::]\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>', sim_parse) # 可能开始题型写错
  111. ans1 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(((?!(<img)).)+?)[..]?\s*(\n|$)', sim_parse)
  112. ans11 = re.search(r'((?<!解)答\s*[::]|整理得\s*[::]?)\s*(.+?)([..;;]?\s*$|[..]\s*\n)', sim_parse)
  113. ans2 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(<imgsrc.+?/>)[..]?\s*(\n|$)', sim_parse, re.S)
  114. ans22 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*([^∴∵故因所即【】]+?)([..]\s*(\n|$)|$)', sim_parse)
  115. ans21 = re.search(r'综上所述\s*[::]\s*([^∴∵故因所即【】]+?)[..;;]\s*$', sim_parse)
  116. ans3 = re.search(r'(故|因[而此]|所以|∴)\s*[::]?.+?[为是填]\s*[::]?\s*([^∴∵故因所即则【】]+?)([..;;,,]\s*$|[..]\s*\n)', sim_parse) # 改添
  117. ans31 = re.search(r'(故|因[而此]|所以|∴)\s*([^当为是填∴∵因所故即则【】]+?)[..;;]\s*$', sim_parse) # 改添
  118. ans32 = re.search(r'(故|因[而此]|所以)\s*[::]?[^当为是填∴∵因所故即【】]+?[为是填]\s*[::]?\s*(<imgsrc.+?/>)[..]?\s*(\n|$)', sim_parse, re.S)
  119. ans4 = re.search(r'\n\s*[==]([^=\n]+?)[..]?\s*$', sim_parse)
  120. # ans42 = re.search(r'[==](?!")(((?!([故=∴即]|原式|因[而此]|所以|\n|=[^"])).)+?)[..]?\s*$', sim_parse)
  121. ans41 = re.search(r'原式\s*[==].+?[==](?!")(((?!(=|=[^"])).)+?|\s*<imgsrc.+?/>)([..]?\s*$|[..]\s*\n)', sim_parse)
  122. ans42 = re.search("解集?[得为::]+?\s*(\$.+?)$|[::]\s*(\$.+?)$", one_item_ans)
  123. if reparse_n != 2 and "【答案】" not in one_item_ans and dd['parse'] and \
  124. len(re.findall(r"[((]\d[))]|[\n::;;。】]([((](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[))]|[①②③④]\s*(?![+\-]))",
  125. sim_parse.replace(" ", ""))) > 1 and not (item_type == '填空题' and len(re.findall(r"_{2,}|_+([^_]*?)_+", res_con)) == 1):
  126. dd["key"] = "见解析"
  127. elif ans0:
  128. dd["key"] = ans0.group(1)
  129. elif ans01:
  130. dd["key"] = ans01.group(1)
  131. elif ans1 or ans11:
  132. dd["key"] = ans1.group(3) if ans1 else ans11.group(2)
  133. elif ans2:
  134. dd["key"] = ans2.group(3)
  135. elif ans22:
  136. dd["key"] = ans22.group(3)
  137. elif ans21:
  138. dd["key"] = ans21.group(1)
  139. elif (ans3 or ans31 or ans32) and '证明' not in one_item_ans:
  140. if ans3:
  141. dd["key"] = ans3.group(2)
  142. if ans31:
  143. dd["key"] = ans31.group(2)
  144. speci_key_info = re.search("解集?[得为::]+?\s*(\$.+?)$|[::]\s*(\$.+?)$", dd["key"])
  145. if speci_key_info:
  146. dd["key"] = speci_key_info.group(1) if speci_key_info.group(1) else speci_key_info.group(2)
  147. if ans32:
  148. dd["key"] = ans32.group(2)
  149. elif ans42:
  150. dd["key"] = ans42.group(1) if ans42.group(1) else ans42.group(2)
  151. if not dd["parse"]:
  152. dd["parse"] = one_item_ans
  153. elif (ans4 or ans41) and '证明' not in one_item_ans:
  154. if ans4:
  155. dd["key"] = ans4.group(1)
  156. if ans41:
  157. dd["key"] = ans41.group(1)
  158. # if ans42:
  159. # dd["key"] = ans42.group(1)
  160. elif not re.sub("[\s略解析【】]", "" ,dd['parse']):
  161. dd['key'] = one_item_ans.strip()
  162. else:
  163. if dd["key"]:
  164. dd['parse'] = dd["key"] + dd['parse']
  165. dd["key"] = "见解析"
  166. else: # 题型未知
  167. if len(simp_item) < 10:
  168. dd["key"] = re.sub(r"【答案】|答案\s*[::]", "", one_item_ans.strip())
  169. else:
  170. ans1 = re.search(
  171. r'故答?案?选择?\s*[::]\s*<imgsrc\d+\sdata-latex="([A-Z;;和与、、\s]+)"/>|故答?案?选择?\s*[::]?\s*([A-Z;;和与、、\s]+)',
  172. dd["parse"].replace("$", ""))
  173. ans2 = re.search(r'故\s*[::]?\s*答案分?别?[为是]?\s*[::]?\s*(.+?)[..]\s*(\n|$)', dd["parse"])
  174. ans3 = re.search(r'(【答案】|答案)\s*[::]?(.+?)(\n|$)', dd["parse"])
  175. if ans1:
  176. dd["key"] = ans1.group(1) if ans1.group(1) is not None else ans1.group(2)
  177. elif ans2:
  178. dd["key"] = ans2.group(1)
  179. elif ans3:
  180. dd["key"] = ans3.group(2)
  181. dd["parse"] = dd["parse"].replace(ans3.group(0), "")
  182. elif not dd['key']:
  183. dd['key'] = "见解析"
  184. # print('最后:',dd)
  185. return dd