import re
def get_ans_match(item_res, all_ans, ans_no):
    """
    Attach each split-out answer to the question that carries the same number.

    ``all_ans`` and ``ans_no`` are parallel lists and normally have the same
    length. An answer that has no entry in ``ans_no``, or whose number matches
    no question, is skipped.

    :param item_res: list of question dicts. Each one is read for "item_id",
        "stem" and "type" and is updated in place with the parsed answer.
    :param all_ans: answer texts in document order.
    :param ans_no: question number of each answer, parallel to ``all_ans``.
    :return: ``item_res`` (the same list object, mutated in place).
    """
    for k, one_ans in enumerate(all_ans):
        if k >= len(ans_no):
            # No question number is known for this answer, so nothing can match.
            continue
        # Positions of every question whose number equals this answer's number.
        candidates = [i for i, item in enumerate(item_res) if item["item_id"] == ans_no[k]]
        target = None
        if len(candidates) == 1:
            target = candidates[0]
        elif candidates:
            # Several questions share the number: fill the FIRST one that has no
            # answer yet, so repeated numbers are consumed in document order
            # (without stopping at the first hit, the last one would win).
            target = next((i for i in candidates if "key" not in item_res[i]), None)
        if target is None:
            # No matching (or no still-unanswered) question: drop this answer.
            continue
        question = item_res[target]
        res_con = question['stem']
        if k <= len(item_res) - 1:
            question.update(only_parse_split(one_ans, question["type"], res_con))
        else:
            # The answer index lies beyond the number of questions: record an
            # empty answer and explanation instead of parsing the text.
            question.update({'key': "", 'parse': ""})
    return item_res
def only_parse_split(one_item_ans, item_type, res_con, reparse_n=1):
    """
    Split the answer text of a single question into answer and explanation.

    :param one_item_ans: answer/explanation text of one question.
    :param item_type: question type. After removing "题" it is compared with
        单选/多选/选择/不定选择 to pick the choice-question rules; any other
        non-empty value uses the general rules; an empty value means the type
        is unknown.
    :param res_con: question stem. Searched for 证明/求证, and for blanks when
        ``item_type`` is '填空题'.
    :param reparse_n: 1 means this is a re-parse (per the original docstring).
        The value 2 disables the "several sub-questions" shortcut.
    :return: ``{'key': <answer>, 'parse': <explanation>}``. ``key`` is
        "见解析" ("see explanation") when no concise answer can be isolated.
    """
    # NOTE(review): this function was rebuilt from a copy whose indentation had
    # been stripped; the nesting marked NOTE(review) below is inferred - confirm.
    # NOTE(review): several character classes list the same ASCII punctuation
    # twice (e.g. "[((]", "[::]"); presumably one of each pair was a full-width
    # character that was lost. Patterns are kept exactly as found - TODO confirm.

    # Drop a trailing "<subject>【答案】" header that belongs to the next section.
    one_item_ans = re.sub(r"\n\s*(化学|物理|生物|和|与)+\s*【答案】\s*$", '', one_item_ans)
    dd = {'parse': one_item_ans, 'key': ""}
    no_space = one_item_ans.replace(" ", "")
    # Elective-module blocks and "(1) 【解析】"-style texts are returned unsplit.
    if "选修" in no_space[:10] or re.search(r"[((][12][))]\s【(解析|答案)】", no_space):  # 2021-5-24
        return dd

    temp_ans = one_item_ans
    pieces = one_item_ans.split("【答案】", maxsplit=1)  # the answer keyword may come late
    if len(pieces) == 2 and "【解析】" in pieces[0]:
        # The explanation precedes the answer keyword: keep the whole text.
        one_item_ans = temp_ans
    else:
        one_item_ans = pieces[-1]

    # Chinese characters left once the section keywords are removed; a short
    # remainder means the text is probably a bare answer with no explanation.
    simp_item = re.sub(r"(【([解分][析答]|详解|点[评睛])】|答案|解析|详解)\s*[::]?", "", one_item_ans)
    simp_item = re.sub("[^\u4e00-\u9fa5∵∴]", "", simp_item)
    # Count "=" signs, ignoring the ones that belong to image attributes.
    tempitem = re.sub("(src|width|height|style)[==]", "", one_item_ans)
    deng_num = re.findall(r"([==]).+?", tempitem, re.S)
    huanheng_num = re.findall("\n+", one_item_ans, re.S)
    if len(simp_item) < 10 and re.search("因为?|因此|所以|根据|依据|若|假设", simp_item) is None and len(deng_num) < 2:
        dd['parse'] = ""
        if len(huanheng_num) > 1:
            # Short but spread over several lines: keep it as the explanation.
            dd['parse'] = one_item_ans

    if re.search(r"【(解析|解答|分析|详解|点评|点睛)】\n?|(解析|解答|分析|(?<!联)详?解|点评|点睛)\s*[::]", one_item_ans):
        field_names = ["key", "parse_title", "parse"]
        # re.split with one capture group yields 1 item (no split) or 3 items.
        dd1 = dict(zip(field_names,
                       re.split(r"【(解析|解答|分析|详解|点评|点睛)】\n?", one_item_ans, maxsplit=1)))
        if len(dd1) == 1:
            dd1 = dict(zip(field_names,
                           re.split(r"(解)\s*[::]", one_item_ans, maxsplit=1)))
        if "【答案】" in temp_ans:
            dd["key"] = dd1["key"].strip()
            if not dd["key"] and dd1["parse"].strip():
                dd["key"] = "见解析"
            if len(dd1) >= 3:
                dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
                # NOTE(review): return placed inside this branch (inferred).
                return dd
        if len(dd1) >= 3:
            dd["key"] = dd1["key"].strip()
            rest_parse = ""
            if re.search("^<img .+?/>$", dd["key"]):
                # The "answer" is only an image: move it into the explanation.
                dd["key"] = "见解析"
                rest_parse = dd1["key"].strip()
            if dd1["parse_title"] == "解":
                dd["parse"] = "解:" + dd1["parse"]
            else:
                dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
            if rest_parse:
                dd["parse"] = rest_parse + "\n" + dd["parse"]
            if not dd["key"] and (dd1["parse"].strip() or rest_parse):
                dd["key"] = "见解析"
            return dd

    # Explanation without the trailing commentary; used to look for the answer.
    # NOTE(review): placed at function level (inferred).
    sim_parse = re.split("【点评】|【点睛】", dd["parse"])[0].strip()
    # Remove images that trail the explanation.
    trailing_img = r'\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$'
    while re.search(trailing_img, sim_parse):
        sim_parse = re.sub(trailing_img, "", sim_parse)

    # "solution set is $...$" style answers.
    solution_set = r"解集?[得为::]+?\s*(\$.+?)$|[::]\s*(\$.+?)$"

    if item_type.replace("题", "") in ["单选", "多选", "选择", "不定选择"]:
        parse_no_dollar = dd["parse"].replace("$", "")
        ans = re.search(r'故选择?\s*[::]?\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>'
                        r'|故选择?\s*[::]?\s*([A-Z;;和与、、\s]+)', parse_no_dollar)
        ans1 = re.search(r'故答案[为是有]\s*[::]\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>'
                         r'|故答案[为是有]\s*[::]?\s*([A-Z;;和与、、\s]+)', parse_no_dollar)
        if ans:
            dd["key"] = ans.group(1) if ans.group(1) is not None else ans.group(2)
        if ans1:
            dd["key"] = ans1.group(1) if ans1.group(1) is not None else ans1.group(2)
        elif not dd['key']:
            dd['key'] = one_item_ans.strip()
            if dd['parse']:
                dd['key'] = "见解析"
        dd['key'] = re.sub(r"[.;;.]\s*$", "", dd['key'])
    elif re.search("证明|求证", res_con):
        # Proof questions have no short answer.
        dd['key'] = "见解析"
    elif item_type:
        # Candidate answer locations, tried in priority order further down.
        ans0 = re.search(r'故选\s*[::]?\s*([A-Z;;和与、、\s]+)[..;;。]?$', sim_parse)  # choice inside another type
        ans01 = re.search(r'故选\s*[::]\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>', sim_parse)  # type may be mislabelled
        ans1 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(((?!(<img)).)+?)[..]?\s*(\n|$)', sim_parse)
        ans11 = re.search(r'((?<!解)答\s*[::]|整理得\s*[::]?)\s*(.+?)([..;;]?\s*$|[..]\s*\n)', sim_parse)
        ans2 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(<imgsrc.+?/>)[..]?\s*(\n|$)', sim_parse, re.S)
        ans22 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*([^∴∵故因所即【】]+?)([..]\s*(\n|$)|$)', sim_parse)
        ans21 = re.search(r'综上所述\s*[::]\s*([^∴∵故因所即【】]+?)[..;;]\s*$', sim_parse)
        ans3 = re.search(r'(故|因[而此]|所以|∴)\s*[::]?.+?[为是填]\s*[::]?\s*([^∴∵故因所即则【】]+?)([..;;,,]\s*$|[..]\s*\n)', sim_parse)
        ans31 = re.search(r'(故|因[而此]|所以|∴)\s*([^当为是填∴∵因所故即则【】]+?)[..;;]\s*$', sim_parse)
        ans32 = re.search(r'(故|因[而此]|所以)\s*[::]?[^当为是填∴∵因所故即【】]+?[为是填]\s*[::]?\s*(<imgsrc.+?/>)[..]?\s*(\n|$)', sim_parse, re.S)
        ans4 = re.search(r'\n\s*[==]([^=\n]+?)[..]?\s*$', sim_parse)
        ans41 = re.search(r'原式\s*[==].+?[==](?!")(((?!(=|=[^"])).)+?|\s*<imgsrc.+?/>)([..]?\s*$|[..]\s*\n)', sim_parse)
        ans42 = re.search(solution_set, one_item_ans)
        if reparse_n != 2 and "【答案】" not in one_item_ans and dd['parse'] and \
                len(re.findall(r"[((]\d[))]|[\n::;;。】]([((](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[))]|[①②③④]\s*(?![+\-]))",
                               sim_parse.replace(" ", ""))) > 1 and not (item_type == '填空题' and len(re.findall(r"_{2,}|_+([^_]*?)_+", res_con)) == 1):
            # More than one sub-question marker (and not a single-blank fill-in
            # question): no single short answer exists.
            dd["key"] = "见解析"
        elif ans0:
            dd["key"] = ans0.group(1)
        elif ans01:
            dd["key"] = ans01.group(1)
        elif ans1 or ans11:
            dd["key"] = ans1.group(3) if ans1 else ans11.group(2)
        elif ans2:
            dd["key"] = ans2.group(3)
        elif ans22:
            dd["key"] = ans22.group(3)
        elif ans21:
            dd["key"] = ans21.group(1)
        elif (ans3 or ans31 or ans32) and '证明' not in one_item_ans:
            # Later matches deliberately override earlier ones.
            if ans3:
                dd["key"] = ans3.group(2)
            if ans31:
                dd["key"] = ans31.group(2)
                # NOTE(review): refinement scoped to the ans31 case (inferred).
                speci_key_info = re.search(solution_set, dd["key"])
                if speci_key_info:
                    dd["key"] = speci_key_info.group(1) if speci_key_info.group(1) else speci_key_info.group(2)
            if ans32:
                dd["key"] = ans32.group(2)
        elif ans42:
            dd["key"] = ans42.group(1) if ans42.group(1) else ans42.group(2)
            if not dd["parse"]:
                dd["parse"] = one_item_ans
        elif (ans4 or ans41) and '证明' not in one_item_ans:
            if ans4:
                dd["key"] = ans4.group(1)
            if ans41:
                dd["key"] = ans41.group(1)
        elif not re.sub(r"[\s略解析【】]", "", dd['parse']):
            # Nothing but filler in the explanation: the text itself is the answer.
            dd['key'] = one_item_ans.strip()
        else:
            if dd["key"]:
                dd['parse'] = dd["key"] + dd['parse']
            dd["key"] = "见解析"
    else:  # question type unknown
        if len(simp_item) < 10:
            dd["key"] = re.sub(r"【答案】|答案\s*[::]", "", one_item_ans.strip())
        else:
            ans1 = re.search(
                r'故答?案?选择?\s*[::]\s*<imgsrc\d+\sdata-latex="([A-Z;;和与、、\s]+)"/>|故答?案?选择?\s*[::]?\s*([A-Z;;和与、、\s]+)',
                dd["parse"].replace("$", ""))
            ans2 = re.search(r'故\s*[::]?\s*答案分?别?[为是]?\s*[::]?\s*(.+?)[..]\s*(\n|$)', dd["parse"])
            ans3 = re.search(r'(【答案】|答案)\s*[::]?(.+?)(\n|$)', dd["parse"])
            if ans1:
                dd["key"] = ans1.group(1) if ans1.group(1) is not None else ans1.group(2)
            elif ans2:
                dd["key"] = ans2.group(1)
            elif ans3:
                dd["key"] = ans3.group(2)
                # The answer line is removed from the explanation once extracted.
                dd["parse"] = dd["parse"].replace(ans3.group(0), "")
            elif not dd['key']:
                dd['key'] = "见解析"
    return dd
# (stray table-cell delimiter left over from the export removed)