import re

# NOTE(review): this module was rebuilt from a damaged copy in which the
# original line breaks were collapsed, every "<...>" span had been stripped
# and fullwidth punctuation had been folded to ASCII.  Everything marked
# NOTE(review) below is a reconstruction and should be compared against
# version control before it is relied on.

# Key under which a parsed question dict stores its question number.
# NOTE(review): the real key name was inside a stripped span; "item_id" is an
# assumption - confirm against the code that builds ``item_res``.
_ITEM_NO_KEY = "item_id"

# Inline image tag.  The image sub-patterns of the original regexes were
# stripped together with the tags; this is the minimal stand-in.
_IMG_TAG = r"<img[^<>]*>"

# Question types (with the trailing "题" removed) answered by option letters.
_CHOICE_TYPES = ("单选", "多选", "选择", "不定选择")

# Title that introduces the explanation part of an answer.
# NOTE(review): only the text up to the lookbehind survived in the damaged
# source; the lookbehind contents and the bare "解" alternative are inferred
# from the later ``parse_title == "解"`` test.
_PARSE_TITLE_RE = re.compile(
    r"【(解析|解答|分析|详解|点评|点睛)】\n?"
    r"|(解析|解答|分析|(?<!见)详解|(?<![\u4e00-\u9fa5])解)\s*[::]"
)

# An image sitting alone on the last line of the explanation.
_TRAILING_IMG_RE = re.compile(r"\n\s*" + _IMG_TAG + r"\s*$")

# "解得 $...$" / ": $...$" at the end of a candidate key.
_SPECIAL_KEY_RE = re.compile(r"解集?[得为::]+?\s*(\$.+?)$|[::]\s*(\$.+?)$")


def _first_group(match):
    """Return group 1 of *match*, or group 2 when group 1 did not take part."""
    return match.group(1) if match.group(1) is not None else match.group(2)


def _split_titled_parse(text):
    """Split *text* at the first explanation title.

    Returns ``{"key", "parse_title", "parse"}`` when a title is found and
    ``{"key": text}`` otherwise.
    """
    found = _PARSE_TITLE_RE.search(text)
    if found is None:
        return {"key": text}
    return {
        "key": text[:found.start()],
        "parse_title": found.group(1) or found.group(2),
        "parse": text[found.end():],
    }


def get_ans_match(item_res, all_ans, ans_no):
    """Attach every split answer to the question with the same number.

    ``all_ans`` and ``ans_no`` are expected to have the same length; answers
    without a corresponding entry in ``ans_no`` are ignored.

    :param item_res: list of question dicts; each matched dict must provide
        ``"stem"`` and ``"type"`` and is updated in place with ``"key"`` and
        ``"parse"``.
    :param all_ans: answer texts, one per question, in document order.
    :param ans_no: question numbers, parallel to ``all_ans``.
    :return: ``item_res`` (the same list object).
    """
    for k, one_ans in enumerate(all_ans):
        temp_id = "no"
        # Positions of the questions whose number equals this answer's number.
        if k < len(ans_no):
            temp_id_list = [
                i for i, v in enumerate(item_res)
                if _ITEM_NO_KEY in v and v[_ITEM_NO_KEY] == ans_no[k]
            ]
        else:
            temp_id_list = []
        if len(temp_id_list) == 1:
            temp_id = temp_id_list[0]
        elif len(temp_id_list) > 1:
            # Duplicate numbers: answers arrive in document order, so fill
            # the first question that has no answer yet.  The damaged source
            # had no ``break`` here, which picked the last one and attached
            # the answers in reverse order.
            for j in temp_id_list:
                if "key" not in item_res[j]:
                    temp_id = j
                    break
        if temp_id == "no":
            # No question with this number: drop the answer for now.
            continue
        res_con = item_res[temp_id]["stem"]
        if k <= len(item_res) - 1:
            simp_res = only_parse_split(one_ans, item_res[temp_id]["type"], res_con)
            item_res[temp_id].update(simp_res)
        else:
            item_res[temp_id].update({'key': "", 'parse': ""})
    return item_res


def _choice_key(dd, one_item_ans):
    """Fill ``dd["key"]`` for a choice question from "故选 X" / "故答案为 X"."""
    text = dd["parse"].replace("$", "")
    ans = re.search(
        r"故选择?\s*[::]?\s*(" + _IMG_TAG + r")"
        r"|故选择?\s*[::]?\s*([A-Z;;和与、\s]+)", text)
    ans1 = re.search(
        r"故答案[为是有]\s*[::]\s*(" + _IMG_TAG + r")"
        r"|故答案[为是有]\s*[::]?\s*([A-Z;;和与、\s]+)", text)
    if ans:
        dd["key"] = _first_group(ans)
    if ans1:
        # "故答案为" takes precedence over "故选" when both are present.
        dd["key"] = _first_group(ans1)
    elif not dd["key"]:
        # Nothing recognised: a bare answer is the key itself, an answer
        # that carries an explanation is referred to the explanation.
        dd["key"] = "见解析" if dd["parse"] else one_item_ans.strip()
    dd["key"] = re.sub(r"[.;;.]\s*$", "", dd["key"])


def _typed_key(dd, one_item_ans, sim_parse, item_type, res_con, reparse_n):
    """Fill ``dd["key"]`` for a non-choice question of known type."""
    lead = r"(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*"
    # Lab-style questions may still contain a choice sub-question.
    ans0 = re.search(r"故选\s*[::]?\s*([A-Z;;和与、\s]+)[..;;。]?$", sim_parse)
    # The question type may have been labelled wrongly.
    ans01 = re.search(r"故选\s*[::]\s*(" + _IMG_TAG + r")", sim_parse)
    # NOTE(review): the original had two more patterns here (``ans1`` and
    # ``ans11``) that ranked between ans01 and ans2.  Their text was
    # stripped from the damaged source and they are NOT reconstructed.
    ans2 = re.search(lead + r"(" + _IMG_TAG + r")[..]?\s*(\n|$)", sim_parse, re.S)
    ans22 = re.search(
        lead + r"([^∴∵故因所即【】]+?)([..]\s*(\n|$)|$)", sim_parse)
    ans21 = re.search(
        r"综上所述\s*[::]\s*([^∴∵故因所即【】]+?)[..;;]\s*$", sim_parse)
    ans3 = re.search(
        r"(故|因[而此]|所以|∴)\s*[::]?.+?[为是填]\s*[::]?\s*"
        r"([^∴∵故因所即则【】]+?)([..;;,,]\s*$|[..]\s*\n)", sim_parse)
    ans31 = re.search(
        r"(故|因[而此]|所以|∴)\s*([^当为是填∴∵因所故即则【】]+?)[..;;]\s*$",
        sim_parse)
    ans32 = re.search(
        r"(故|因[而此]|所以)\s*[::]?[^当为是填∴∵因所故即【】]+?[为是填]\s*[::]?\s*"
        r"(" + _IMG_TAG + r")[..]?\s*(\n|$)", sim_parse, re.S)
    ans4 = re.search(r"\n\s*[==]([^=\n]+?)[..]?\s*$", sim_parse)
    ans41 = re.search(
        r'原式\s*[==].+?[==](?!")(((?!(=|=[^"])).)+?|\s*)'
        r'([..]?\s*$|[..]\s*\n)', sim_parse)
    ans42 = _SPECIAL_KEY_RE.search(one_item_ans)

    # More than one sub-question marker ((1), (ii), ① ...) means the answer
    # cannot be reduced to a single key - unless it is a one-blank fill-in.
    sub_marks = re.findall(
        r"[((]\d[))]|[\n::;;。】]([((](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[))]"
        r"|[①②③④]\s*(?![+\-]))",
        sim_parse.replace(" ", ""))
    one_blank = (item_type == '填空题'
                 and len(re.findall(r"_{2,}|_+([^_]*?)_+", res_con)) == 1)

    if (reparse_n != 2 and "【答案】" not in one_item_ans and dd["parse"]
            and len(sub_marks) > 1 and not one_blank):
        dd["key"] = "见解析"
    elif ans0:
        dd["key"] = ans0.group(1)
    elif ans01:
        dd["key"] = ans01.group(1)
    elif ans2:
        dd["key"] = ans2.group(3)
    elif ans22:
        dd["key"] = ans22.group(3)
    elif ans21:
        dd["key"] = ans21.group(1)
    elif (ans3 or ans31 or ans32) and '证明' not in one_item_ans:
        # Later patterns deliberately override earlier ones.
        if ans3:
            dd["key"] = ans3.group(2)
        if ans31:
            dd["key"] = ans31.group(2)
            # NOTE(review): nesting of this post-processing under ans31 is
            # inferred; the damaged source does not show indentation.
            speci_key_info = _SPECIAL_KEY_RE.search(dd["key"])
            if speci_key_info:
                dd["key"] = (speci_key_info.group(1) if speci_key_info.group(1)
                             else speci_key_info.group(2))
        if ans32:
            dd["key"] = ans32.group(2)
    elif ans42:
        dd["key"] = ans42.group(1) if ans42.group(1) else ans42.group(2)
        if not dd["parse"]:
            dd["parse"] = one_item_ans
    elif (ans4 or ans41) and '证明' not in one_item_ans:
        if ans4:
            dd["key"] = ans4.group(1)
        if ans41:
            dd["key"] = ans41.group(1)
    elif not re.sub(r"[\s略解析【】]", "", dd["parse"]):
        # The explanation is empty or just "略": the text itself is the key.
        dd["key"] = one_item_ans.strip()
    else:
        if dd["key"]:
            dd["parse"] = dd["key"] + dd["parse"]
        dd["key"] = "见解析"


def _untyped_key(dd, one_item_ans, simp_item):
    """Fill ``dd["key"]`` when the question type is unknown."""
    if len(simp_item) < 10:
        dd["key"] = re.sub(r"【答案】|答案\s*[::]", "", one_item_ans.strip())
        return
    ans1 = re.search(
        r"故答?案?选择?\s*[::]\s*(" + _IMG_TAG + r")"
        r"|故答?案?选择?\s*[::]?\s*([A-Z;;和与、\s]+)",
        dd["parse"].replace("$", ""))
    ans2 = re.search(
        r"故\s*[::]?\s*答案分?别?[为是]?\s*[::]?\s*(.+?)[..]\s*(\n|$)",
        dd["parse"])
    ans3 = re.search(r"(【答案】|答案)\s*[::]?(.+?)(\n|$)", dd["parse"])
    if ans1:
        dd["key"] = _first_group(ans1)
    elif ans2:
        dd["key"] = ans2.group(1)
    elif ans3:
        dd["key"] = ans3.group(2)
        dd["parse"] = dd["parse"].replace(ans3.group(0), "")
    elif not dd["key"]:
        dd["key"] = "见解析"


def only_parse_split(one_item_ans, item_type, res_con, reparse_n=1):
    """Split the answer text of one question into its key and explanation.

    :param one_item_ans: answer-and-explanation text of a single question.
    :param item_type: question type such as ``"单选题"``; empty or ``None``
        when the type is unknown.
    :param res_con: question stem; used to detect proof questions and to
        count the blanks of a fill-in question.
    :param reparse_n: ``2`` disables the "several sub-questions" shortcut;
        the original docstring only says that ``1`` means "parse again".
    :return: ``{"key": ..., "parse": ...}``.  ``key`` is ``"见解析"`` when no
        short answer can be isolated from the explanation.
    """
    item_type = item_type or ""
    # Drop a trailing subject heading that belongs to the next answer block.
    one_item_ans = re.sub(
        r"\n\s*(化学|物理|生物|和|与)+\s*【答案】\s*$", '', one_item_ans)
    dd = {'parse': one_item_ans, 'key': ""}
    compact = one_item_ans.replace(" ", "")
    # Elective sections and "(1) 【解析】"-style layouts are returned as-is.
    if "选修" in compact[:10] or re.search(r"[((][12][))]\s【(解析|答案)】", compact):
        return dd

    # The "【答案】" keyword may come after the explanation; only cut at it
    # when no "【解析】" precedes it.
    pieces = one_item_ans.split("【答案】", maxsplit=1)
    if not (len(pieces) == 2 and "【解析】" in pieces[0]):
        one_item_ans = pieces[-1]

    # Chinese characters left once titles and keywords are removed: a crude
    # measure of how much explanatory prose the answer contains.
    simp_item = re.sub(
        r"(【([解分][析答]|详解|点[评睛])】|答案|解析|详解)\s*[::]?", "", one_item_ans)
    simp_item = re.sub(r"[^\u4e00-\u9fa5∵∴]", "", simp_item)
    # Equals signs, ignoring those of HTML attributes.
    tempitem = re.sub(r"(src|width|height|style)[==]", "", one_item_ans)
    deng_num = re.findall(r"([==]).+?", tempitem, re.S)
    huanheng_num = re.findall(r"\n+", one_item_ans, re.S)
    if (len(simp_item) < 10
            and re.search(r"因为?|因此|所以|根据|依据|若|假设", simp_item) is None
            and len(deng_num) < 2):
        # Short, no reasoning words, at most one equation: a bare key ...
        dd['parse'] = ""
        # ... unless it spans several lines.
        # NOTE(review): nesting of this override is inferred.
        if len(huanheng_num) > 1:
            dd['parse'] = one_item_ans

    # NOTE(review): the damaged source had one more early-return variant
    # here (empty key, titled explanation) whose guard was stripped; it is
    # NOT reconstructed.
    dd1 = _split_titled_parse(one_item_ans)
    if len(dd1) >= 3:
        dd["key"] = dd1["key"].strip()
        rest_parse = ""
        if re.search("^" + _IMG_TAG + "$", dd["key"]):
            # A key that is only an image is moved into the explanation.
            rest_parse = dd["key"]
            dd["key"] = "见解析"
        if dd1["parse_title"] == "解":
            dd["parse"] = "解:" + dd1["parse"]
        else:
            dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
        if rest_parse:
            dd["parse"] = rest_parse + "\n" + dd["parse"]
        if not dd["key"] and (dd1["parse"].strip() or rest_parse):
            dd["key"] = "见解析"
        return dd

    # Text used for locating the answer: commentary and trailing images
    # are removed first.
    sim_parse = re.split("【点评】|【点睛】", dd["parse"])[0].strip()
    while _TRAILING_IMG_RE.search(sim_parse):
        sim_parse = _TRAILING_IMG_RE.sub("", sim_parse)

    if item_type.replace("题", "") in _CHOICE_TYPES:
        _choice_key(dd, one_item_ans)
    elif re.search("证明|求证", res_con):
        dd['key'] = "见解析"
    elif item_type:
        _typed_key(dd, one_item_ans, sim_parse, item_type, res_con, reparse_n)
    else:
        _untyped_key(dd, one_item_ans, simp_item)
    return dd