#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Split a composite question into numbered sub-questions ("slaves").

The stem, the answer and the analysis text of one question are each cut at
sub-question markers such as ``(1)``, ``(i)`` or ``①`` and then recombined
into ``one_item["slave"]``.
"""
import re

from structure.ans_structure import only_parse_split  # noqa: F401  (currently unused in this module)
from structure.option import option_structure

# Internal delimiter substituted for every recognised sub-question marker.
_MARK = "【ⅳ】"

# Roman-numeral sub-question labels normalised to "(n)".
_ROMAN_TO_ARABIC = {
    "(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
    "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)",
}
# NOTE: the keys are joined unescaped on purpose (as in the original code), so
# "(Ⅰ)" acts as a regex group matching "Ⅰ"; only groups 1 and 2 are read.
_ROMAN_LABEL = re.compile(
    r"([\n】])\s*[((]\s*(" + "|".join(_ROMAN_TO_ARABIC.keys()) + r")\s*[))]")

# "(n)" directly after an HTML-like tag or a "(x分)" score marker (stem).
_NUM_AFTER_TAG_OR_SCORE = re.compile(
    r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*([1-9]|1[0-9])\s*[))])")
# "(n)" directly after an HTML-like tag (analysis).
_NUM_AFTER_TAG = re.compile(r"(<[/a-z]+>)\s*([((]\s*([1-9]|1[0-9])\s*[))])")

# "(i)" / "(ⅰ)" / "①" style markers after a line break or punctuation.
_CIRCLED = re.compile(
    r"(?<=[\n::;;。])\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[))]"
    r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
# Same markers, but only at the very start of a line (used for counting on
# space-stripped text).
_CIRCLED_LINE_START = re.compile(
    r"\n([((](i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])[))]|[①②③④⑤⑥⑦⑧⑨⑩])(?![+-])")
# Stem variant of _CIRCLED that additionally accepts a preceding "求".
_CIRCLED_STEM = re.compile(
    r"(?<=[\n::;;。求])\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[))]"
    r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
# Same markers directly after a self-closing tag ("/>") in the analysis.
_CIRCLED_AFTER_TAG = re.compile(
    r"(/>)\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[))]"
    r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")

# "(n)" markers in the answer: after punctuation/newline or after 2+ spaces.
_ANS_ARABIC = re.compile(
    r"((?<=[\n::;;。])\s*|\s{2,})([((]\s*([1-9]|1[0-9])\s*[))])\s*(?!小?[题问]中)")
# "(n)" markers in the stem.
_STEM_ARABIC = re.compile(
    r"(?<=[\n::;;。求])\s*([((]\s*([1-9]|1[0-9])\s*[))])\s*(?!小?[题问]中)")
# Line-leading single-digit "(n)", used only to decide whether to split.
_NUMBERED_LINE = re.compile(r"\n\s*[((]\s*\d\s*[))]\s*(?!小?[题问]中)")

_BLANK = re.compile(r"_{2,}")
_OPTION_LABEL = re.compile(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[..、、]")
_TRAILING_EMPTY_PARENS = re.compile(r"[((]\s+[))]\s*$")
_FILL_UNDERLINE_HINT = re.compile(r"[横划画]线处填写")


def _roman_to_arabic(match):
    """Replacement callback: keep the leading char, emit the "(n)" label."""
    return match.group(1) + _ROMAN_TO_ARABIC[match.group(2)]


def _split_answers(ans, con_list):
    """Cut the answer text into per-sub-question answers.

    :param ans: raw answer text
    :param con_list: stem pieces (``con_list[0]`` is the shared stem)
    :return: ``(ans_summarize, ans_list)`` - the text before the first
        marker and the list of per-sub-question answers
    """
    if ans.strip() == "见解析":
        # "see analysis" is simply repeated for every sub-question.
        return "", ["见解析"] * (len(con_list) - 1)

    raw_ans = ans
    padded = "\n" + ans
    # NOTE(review): the source file had lost its indentation; the two
    # ``elif`` branches are assumed to belong to this outer ``if``.
    if re.search(r"(\n\s*|\s{2,})[((]\s*\d\s*[))]", padded):
        if len(_ANS_ARABIC.findall(padded)) > 1:  # prefer "(n)" markers
            ans = _ANS_ARABIC.sub(_MARK, padded)
    elif len(_CIRCLED.findall(padded)) > 1:
        ans = _CIRCLED.sub(_MARK, padded)
    elif len(_CIRCLED_LINE_START.findall("\n" + ans.replace(" ", ""))) > 1:
        ans = _CIRCLED.sub(_MARK, padded)

    pieces = ans.split(_MARK)
    ans_summarize = pieces[0]
    ans_list = pieces[1:]
    # Drop empty leading answers.
    while ans_list and not ans_list[0]:
        ans_list = ans_list[1:]

    if len(con_list) - len(ans_list) != 1:
        # Counts disagree with the stem: retry with a looser "(n)" split and
        # accept it only if it yields exactly one answer per sub-question.
        loose = re.split(r"[((]\s*\d\s*[))]", raw_ans)
        if len(loose) == len(con_list):
            ans_summarize = loose[0]
            ans_list = loose[1:]
    return ans_summarize, ans_list


def _split_parse(parse, one_item):
    """Cut the analysis text into per-sub-question analyses.

    Side effect: copies ``one_item["topic_num"]`` into ``one_item["item_id"]``
    when the former is present.

    :param parse: analysis text
    :param one_item: the question dict
    :return: ``(parse_list, analy_comment)`` - per-sub-question analyses and
        the shared fragments (preface, 【点评】/【点睛】 remarks) that stay on
        the parent question
    """
    parse_list = []
    analy_comment = []
    if not parse:
        return parse_list, analy_comment

    if re.search('【(详解|解析|解答|分析)】', parse):
        head_body = re.split('【详解】|【解析】|【解答】', parse, maxsplit=1)
        if len(head_body) == 1:
            # None of 【详解】/【解析】/【解答】 present, so 【分析】 must be.
            head_body = re.split('【分析】', parse, maxsplit=1)
            parse = "【分析】" + head_body[1].strip()
        else:
            parse = head_body[1].strip()
        analy_comment.append(head_body[0])

    if re.search("【(点评|点睛)】", parse):
        # Capturing group keeps the heading: [body, heading, remark].
        comment = re.split('(【点评】|【点睛】)', parse, maxsplit=1)
        analy_comment.append(comment[-2] + comment[-1])
        parse = comment[0]

    # Strip a leading "<item number>、(1)" prefix.
    if "topic_num" in one_item:
        one_item["item_id"] = one_item["topic_num"]
    item_id = one_item.get("item_id")
    if item_id:
        padded = "\n" + parse
        lead = re.search(
            r"\n\s*" + re.escape(str(item_id)) + r"\s*[、..、]\s*[((]\s*1\s*[))]",
            padded)
        if lead:
            # Offsets refer to ``padded``; slice it (not ``parse``) and take
            # the preface before ``parse`` is overwritten.
            preface = padded[:lead.start()]
            if preface.strip():
                analy_comment.append(preface)
            parse = "(1)" + padded[lead.end():]

    # ------ split the analysis at sub-question markers ------
    if len(re.findall(r"[((]\d[))]", parse.replace(" ", ""))) > 1:
        parse = re.sub(r"(?<=[\n::;;。])\s*([((]\s*([1-9]|1[0-9])\s*[))])",
                       _MARK, "\n" + parse)
        parse = re.sub(r"(/>|【解】)\s*([((]\s*([1-9]|1[0-9])\s*[))])",
                       r"\1【ⅳ】", parse)
    else:
        parse = _CIRCLED.sub(_MARK, "\n" + parse)
        parse = _CIRCLED_AFTER_TAG.sub(r"\1【ⅳ】", parse)

    # Collapse "<mark>解:<mark>" into a single split point.
    parse = re.sub(r"(【ⅳ】\s*解答?\s*[::])\s*【ⅳ】", r"\1", parse)

    pieces = parse.split(_MARK)
    if len(pieces) > 1:
        # Keep a leading fragment as shared text only if it is non-trivial.
        if len(pieces[0].strip()) >= 5:
            analy_comment.append(pieces[0])
        parse_list = pieces[1:]
    return parse_list, analy_comment


def _classify_single_item(one_item, con, ans):
    """Set ``answer_type`` (and related fields) for a question without sub-questions."""
    stem = one_item["stem"]
    blanks = _BLANK.findall(stem)
    if blanks:
        one_item["blank_num"] = len(blanks)
        one_item["answer_type"] = "填空题"
    elif len(_OPTION_LABEL.findall(stem)) >= 3:
        # At least three option-like labels: let option_structure split them.
        one_item = option_structure(one_item, con, ans, 1)
        one_item["answer_type"] = "选择题"
        if 'options' not in one_item:
            one_item["options"] = []
    elif _TRAILING_EMPTY_PARENS.search(stem) or one_item.get("type") == "判断题":
        one_item["answer_type"] = "判断题"
        key = one_item["key"].strip()
        if re.match("【?(对的?|正确的?|[T√])】?$", key):
            one_item["key"] = "正确"
        elif re.match("【?(错误?的?|不对的?|不正确的?|[F×])】?$", key):
            one_item["key"] = "错误"
    elif _FILL_UNDERLINE_HINT.search(stem) and "com_stem" in one_item:
        # NOTE(review): the original repeated the "com_stem" membership test
        # inside this branch with an unreachable else; it is already
        # guaranteed by the condition above.
        one_item["answer_type"] = "填空题"
        blank_num = len(_BLANK.findall(one_item["com_stem"]))
        if blank_num > 0:
            one_item["blank_num"] = blank_num
    else:
        one_item["answer_type"] = "解答题"
    return one_item


def get_slave(one_item, con, parse, ans, parse_split=1):
    """Split a question that contains sub-questions into ``slave`` entries.

    :param one_item: question dict; must contain ``"stem"`` (and ``"key"``
        when the question is treated as true/false). It is modified in place
        and returned.
    :param con: stem text
    :param parse: analysis text
    :param ans: answer text
    :param parse_split: when truthy, the answer and the analysis are split
        per sub-question as well; otherwise only the stem is split
    :return: the updated ``one_item``
    """
    # Put "(n)" on its own line when it directly follows a tag/score marker.
    con = _NUM_AFTER_TAG_OR_SCORE.sub(r"\1\n\2", con)
    parse = _NUM_AFTER_TAG.sub(r"\1\n\2", parse)

    # Try line-leading "(n)" first; fall back to other marker styles.
    split_style = 1
    if len(_NUMBERED_LINE.findall("\n" + con)) > 1:
        con = _STEM_ARABIC.sub(_MARK, "\n" + con)
    else:
        con = _ROMAN_LABEL.sub(_roman_to_arabic, con)
        parse = _ROMAN_LABEL.sub(_roman_to_arabic, parse)
        if len(_NUMBERED_LINE.findall("\n" + con)) > 1:
            con = _STEM_ARABIC.sub(_MARK, "\n" + con)
        elif len(_CIRCLED_LINE_START.findall("\n" + con.replace(" ", ""))) > 1:
            con = _CIRCLED_STEM.sub(_MARK, "\n" + con)
            split_style = 2
        else:
            split_style = 0

    if not split_style:
        # No sub-questions found.
        return _classify_single_item(one_item, con, ans)

    con_list = con.split(_MARK)
    ans_list = []
    parse_list = []
    analy_comment = []
    ans_summarize = ""
    if parse_split:
        ans_summarize, ans_list = _split_answers(ans, con_list)
        parse_list, analy_comment = _split_parse(parse, one_item)

    # The stem is always split; answers/analyses are attached only when
    # their count matches the number of sub-questions.
    one_item = split2little_con(con_list, ans_list, parse_list, one_item)
    if parse_split and "slave" in one_item:
        first = one_item["slave"][0]
        # Once the sub-questions carry their own analysis/answer, the parent
        # keeps only the shared remainder.
        if first["parse"].strip():
            one_item['parse'] = "\n".join(analy_comment).strip()
        if first["key"].strip():
            one_item['key'] = ans_summarize.strip()
    return one_item


def split2little_con(con_list, ans_list, parse_list, one_item):
    """Combine split stem/answer/analysis pieces into ``one_item["slave"]``.

    :param con_list: stem pieces; ``con_list[0]`` is the shared stem (may be
        empty when the question consists of sub-questions only)
    :param ans_list: per-sub-question answers
    :param parse_list: per-sub-question analyses
    :param one_item: the question dict, modified in place
    :return: ``one_item``; when ``con_list`` has more than one piece it gains
        ``"stem"``, ``"slave"`` and ``"slave_no"``
    """
    if len(con_list) <= 1:
        return one_item

    one_item["stem"] = con_list[0]
    # Answers/analyses are attached by index only when there is exactly one
    # per sub-question.
    use_parse = len(con_list) - len(parse_list) == 1
    use_ans = isinstance(ans_list, list) and len(con_list) - len(ans_list) == 1

    slave = []
    for index, s in enumerate(con_list[1:]):  # the stem split is authoritative
        blank_num = len(_BLANK.findall(s))
        # Remove a "(x分)" score marker near the start of the sub-stem.
        s = re.sub(r"[((]\d+分[))]", "", s[:9]) + s[9:]
        one_slave = {
            "slave_no": "(%s)" % (index + 1),
            "stem": s,
            "key": "",
            "parse": "",
            "answer_type": "解答题",
            "errmsgs": [],
        }
        if use_parse:
            one_slave["parse"] = parse_list[index]
        if use_ans:
            one_slave["key"] = ans_list[index]

        # A sub-stem with option-like labels may be a choice question.
        if len(_OPTION_LABEL.findall(s)) >= 3:
            raw_ans = one_slave["key"]
            raw_stem = one_slave["stem"]
            one_slave = option_structure(one_slave, s, one_slave["key"], 1, is_slave=1)
            one_slave["answer_type"] = "选择题"
            if "options" not in one_slave or not one_slave["options"]:
                one_slave["key"] = raw_ans  # option parsing failed: restore the answer
            elif blank_num > 1:
                # Several blanks: treat as fill-in-the-blank and undo the
                # option parsing.
                one_slave["answer_type"] = "填空题"
                one_slave["key"] = raw_ans
                one_slave["stem"] = raw_stem
                one_slave.pop("options", None)
                one_slave.pop("options_rank", None)

        if "options" not in one_slave or not one_slave["options"]:
            # NOTE(review): the source file had lost its indentation; the
            # final ``else`` is assumed to close this if/elif chain.
            if blank_num > 0:
                one_slave["blank_num"] = blank_num
                one_slave["answer_type"] = "填空题"
            elif _TRAILING_EMPTY_PARENS.search(s):
                one_slave["answer_type"] = "判断题"
            elif _FILL_UNDERLINE_HINT.search(s):
                one_slave["answer_type"] = "填空题"
                # The blanks are counted in the shared stem here.
                blank_num = len(_BLANK.findall(one_item["stem"]))
                if blank_num > 0:
                    one_slave["blank_num"] = blank_num
            else:
                one_slave["answer_type"] = "解答题"

        slave.append(one_slave)

    one_item["slave"] = slave
    one_item["slave_no"] = "1-{}".format(len(slave)) if len(slave) > 1 else "1"
    return one_item