#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Split a compound question into sub-questions ("slaves").

The stem, the answer and the analysis text are each cut at their
sub-question labels -- ``(1)``, ``(i)``, circled digits and similar -- and
the pieces are then recombined into one record per sub-question.

NOTE(review): this module was re-indented from a whitespace-collapsed copy.
Places where the original nesting could not be determined with certainty
are marked with ``NOTE(review)`` and should be checked against upstream.
"""
import re

from structure.ans_structure import only_parse_split

# Placeholder written over every recognised sub-question label; the text is
# split on it afterwards.
_MARK = "【ⅳ】"

# Roman-numeral labels and the "(n)" label each one is rewritten to.
_ROMAN_LABELS = {
    "(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
    "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)",
}
_ROMAN_LABEL_RE = r"([\n】])\s*[((]\s*(" + "|".join(_ROMAN_LABELS) + r")\s*[))]"

# Patterns used only for *counting* labels (applied to text with spaces removed).
_DIGIT_LABEL = r"[((]\d[))]"
_OTHER_LABEL = r"\n[((](i{1,3}|[ⅰⅱⅲⅳ①②③④])[))]|\n[①②③④]\s*(?![+-])"
_ANY_LABEL = _DIGIT_LABEL + "|" + _OTHER_LABEL

# Label bodies used when *replacing* labels with ``_MARK``.
_DIGIT_BODY = r"([((]\s*\d\s*[))])"
_ANY_BODY = r"([((]\s*(\d|i{1,3}|[ⅰⅱⅲⅳ①②③④])\s*[))]|[①②③④]\s*(?![+-]))"

# A label only counts when it starts the text or follows one of these characters.
_CON_LEAD = r"((?<=[\n::;;。求])|^)\s*"
_TEXT_LEAD = r"((?<=[\n::;;。])|^)\s*"
_SYN_LEAD = r"((?<=[\n::;;。】])|^)\s*"

# The answer is only split when it contains a semicolon or a labelled line.
_ANS_TRIGGER = r"[;;]|\n[((](\d|i{1,3}|[ⅰⅱⅲⅳ①②③④])[))]|\n[①②③④]\s*(?![+-])"

# "So the answer is: ..." summary line inside the analysis text.
_SUMMARY_CORE = r"(故|因[而此]|所以)\s*[::]?\s*答案分?别?([为是]|填)?\s*[::]\s*(.+?)"
# NOTE(review): the third alternative of the first group held raw line-break
# characters in the source this was reconstructed from; they are reproduced
# here as two newlines. Confirm against the upstream file.
_SUMMARY_LINE = r"(\n.*?|^.*?|\n\n)(" + _SUMMARY_CORE + r")(\n|$)"

_BLANK = r"_{2,}"


def _count(pattern, text):
    """Count matches of *pattern* in *text* after removing ASCII spaces."""
    return len(re.findall(pattern, text.replace(" ", "")))


def _roman_to_arabic(match):
    """Replacement callback: keep the leading character, swap the label."""
    return match.group(1) + _ROMAN_LABELS[match.group(2)]


def _split_answers(ans, sub_count):
    """Cut the answer text into per-sub-question pieces.

    :param ans: raw answer text
    :param sub_count: number of sub-questions found in the stem
    :return: ``(ans_list, by_sub_item)``. ``ans_list`` is the literal string
        "见解析" when the answer is exactly that, otherwise a list.
        ``by_sub_item`` is False when the labelled split produced too few
        pieces and the answer was split on semicolons instead.
    """
    by_sub_item = True
    ans_list = [] if ans != "见解析" else "见解析"
    if not re.search(_ANS_TRIGGER, ans.replace(" ", "")):
        return ans_list, by_sub_item

    # Prefer "(digit)" labels; fall back to the other label styles.
    if _count(_DIGIT_LABEL, ans) > 1:
        ans = re.sub(_TEXT_LEAD + _DIGIT_BODY, _MARK, ans)
    elif _count(_OTHER_LABEL, ans) > 1:
        ans = re.sub(_TEXT_LEAD + _ANY_BODY, _MARK, ans)
    ans_list.extend(ans.split(_MARK))

    # Drop empty leading pieces (text started with a label).
    while ans_list and not ans_list[0]:
        ans_list = ans_list[1:]

    if len(ans_list) < sub_count:
        # Too few labelled pieces: split on semicolons instead. The
        # lookahead skips semicolons that are followed by " height".
        ans_list = re.split(r"[;;](?! height)", ans)
        by_sub_item = False
    return ans_list, by_sub_item


def _split_parse(parse):
    """Cut the analysis text into per-sub-question pieces.

    :param parse: raw analysis text (may be empty)
    :return: ``(parse_list, parse_common, analy_comment, ans_summarize)``
        where ``parse_list`` holds one piece per sub-question,
        ``parse_common`` is the text shared by all of them,
        ``analy_comment`` collects the overall-analysis / remark texts and
        ``ans_summarize`` is ``""`` or ``[summary_text, index]`` for a
        removed "so the answer is ..." line.
    """
    parse_list = []
    syn_list = []
    analy_comment = []
    parse_common = ""
    ans_summarize = ""
    if not parse:
        return parse_list, parse_common, analy_comment, ans_summarize

    if re.search(r"【(详解|解析|解答)】", parse):
        temp_parse = re.split(r"【详解】|【解析】|【解答】", parse)
        parse = temp_parse[1]
        # If the part before the heading is itself split by sub-question,
        # cut it up separately.
        if _count(_DIGIT_LABEL, temp_parse[0]) > 1:
            syn = re.sub(_SYN_LEAD + _DIGIT_BODY, _MARK, temp_parse[0])
            syn_list.extend(syn.split(_MARK))
        # The whole text before the heading is always kept as the last entry.
        syn_list.append(temp_parse[0])

    if re.search(r"【(点评|点睛)】", parse):
        comment = re.split(r"(【点评】|【点睛】)", parse)
        # Last heading plus the text after it.
        analy_comment.append(comment[-2] + comment[-1])
        parse = comment[0]

    # Mark the sub-question labels in the analysis.
    body = _DIGIT_BODY if _count(_DIGIT_LABEL, parse) > 1 else _ANY_BODY
    parse = re.sub(_TEXT_LEAD + body, _MARK, parse)
    parse = re.sub(r"(/>)\s*" + body, r"\1" + _MARK, parse)

    # A trailing "so the answer is ..." line lists every sub-answer at once;
    # take it out so it does not get split across sub-questions.
    ans_s = re.search(_SUMMARY_LINE, parse)
    if ans_s:
        summary_answer = ans_s.group(5)
        if summary_answer and summary_answer.count(_MARK) > 1:
            lead = ans_s.group(1)
            if lead == "\n\n" or not lead.strip():
                ans_s_index = parse.index(ans_s.group(2))
            else:
                ans_s_index = parse.index(lead)
            ans_summarize = [ans_s.group(2), ans_s_index]
            parse = parse.replace(ans_summarize[0], "")
        elif summary_answer and _MARK in summary_answer:
            parse = parse.replace(summary_answer, summary_answer.replace(_MARK, ""))

    # NOTE(review): placed at this level (applies to every non-empty parse);
    # the collapsed source would also allow it inside the ``elif`` above.
    parse = re.sub(r"(【ⅳ】\s*解答?\s[::])\s*【ⅳ】", r"\1", parse)
    little_parse = parse.split(_MARK)

    if len(syn_list) - 1 == len(little_parse) and len(little_parse) > 2:
        # The pre-heading analysis lines up with the detailed solution:
        # pair them per sub-question. ``zip`` stops before the extra last
        # entry of ``syn_list`` (the unsplit pre-heading text).
        parse_list = [
            "分析:{}\n解答:{}".format(analysis, detail)
            for analysis, detail in zip(syn_list[1:], little_parse[1:])
        ]
        parse_common = syn_list[0] + "\n" + little_parse[0]
    else:
        # Keep the pre-heading text as overall analysis when it contains
        # more than four CJK characters.
        if syn_list and len(re.sub(r"[^\u4e00-\u9fa5]", "", syn_list[-1])) > 4:
            analy_comment.insert(0, syn_list[-1])
        parse_list.extend(little_parse)
        if len(parse_list) > 1:
            parse_common = parse_list[0]
            parse_list = parse_list[1:]
    return parse_list, parse_common, analy_comment, ans_summarize


def get_slave(one_item, con, parse, ans):
    """Split a question that has sub-questions into one record per sub-question.

    :param one_item: dict describing the question; read and updated in place
    :param con: question stem text
    :param parse: analysis text
    :param ans: answer text
    :return: ``one_item``. When more than one sub-question label is found in
        the stem the item is passed through :func:`split2little_con`;
        otherwise only ``blank_num`` and, for an all-capital-letters answer
        of length >= 2, ``item_topic_name`` are updated.
    """
    # Normalise roman-numeral labels to "(n)".
    con = re.sub(_ROMAN_LABEL_RE, _roman_to_arabic, con)
    parse = re.sub(_ROMAN_LABEL_RE, _roman_to_arabic, parse)
    # Put a "(n)" label that directly follows a tag (or a "(n分)" score
    # note, for the stem) on its own line so it is recognised below.
    con = re.sub(r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", con)
    parse = re.sub(r"(<[/a-z]+>)\s*([((]\s*\d\s*[))])", r"\1\n\2", parse)

    kuo_num = _count(_DIGIT_LABEL, con)
    circle_num = _count(_OTHER_LABEL, con)

    if _count(_ANY_LABEL, con) > 1:
        # Stem.
        if kuo_num > 1:
            con = re.sub(_CON_LEAD + _DIGIT_BODY + r"\s*(?!小?题?中)", _MARK, con)
        elif circle_num > 1:
            con = re.sub(_CON_LEAD + _ANY_BODY, _MARK, con)
        con_list = con.split(_MARK)

        # Answer: taken per sub-question label, or per blank when the
        # answer is only separated by semicolons.
        ans_list, by_sub_item = _split_answers(ans, len(con_list) - 1)

        # Analysis.
        parse_list, parse_common, analy_comment, ans_summarize = _split_parse(parse)

        one_item = split2little_con(con_list, ans_list, parse_list, one_item,
                                    by_sub_item, ans_summarize)
        if one_item.get("slave"):
            one_item["parse"] = parse_common
            if analy_comment:
                one_item["analy"] = "\n".join(analy_comment)
    else:
        # NOTE(review): this branch is attached to the "more than one label"
        # test above; the collapsed source would also allow attaching it to
        # the ``slave`` check. Confirm against upstream.
        blanks = re.findall(_BLANK, one_item["content"])
        if blanks:
            one_item["blank_num"] = len(blanks)
        if re.search(r"^[A-Z]{2,}$", re.sub(r"\W", "", ans)):
            one_item["item_topic_name"] = "多选题"
    return one_item


def split2little_con(con_list, ans_list, parse_list, one_item, is_sub_item, ans_summarize):
    """Combine the split stem, answer and analysis into sub-question records.

    :param con_list: stem pieces; element 0 is the shared stem
    :param ans_list: answer pieces, or the string "见解析"
    :param parse_list: analysis pieces, one per sub-question
    :param one_item: the question dict; updated in place
    :param is_sub_item: True when answers were split by sub-question label,
        False when they must be handed out by the number of blanks
    :param ans_summarize: ``""`` or ``[summary_text, index]`` for a summary
        line removed from the analysis
    :return: ``one_item``, with ``slave`` and ``topic_no`` set when
        ``con_list`` has more than one element
    """
    old_con = one_item["content"]
    if len(con_list) <= 1:
        return one_item

    # An empty first piece means there is no shared stem, only sub-questions.
    one_item["content"] = con_list[0]

    slave = []
    for index, s in enumerate(con_list[1:]):
        blank_num = len(re.findall(_BLANK, s))
        # Drop a "(n分)" score note near the start of the sub-question.
        s = re.sub(r"[((]\d+分[))]", "", s[:9]) + s[9:]

        answers_align = isinstance(ans_list, list) and len(con_list) - len(ans_list) == 1
        one_slave = {}
        if len(con_list) - len(parse_list) == 1:
            # Analysis pieces line up with the sub-questions: take by index.
            one_slave = {"topic_no": index + 1,
                         "content": s,
                         "parse": parse_list[index]}
            if answers_align:
                one_slave["answer"] = ans_list[index]
        elif not parse_list and answers_align:
            one_slave = {"topic_no": index + 1,
                         "content": s,
                         "answer": ans_list[index]}
        if not one_slave:
            continue

        if is_sub_item is False:
            # Concrete answers handed out by blank count, but at least one
            # answer per sub-question.
            if blank_num >= 1:
                one_ans = re.sub(r"(\n|^)\s*[((]\s*" + str(index + 1) + r"\s*[))](.+)",
                                 r"\2", ";".join(ans_list[:blank_num]))
                one_ans = re.sub(
                    r"((?<=[\n;;。])|^)\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④])\s*[))]|[①②③④]\s*(?![+-]))(.+)",
                    r"【ⅳ】\4", one_ans)
                one_slave["answer"] = one_ans.replace(_MARK, "")
                ans_list = ans_list[blank_num:]
            else:
                one_slave["answer"] = ans_list[0] if len(ans_list) > 0 else ""
                ans_list = ans_list[1:]
        elif isinstance(ans_list, str):
            # No concrete answer: the answer is "见解析".
            one_slave["answer"] = ans_list
            if one_item["item_topic_name"] == "填空题":
                one_item["item_topic_name"] = "解答题"

        if blank_num > 0:
            one_slave["blank_num"] = blank_num
        elif one_item["item_topic_name"] == "填空题":
            one_item["item_topic_name"] = "解答题"

        # Try once more to extract the sub-question's answer from its
        # analysis. Sub-questions built from answers only carry no "parse"
        # key, so there is nothing to re-extract from.
        needs_reparse = (
            not ans_list
            or not one_slave.get("answer")
            or one_slave["answer"] == "见解析"
        )
        if needs_reparse and "parse" in one_slave:
            new_ans = only_parse_split(one_slave["parse"], one_item["item_topic_name"],
                                       reparse_n=2)
            if new_ans["answer"]:
                one_slave["answer"] = new_ans["answer"]
                if not new_ans["parse"]:
                    one_slave["parse"] = ""
        slave.append(one_slave)

    one_item["slave"] = slave
    one_item["topic_no"] = "1-{}".format(len(slave)) if len(slave) > 1 else "1"

    if slave:
        # With sub-questions present, the outer answer/parse fields go away.
        one_item.pop("answer", None)
        if parse_list:
            one_item.pop("parse", None)
    else:
        # Splitting failed: restore the stem and put the summary line back.
        one_item["content"] = old_con
        if ans_summarize:
            summary_text, summary_index = ans_summarize
            outer_parse = one_item["parse"]
            one_item["parse"] = (outer_parse[:summary_index] + "\n" + summary_text
                                 + "\n" + outer_parse[summary_index:])
    return one_item