#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Regroup split exam questions that share a common stem into "slave" groups.

NOTE(review): the copy of this module that was reviewed had lost content
inside several string literals: only line breaks remained where some
markup-like tokens apparently stood (see ``_PARA_BREAK``, ``_LINE_BREAK`` and
``_PARAGRAPH_LEAD_RE``), and several regex character classes contain
duplicated ASCII punctuation (for example ``[((]``), which suggests
full-width variants were normalised away.  The literals below mirror what was
visible; restore the real values from version control before relying on the
formatting output.
"""
import logging
import re
from pprint import pprint

logger = logging.getLogger(__name__)

# NOTE(review): placeholders mirroring the visible (line-break only) literals.
_PARA_BREAK = "\n\n"
_LINE_BREAK = "\n"

# Start of a paragraph followed by its first visible text.  The three
# alternatives are identical in the reviewed copy (see module note).
_PARAGRAPH_LEAD_RE = re.compile(
    r'(\n\n\n*|\n\n\n*'
    r'|\n\n\n*)([^\n]+?)')
# "(1)"-style sub-question marker preceded by whitespace.
_SUB_NO_RE = re.compile(r"(?<=[\s\n])[((]\s*\d{1,2}[))]")
# Same marker, also accepted right after "】" and with "l" mistaken for "1".
_SUB_NO_LOOSE_RE = re.compile(r"(?<=[\s\n】])[((]\s*[\dl]{1,2}\s*[))]")
# Trailing "translation" / "remark" section that belongs to the whole group.
_TRAILER_RE = re.compile(r"\n\s*([【参考]*?译文\s*[】::]|【点睛】)")
# Numbered answers such as "1.A 2.B".
_ANSWER_NO_RE = re.compile(
    r"(?<=[】\n])\d{1,2}\s*[、..、]|\s{2,}\d{1,2}\s*[、..、]"
    r"|(?<=[A-E])\s*\d{1,2}\s*[、..、]")
_MATERIAL_RE = re.compile(r"(阅读|针对|结合).{,4}[资材]料|(\n|^)\s*材料一\s")


def _restyle_paragraphs(text):
    """Apply the paragraph-lead substitution to *text*.

    With the literals visible in the reviewed copy the replacement is
    ``\\1\\2``, i.e. the text comes back unchanged (see module note).
    """
    return _PARAGRAPH_LEAD_RE.sub(r'\1\2', text)


def _split_by_question_no(text, st1, end1):
    """Split *text* at the question numbers ``st1``..``end1``.

    Returns ``(common, parts)``: the text before the first number and the
    text following each number, unstripped.
    """
    numbers = "|".join(str(no) for no in range(int(st1), int(end1) + 1))
    pieces = re.split(r"(?<=[】\n])\s*(" + numbers + r")\s*[、..、]",
                      "\n" + text)
    common, rest = pieces[0], pieces[1:]
    # ``rest`` alternates captured number / text; keep only the text.
    return common, [part for idx, part in enumerate(rest) if idx % 2 == 1]


def _finish_group(one_group, ans_start_no, ans_groups):
    """Fill type, que_num, topic_num, key and parse of an assembled group."""
    slave = one_group["slave"]
    one_group["type"] = slave[0]["type"] if slave else ""
    one_group["que_num"] = len(slave)
    if not slave:
        one_group["topic_num"] = ""
    elif slave[-1]["topic_num"] != slave[0]["topic_num"]:
        one_group["topic_num"] = "{}-{}".format(slave[0]["topic_num"],
                                                slave[-1]["topic_num"])
    else:
        one_group["topic_num"] = slave[0]["topic_num"]
    # Distribute the group answer / analysis over the sub-questions.
    one_group = ans_regroup(ans_start_no, one_group, ans_groups)
    one_group.setdefault("key", "")
    one_group.setdefault("parse", "")
    return one_group


def regroup(res_list, item_groups, ans_groups):
    """Merge questions that share one common stem into "slave" groups.

    ``item_groups["groups_data"]`` maps a 0-based position in *res_list* to:
    ``"fei"`` (the question is not part of a group), ``"N-M"`` (real question
    numbers N..M form one group) or ``""`` (a common stem starts here and the
    range is unknown).  Example::

        {'is_groups': 1,
         'groups_data': {0: 'fei', 5: '', 8: '', 11: 'fei', 12: '',
                         15: '', 19: '20-21', 23: 'fei'}}

    :param res_list: list of question dicts; entries are mutated in place
        (``com_stem`` is removed from group heads, ``type``/``topic_num`` are
        rewritten on returned items).
    :param item_groups: dict holding ``groups_data`` as described above.
    :param ans_groups: dict keyed by ``"N-M"`` with ``key``/``parse`` texts
        for a whole group; may be empty or None.
    :return: new list with grouped and stand-alone questions.
    """
    new_res_dict = []
    groups_data = item_groups["groups_data"]
    start_no = sorted(groups_data)  # positions are real numbers minus 1

    def takefirst(elem):
        return int(elem.split("-")[0])

    ans_start_no = sorted(ans_groups, key=takefirst) if ans_groups else []
    # Joined answer keys, computed once; "#" separates the keys.
    ans_keys = "#".join(ans_groups.keys()) if ans_groups else ""
    ans_keys_hashed = "#" + ans_keys

    contained_no = set()  # positions whose answers take part in regrouping
    for ans_no in ans_start_no:
        st1, ed1 = ans_no.split("-")
        contained_no.update(range(int(st1) - 1, int(ed1)))
    # Not strict: question numbers do not necessarily start at 1.
    not_contained_no = set(range(len(res_list))) - contained_no

    added_nos = []  # real question numbers already consumed
    fei_no = {}     # loop index -> position of a handled "fei" entry

    def emit_fei(n, group_no):
        """Emit a stand-alone ("fei") entry and the items since the last one."""
        if n - 1 in fei_no:
            new_res_dict.extend(res_list[fei_no[n - 1] + 1: group_no + 1])
        else:
            new_res_dict.append(res_list[group_no])
        added_nos.append(group_no + 1)
        fei_no[n] = group_no

    # Leading run of stand-alone questions.  Test start_no itself: it is the
    # list being consumed, so it can become empty while groups_data is not.
    temp_no = -1
    while start_no and groups_data[start_no[0]] == "fei":
        if start_no[0] - temp_no == 1:
            new_res_dict.append(res_list[start_no[0]])
        elif start_no[0] - temp_no > 1:
            new_res_dict.extend(res_list[temp_no + 1: start_no[0] + 1])
        temp_no = start_no[0]
        del start_no[0]
    if start_no and start_no[0] > 0:
        new_res_dict.extend(res_list[temp_no + 1:start_no[0]])
    # Everything before this position has been emitted already.
    emitted_upto = start_no[0] if start_no else temp_no + 1

    one_group = {}
    alone_item_nos = []
    logger.debug("start_no: %s", start_no)
    for n, group_no in enumerate(start_no):
        has_next = n + 1 < len(start_no)
        if "com_stem" not in res_list[group_no]:
            if group_no == start_no[-1] and groups_data[group_no] == "fei":
                continue  # a trailing "fei" is picked up by the tail handling
            if groups_data[group_no] == "fei":  # several "fei" may follow
                emit_fei(n, group_no)
                continue
            # Otherwise it must be an independent question.
            if "#" + str(group_no + 1) + "-" not in ans_keys_hashed:
                new_res_dict.append(res_list[group_no])
                added_nos.append(group_no + 1)
            continue

        # A question carrying a common stem starts a group.
        one_group["com_stem"] = res_list[group_no]["com_stem"]
        del res_list[group_no]["com_stem"]
        if "-" in groups_data[group_no]:  # range of the group is known
            st, end = groups_data[group_no].split("-")
            if not added_nos:  # first group
                if has_next and start_no[n + 1] <= int(end):
                    # End number in the stem is wrong; trust the next key.
                    one_group["slave"] = res_list[int(st) - 1: start_no[n + 1]]
                    added_nos.append(start_no[n + 1])
                else:
                    added_nos.append(int(end))
                    if len(res_list) < int(end):
                        # Numbers do not start at 1: shift by the first one.
                        first_no = int(res_list[0]["topic_num"])
                        st = int(st) - first_no + 1
                        end = int(end) - first_no + 1
                    one_group["slave"] = res_list[int(st) - 1:int(end)]
            elif int(st) <= added_nos[-1]:  # start number in the stem is wrong
                last = added_nos[-1]
                if has_next:  # not the last group
                    if int(end) < start_no[n + 1]:
                        one_group["slave"] = res_list[last:int(end)]
                        added_nos.append(int(end))
                    else:  # end number is wrong as well
                        one_group["slave"] = res_list[last: start_no[n + 1]]
                        added_nos.append(start_no[n + 1])
                elif int(end) >= last:
                    one_group["slave"] = res_list[last:int(end)]
                    added_nos.append(int(end))
                elif str(group_no + 1) + "-" in ans_keys:
                    # End value is wrong: take it from the answer keys.
                    end_info1 = re.search(
                        r"#{}-(\d+)($|#)".format(group_no + 1), ans_keys)
                    # NOTE(review): the original marks this fallback "????".
                    end_info2 = re.search(
                        r"[^#]{}-(\d+)($|#)".format(group_no), ans_keys)
                    if end_info1:
                        end = end_info1.group(1)
                        one_group["slave"] = res_list[group_no: int(end)]
                        added_nos.append(int(end))
                    elif end_info2:
                        end = end_info2.group(1)
                        one_group["slave"] = res_list[group_no: int(end) + 1]
                        added_nos.append(int(end) + 1)
                    else:
                        one_group["slave"] = []
                else:
                    # Cut where the question type changes.
                    endp = [m for m, j in enumerate(res_list[last:])
                            if j["type"] != res_list[last]["type"]]
                    if endp:
                        boundary = endp[0] + len(res_list[:last])
                        one_group["slave"] = res_list[last:boundary]
                        added_nos.append(boundary)
                    else:
                        one_group["slave"] = res_list[group_no:]
            else:
                added_nos.append(int(end))
                one_group["slave"] = res_list[int(st) - 1:int(end)]
                # NOTE(review): evaluated after the append above, so this
                # compares st with end + 1; kept as found.
                if int(st) > added_nos[-1] + 1:
                    new_res_dict.extend(res_list[added_nos[-1]:int(st) - 1])
            # Stand-alone questions between this group and the next one.
            if has_next and int(end) < start_no[n + 1]:
                alone_item_nos.append([int(end), start_no[n + 1]])
                added_nos.append(start_no[n + 1])
        elif group_no != start_no[-1]:  # range unknown, not the last group
            if groups_data[group_no] == "fei":
                emit_fei(n, group_no)
                continue
            if "#" + str(group_no + 1) + "-" in ans_keys_hashed:
                # Trust the range given by the answer key.
                tail = ans_keys_hashed.split("#{}-".format(group_no + 1))[-1]
                end = tail.split("#", maxsplit=1)[0]
                one_group["slave"] = res_list[group_no: int(end)]
                added_nos.append(int(end))
                if int(end) < start_no[n + 1]:
                    alone_item_nos.append([int(end), start_no[n + 1]])
                    added_nos.append(start_no[n + 1])
            else:
                one_group["slave"] = res_list[group_no: start_no[n + 1]]
                added_nos.append(start_no[n + 1])
        else:  # range unknown, last group
            if groups_data[group_no] == "fei":
                continue
            if "#{}-".format(group_no + 1) in ans_keys_hashed:
                tail = ans_keys_hashed.split("#{}-".format(group_no + 1))[-1]
                # Take the number right after "N-"; anything behind the next
                # "#" is a different answer key.
                end = tail.split("#", maxsplit=1)[0]
                one_group["slave"] = res_list[group_no: int(end)]
                added_nos.append(int(end))
            else:
                # Guess the end from a change of answer type (unreliable).
                endp = []
                if added_nos:
                    last = added_nos[-1]
                    rest = res_list[last:]
                    endp = [m for m, j in enumerate(rest)
                            if "answer_type" in j and "作文" in j["answer_type"]
                            and j["answer_type"] != res_list[last]["answer_type"]]
                    # A group is assumed to hold at most 6 sub-questions.
                    if not endp or (endp[0] <= 1 and len(rest) > 6) \
                            or endp[0] > 6:
                        endp = [m for m, j in enumerate(rest)
                                if "answer_type" in j
                                and "answer_type" in res_list[last]
                                and j["answer_type"]
                                != res_list[last]["answer_type"]]
                if endp and endp[0] > 1:
                    boundary = endp[0] + len(res_list[:added_nos[-1]])
                    one_group["slave"] = res_list[added_nos[-1]:boundary]
                    added_nos.append(boundary)
                else:
                    one_group["slave"] = res_list[group_no:]
                    added_nos.append(len(res_list))

        new_res_dict.append(_finish_group(one_group, ans_start_no, ans_groups))
        # Stand-alone questions located between two groups.
        for alone_no in alone_item_nos:
            new_res_dict.extend(res_list[alone_no[0]: alone_no[1]])
        alone_item_nos = []
        one_group = {}

    # Trailing questions that do not belong to any group.
    tail_start = added_nos[-1] if added_nos else emitted_upto
    if tail_start < len(res_list):
        new_res_dict.extend(res_list[tail_start:])

    # Questions whose answers were not regrouped: push outer key/parse down.
    for one_no in not_contained_no:
        for one_res in new_res_dict:
            if one_no + 1 == one_res["topic_num"]:
                parse_split2group(one_res)

    # Refresh type structure and merge the common stem into the stem.
    for one_res in new_res_dict:
        if "com_stem" in one_res:
            new_com_stem = one_res["com_stem"]
            new_com_stem = re.sub(
                r"(完成|回答)下?[面列]?的?第?(\d{1,2})[-到至第~~-]+?(\d{1,2})小?题",
                r"\1下面小题", new_com_stem)
            # Keep manually adjusted spaces.
            new_com_stem = new_com_stem.replace(" ", "  ")
            new_com_stem = _restyle_paragraphs(new_com_stem)
            if "stem" in one_res:
                one_res["stem"] = new_com_stem + "\n" + one_res["stem"]
            else:
                one_res["stem"] = new_com_stem
            del one_res["com_stem"]
        elif "slave" in one_res and one_res["slave"] and "stem" in one_res:
            one_res["stem"] = _restyle_paragraphs(one_res["stem"])
        one_res["topic_num"] = str(one_res["topic_num"])
        if "slave" in one_res:
            one_res['type'] = '小题多问类'
        elif "options" in one_res:
            one_res['type'] = '选择类'
        else:
            one_res['type'] = '解答类'
        if _MATERIAL_RE.search(one_res['stem']):
            one_res["stem"] = _restyle_paragraphs(one_res["stem"])
    return new_res_dict


def _split_parse_from_last_slave(one_group):
    """Spread an analysis stored on the last sub-question over all of them.

    Used when there are no grouped answers, for layouts such as
    "1.xx 2.xx / 【解析】1.xx 2.xx" where the whole analysis ended up on the
    last sub-question.
    """
    slave = one_group["slave"]
    if not slave:  # nothing to distribute to
        return
    first = slave[0]
    if not (first["parse"] in ["略", ""] or first["key"] in ["略", "", "见解析"]):
        return
    if not ("-" in str(one_group["topic_num"]) and len(slave) > 1
            and slave[-1]["parse"].strip()):
        return
    st1, end1 = one_group["topic_num"].split("-")  # real question numbers
    span = int(end1) - int(st1)
    comm_parse, parse_list = _split_by_question_no(slave[-1]["parse"], st1, end1)
    parse_list = [pr.strip() for pr in parse_list]
    if len(parse_list) not in [span + 1, span]:
        return
    if comm_parse:
        one_group["parse"] = comm_parse
    for ni, pr in enumerate(parse_list):
        if ni == span:  # last one
            pr = re.sub(r"\n\s*【答案】$", "", pr)
            if _TRAILER_RE.search(pr):
                pr, hd, rest = _TRAILER_RE.split(pr, maxsplit=1)
                one_group["parse"] = hd + rest
        if slave[ni]["key"] in ["略", "", "见解析"]:
            slave[ni]["key"] = slave[ni]["parse"]
        slave[ni]["parse"] = pr


def ans_regroup(ans_start_no, one_group, ans_groups):
    """Distribute a group's answer and analysis over its sub-questions.

    :param ans_start_no: sorted ``"N-M"`` keys of *ans_groups*; when empty
        the analysis found on the last sub-question is split instead.
    :param one_group: group dict with ``topic_num`` and ``slave``; mutated.
    :param ans_groups: dict ``"N-M" -> {"key": ..., "parse": ...}``.
    :return: *one_group*.
    """
    if not ans_start_no:
        _split_parse_from_last_slave(one_group)
        return one_group

    for k in ans_start_no:
        if k != one_group["topic_num"]:
            continue
        st1, end1 = k.split("-")  # real question numbers
        expected = int(end1) + 1 - int(st1)
        slave = one_group["slave"]

        # ---- analysis ----
        raw_parse = ans_groups[k]["parse"]
        if len(re.findall("【详解】", raw_parse)) > 1:
            pieces = re.split("【详解】", raw_parse)
            comm_parse, parse_list = pieces[0], pieces[1:]
        else:
            comm_parse, parse_list = _split_by_question_no(raw_parse, st1, end1)
        if len(parse_list) > 1 and len(parse_list) == expected:
            if comm_parse:
                one_group["parse"] = comm_parse
            for i, raw in enumerate(parse_list):
                pr = raw.strip()
                if i == len(parse_list) - 1 and _TRAILER_RE.search(pr):
                    # A trailing translation/remark belongs to the group.
                    pr, hd, rest = _TRAILER_RE.split(pr, maxsplit=1)
                    one_group["parse"] = hd + rest
                slave[i]["parse"] = pr
                if "slave" in slave[i]:  # split again per sub-sub-question
                    sub_parses = _SUB_NO_RE.split("\n" + pr)
                    if len(sub_parses) - 1 == len(slave[i]["slave"]):
                        for pi, sub in enumerate(sub_parses[1:]):
                            slave[i]["slave"][pi]["parse"] = sub.strip()
                        slave[i]["parse"] = sub_parses[0].strip()
        else:
            # Cannot be split reliably: keep the analysis on the group.
            one_group["parse"] = raw_parse

        # ---- answers ----
        raw_key = ans_groups[k]["key"]
        ans_list = _ANSWER_NO_RE.split("\n" + raw_key)[1:]
        if len(ans_list) > 1 and len(ans_list) == expected:
            for j, ans in enumerate(ans_list):
                slave[j]["key"] = ans.strip()
                if "slave" in slave[j]:  # split again per sub-sub-question
                    slave_ans = re.sub(
                        r"([((]\s*\d\s*[))])\s*[、..、,,::]\s*\1", r"\1", ans)
                    sub_answers = _SUB_NO_RE.split("\n" + slave_ans.strip())
                    if len(sub_answers) - 1 == len(slave[j]["slave"]):
                        for aj, sub in enumerate(sub_answers[1:]):
                            slave[j]["slave"][aj]["key"] = sub.strip()
                        slave[j]["key"] = sub_answers[0].strip()
        else:
            one_group["key"] = raw_key
    return one_group


def suojin(item_str):
    """Wrap every line of *item_str* in the paragraph markers.

    NOTE(review): the marker literals were not present in the reviewed copy
    (see module note), and the final pattern was unbalanced as visible; it is
    balanced here as ``(\\s*)($|...)`` to match the two groups used by the
    replacement.  Confirm against the original.

    :param item_str: raw text, lines separated by newlines.
    :return: re-joined text.
    """
    ind_label = _PARA_BREAK
    joiner = _PARA_BREAK + ind_label
    con_list = re.split("\n+", item_str.strip())
    if len(con_list) > 1 and re.search("(阅读|针对).{,4}[资材]料", con_list[0]):
        # The instruction line itself stays outside the markers.
        new_con = con_list[0] + ind_label + joiner.join(con_list[1:]) \
            + _PARA_BREAK
    else:
        new_con = ind_label + joiner.join(con_list) + _PARA_BREAK
    new_con = re.sub(r'\n\n(\s*)($|\n\n)', r'\1\n\2', new_con,
                     flags=re.S).strip()
    return new_con


def parse_split2group(item_list):
    """Push a question's outer answer/analysis down into its sub-questions.

    Only acts when the question has ``slave`` entries, carries a non-empty
    ``key`` or ``parse``, and at least one sub-question has neither.  The
    dict is mutated in place.

    :param item_list: a single question dict (despite the name).
    :return: the processed dict; for a one-element com_stem wrapper a shallow
        copy of the wrapper holding the processed inner question.
    """
    raw_item_list = item_list.copy()
    flag = 0
    if "com_stem" in item_list and "slave" in item_list \
            and len(item_list["slave"]) == 1:  # nested: work on the inner one
        item_list = item_list["slave"][0]
        flag = 1
    if "slave" in item_list and (item_list["key"] or item_list["parse"]) and \
            any(not (s["key"] + s["parse"]).strip() for s in item_list["slave"]):
        slave = item_list["slave"]
        # analysis
        parse_list = _SUB_NO_LOOSE_RE.split("\n" + item_list["parse"].strip())
        if len(parse_list) - 1 == len(slave):
            for pi, sub in enumerate(parse_list[1:]):
                slave[pi]["parse"] = sub.strip()
            item_list["parse"] = parse_list[0].strip()
        # answer; collapse a duplicated marker such as "(1)、(1)" first
        ans = re.sub(r"([((]\s*\d\s*[))])\s*[、..、,,::]\s*(\1)", r"\2",
                     item_list["key"])
        ans_list = _SUB_NO_LOOSE_RE.split("\n" + ans.strip())
        if len(ans_list) - 1 == len(slave):
            for aj, sub in enumerate(ans_list[1:]):
                slave[aj]["key"] = sub.strip()
            item_list["key"] = ans_list[0].strip()
    if "com_stem" in item_list:
        if "stem" in item_list:
            item_list["stem"] = item_list["com_stem"].strip() + _LINE_BREAK \
                + item_list["stem"]
        else:
            item_list["stem"] = item_list["com_stem"]
        del item_list["com_stem"]
    if flag:
        raw_item_list["slave"] = [item_list]
        item_list = raw_item_list
    return item_list


def regroup_old(res_list, item_groups):
    """Older regrouping driven by ``"N-M"`` keys and a ``pos`` list.

    :param res_list: list of question dicts after splitting.
    :param item_groups: dict mapping ``"N-M"`` to the common stem text, plus
        a ``"pos"`` list that is indexed per group.
    :return: *res_list* itself when there is no group, otherwise a new list.
    """
    new_res_dict = []
    start_no = [i for i in item_groups.keys() if i != "pos"]
    if not start_no:
        return res_list

    def takefirst(elem):
        return int(elem.split("-")[0])

    start_no.sort(key=takefirst)
    logger.debug("start_no: %s", start_no)
    one_group = {}
    added_nos = []  # question numbers already consumed
    for n, group_no in enumerate(start_no):
        one_group["common_stem"] = item_groups[group_no]
        st, end = group_no.split("-")  # real question numbers
        pos = item_groups["pos"]
        has_next = n + 1 < len(pos)
        if not added_nos:  # first group
            if has_next and pos[n + 1] <= int(end):
                # End number in the stem is wrong; trust pos.
                one_group["slave"] = res_list[int(st) - 1:pos[n + 1] - 1]
                added_nos.append(pos[n + 1] - 1)
            else:
                added_nos.append(int(end))
                one_group["slave"] = res_list[int(st) - 1:int(end)]
        elif int(st) <= added_nos[-1]:  # start number in the stem is wrong
            last = added_nos[-1]
            if has_next:  # not the last group
                if int(end) < pos[n + 1]:
                    one_group["slave"] = res_list[last:int(end)]
                    added_nos.append(int(end))
                else:  # end number is wrong, trust pos
                    one_group["slave"] = res_list[last:pos[n + 1] - 1]
                    added_nos.append(pos[n + 1] - 1)
            elif int(end) >= last:
                one_group["slave"] = res_list[last:int(end)]
                added_nos.append(int(end))
            else:  # end value is wrong: cut where the type changes
                endp = [m for m, j in enumerate(res_list[last:])
                        if j["type"] != res_list[last]["type"]]
                if endp:
                    boundary = endp[0] + len(res_list[:last])
                    one_group["slave"] = res_list[last:boundary]
                    added_nos.append(boundary)
                else:
                    # No type change found: take everything that is left.
                    one_group["slave"] = res_list[last:]
                    added_nos.append(len(res_list))
        else:
            added_nos.append(int(end))
            one_group["slave"] = res_list[int(st) - 1:int(end)]
            # NOTE(review): evaluated after the append above; kept as found.
            if int(st) > added_nos[-1] + 1:
                new_res_dict.extend(res_list[added_nos[-1]:int(st) - 1])
        slave = one_group["slave"]
        one_group["type"] = slave[0]["type"] if slave else ""
        one_group["que_num"] = len(slave)
        new_res_dict.append(one_group)
        one_group = {}
    if added_nos[-1] < len(res_list):
        new_res_dict.extend(res_list[added_nos[-1]:])
    return new_res_dict