#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from pprint import pprint

# NOTE(review): the HTML literals of this module were lost in the copy under
# review (only blank lines remained inside the quotes).  The indent tag is
# rebuilt from the `"text-indent: 2em"` test in regroup(); the centring tag is
# a best guess.  Confirm both against the markup the consumers expect.
_INDENT_OPEN = '<p style="text-indent: 2em">'
_CENTER_OPEN = '<p style="text-align: center">'
_P_CLOSE = "</p>"

# Marker placed in "errmsgs" upstream when an item has no answer/analysis.
_MISSING_ANSWER_MSG = "本题缺少答案和解析"

# Separator that follows a question number ("1、", "1.", "1．").
_NUM_SEP = "[、.．､]"
# "(1)" / "（1）" style sub-question markers.
_SUB_ITEM_RE = r"(?<=[\s\n])[(（]\s*\d{1,2}[)）]"
_SUB_ITEM_LOOSE_RE = r"(?<=[\s\n】])[(（]\s*[\dl]{1,2}\s*[)）]"
_MATERIAL_RE = "(阅读|针对).{,4}[资材]料"


def _answer_range_end(ans_keys, first_no):
    """Return ``end`` of the answer key ``"<first_no>-<end>"``, or None."""
    prefix = "{}-".format(first_no)
    for key in reversed(ans_keys):
        tail = key[len(prefix):]
        if key.startswith(prefix) and tail.isdigit():
            return int(tail)
    return None


def _type_change_index(res_list, start):
    """Return the first index >= start whose "type" differs from res_list[start]."""
    if start >= len(res_list):
        return None
    base_type = res_list[start]["type"]
    for idx in range(start, len(res_list)):
        if res_list[idx]["type"] != base_type:
            return idx
    return None


def _clear_missing_flag(item):
    """Drop the "missing answer" marker from an item's error messages."""
    if _MISSING_ANSWER_MSG in item.get("errmsgs", ""):
        item["errmsgs"] = item["errmsgs"].replace(_MISSING_ANSWER_MSG, "")


def _split_translation(text):
    """Split a trailing 译文 section off ``text``; return (body, section or None)."""
    parts = re.split(r"\n\s*([【参考]*?译文\s*[】:：])", text, maxsplit=1)
    if len(parts) == 3:
        return parts[0], parts[1] + parts[2]
    return text, None


def _distribute(parts, item, field):
    """Spread ``parts[1:]`` over ``item["slave"]`` when the counts match.

    ``parts[0]`` (text before the first sub-question marker) stays on the
    item itself.  Nothing is changed when the counts differ.
    """
    subs = item["slave"]
    if len(parts) - 1 != len(subs):
        return
    for sub, text in zip(subs, parts[1:]):
        sub[field] = text.strip()
    item[field] = parts[0].strip()


def _known_range_slave(res_list, span, group_no, next_no, added_nos, ans_keys, emitted):
    """Pick the sub-questions of a group whose stem announces "st-end".

    ``added_nos`` (index of the next unprocessed item) and ``emitted`` (the
    output list) are updated in place.
    """
    st, end = (int(part) for part in span.split("-"))
    if not added_nos:  # first group
        if next_no is not None and next_no <= end:
            # announced end overlaps the next group: trust the next group's position
            added_nos.append(next_no)
            return res_list[st - 1:next_no]
        added_nos.append(end)
        if len(res_list) < end:
            # numbering does not start at 1: rebase on the first topic number
            offset = int(res_list[0]["topic_num"]) - 1
            st, end = st - offset, end - offset
        return res_list[st - 1:end]

    done = added_nos[-1]
    if st > done:
        # Standalone questions between the previous group and this one must
        # be emitted *before* the bookkeeping moves past them.
        if st > done + 1:
            emitted.extend(res_list[done:st - 1])
        added_nos.append(end)
        return res_list[st - 1:end]

    # announced start is wrong (already consumed): continue from `done`
    if next_no is not None:
        stop = end if end < next_no else next_no
    elif end >= done:
        stop = end
    else:
        stop = None
    if stop is not None:
        added_nos.append(stop)
        return res_list[done:stop]

    # announced end is wrong as well: answers first, then type change, then tail
    ans_end = _answer_range_end(ans_keys, group_no + 1)
    if ans_end is not None:
        added_nos.append(ans_end)
        return res_list[group_no:ans_end]
    change_at = _type_change_index(res_list, done)
    if change_at is not None:
        added_nos.append(change_at)
        return res_list[done:change_at]
    added_nos.append(len(res_list))
    return res_list[group_no:]


def _unknown_range_slave(res_list, group_no, next_no, added_nos, ans_keys, alone_item_nos):
    """Pick the sub-questions of a group whose stem gives no range."""
    ans_end = _answer_range_end(ans_keys, group_no + 1)
    if next_no is not None:  # not the last group
        if ans_end is None:
            added_nos.append(next_no)
            return res_list[group_no:next_no]
        # the answer numbering wins
        added_nos.append(ans_end)
        if ans_end < next_no:  # standalone questions before the next group
            alone_item_nos.append([ans_end, next_no])
            added_nos.append(next_no)
        return res_list[group_no:ans_end]

    if ans_end is not None:
        added_nos.append(ans_end)
        return res_list[group_no:ans_end]
    change_at = _type_change_index(res_list, added_nos[-1]) if added_nos else None
    if change_at is not None:
        slave = res_list[added_nos[-1]:change_at]
        added_nos.append(change_at)
        return slave
    added_nos.append(len(res_list))
    return res_list[group_no:]


def _attach_group_answers(one_group, ans_groups, ans_start_no):
    """Split the shared answer block of a group over its sub-questions."""
    topic = one_group["topic_num"]
    if topic not in ans_start_no:
        return
    answer = ans_groups[topic]
    bounds = topic.split("-")  # real question numbers of the group
    first_no, last_no = int(bounds[0]), int(bounds[-1])
    expected = last_no + 1 - first_no
    slave = one_group["slave"]

    if len(re.findall("【详解】", answer["parse"])) > 1:
        parse_list = re.split("【详解】", answer["parse"])[1:]
    else:
        seq = [str(no) for no in range(first_no, last_no + 1)]
        if any(len(no) > 1 for no in seq):
            pieces = re.split(r"(?<=[】\n])\s*(" + "|".join(seq) + r")\s*" + _NUM_SEP,
                              "\n" + answer["parse"])[1:]
            parse_list = pieces[1::2]  # drop the captured numbers
        else:
            parse_list = re.split(r"(?<=[】\n])\s*[" + "".join(seq) + r"]\s*" + _NUM_SEP,
                                  "\n" + answer["parse"])[1:]

    if len(parse_list) <= 1:
        # analysis cannot be split: keep answer and analysis on the group
        one_group['key'] = answer["key"]
        one_group['parse'] = answer["parse"]
        for sub in slave:
            _clear_missing_flag(sub)
        return

    ans_list = re.split(r"(?<=[】\s])\d{1,2}\s*" + _NUM_SEP + r"|^\d{1,2}\s*" + _NUM_SEP,
                        answer["key"])[1:]
    if len(parse_list) == expected and len(slave) == expected:
        for i, raw in enumerate(parse_list):
            pr = raw.strip()
            if i == expected - 1:
                pr, translation = _split_translation(pr)
                if translation is not None:
                    one_group["parse"] = translation
            sub = slave[i]
            sub["parse"] = pr
            _clear_missing_flag(sub)
            if "slave" in sub:  # split the analysis once more
                _distribute(re.split(_SUB_ITEM_RE, "\n" + raw.strip()), sub, "parse")
    else:  # keep the analyses together
        one_group["parse"] = answer["parse"]

    if len(ans_list) == expected and len(slave) == expected:
        for j, raw in enumerate(ans_list):
            sub = slave[j]
            sub["key"] = raw.strip()
            _clear_missing_flag(sub)
            if "slave" in sub:  # split the answer once more
                slave_ans = re.sub(r"([(（]\s*\d\s*[)）])\s*[、.．､,，:：]\s*\1", r"\1", raw)
                _distribute(re.split(_SUB_ITEM_RE, "\n" + slave_ans.strip()), sub, "key")
    else:
        one_group["key"] = answer["key"]
    ans_start_no.remove(topic)


def _split_trailing_parse(one_group):
    """Handle answers placed after the questions.

    Layout: 【答案】1.xx 2.xx \\n【解析】1.xx 2.xx, where the whole analysis
    ended up on the last sub-question; it is redistributed over all of them.
    """
    slave = one_group["slave"]
    topic = one_group["topic_num"]
    if not slave or not isinstance(topic, str):
        return
    bounds = topic.split("-")
    if len(bounds) != 2 or not (bounds[0].isdigit() and bounds[1].isdigit()):
        return
    if slave[0]["parse"] not in ["略", ""] or not slave[-1]["parse"]:
        return
    first_no, last_no = int(bounds[0]), int(bounds[1])
    seq = [str(no) for no in range(first_no, last_no + 1)]
    pieces = re.split(r"(?<=[】\n])\s*(" + "|".join(seq) + r")\s*" + _NUM_SEP,
                      "\n" + slave[-1]["parse"])[1:]
    parse_list = [piece.strip() for piece in pieces[1::2]]
    if len(parse_list) != last_no + 1 - first_no or len(slave) < len(parse_list):
        return
    for ni, pr in enumerate(parse_list):
        if ni == last_no - first_no:
            pr = re.sub(r"\n\s*【答案】$", "", pr)
            pr, translation = _split_translation(pr)
            if translation is not None:
                one_group["parse"] = translation
        slave[ni]["parse"] = pr


def _finalize_item(one_res):
    """Merge the common stem, normalise "topic_num"/"type" and indent text."""
    if "com_stem" in one_res:
        # the number of blanks inside a common stem is not considered for now
        new_com_stem = suojin(one_res["com_stem"])
        # keep manually adjusted spaces
        # NOTE(review): the replacement literal may have been an HTML entity
        # in the original; it is reproduced as it appears in the reviewed copy.
        new_com_stem = new_com_stem.replace(" ", "  ")
        if "stem" in one_res:
            one_res["stem"] = new_com_stem + "\n" + one_res["stem"]
        else:
            one_res["stem"] = new_com_stem
        del one_res["com_stem"]
    elif "slave" in one_res:
        one_res["stem"] = suojin(one_res["stem"])

    one_res["topic_num"] = str(one_res["topic_num"])
    if "slave" in one_res:
        one_res['type'] = '小题多问类'
        return
    if "options" in one_res:
        one_res['type'] = '选择类'
        return
    one_res['type'] = '解答类'
    # NOTE(review): nesting of the formatting below under the 解答类 branch is
    # inferred (grouped items do not always carry a "key").
    if "【范文】" in one_res['key']:  # writing task
        anss = re.split(r"\n+", one_res['key'])
        ids = [pos for pos, line in enumerate(anss) if "【范文】" in line][0]
        may_title = anss[ids].replace("【范文】", "").strip()
        if not may_title and ids + 1 < len(anss):
            ids += 1
            may_title = anss[ids].strip()
        joiner = _P_CLOSE + _INDENT_OPEN
        if 0 < len(may_title) < 5:
            # short line right after the marker: centre it as the title and
            # indent only the paragraphs that follow it
            new_ans = "\n".join(anss[:ids]) + _CENTER_OPEN + anss[ids] + _P_CLOSE
            if anss[ids + 1:]:
                new_ans += _INDENT_OPEN + joiner.join(anss[ids + 1:]) + _P_CLOSE
        else:
            new_ans = _INDENT_OPEN + joiner.join(anss) + _P_CLOSE
        one_res['key'] = new_ans
    elif re.search(_MATERIAL_RE + r"|(\n|^)\s*材料一\s", one_res['stem']) \
            and "text-indent: 2em" not in one_res['stem']:
        one_res['stem'] = suojin(one_res['stem'])


def regroup(res_list, item_groups, ans_groups):
    """Regroup questions that share one common stem into "slave" groups.

    ``item_groups["groups_data"]`` maps the 0-based position of a question
    that carries a common stem to:

    * ``'fei'``   - the question is not a multi-question group;
    * ``'m-n'``   - questions m..n (1-based) form one group;
    * ``''``      - a group starts here but its range is unknown.

    Example::

        {'is_groups': 1,
         'groups_data': {0: 'fei', 5: '', 8: '', 11: 'fei', 12: '',
                         15: '', 19: '20-21', 23: 'fei'}}

    :param res_list: list of question dicts (reads "topic_num", "type",
        "stem", "key", "parse", optional "com_stem"/"slave"/"options"/
        "errmsgs"); the dicts are modified in place.
    :param item_groups: dict holding "groups_data" as described above.
    :param ans_groups: optional dict ``{"m-n": {"key": ..., "parse": ...}}``
        with answers shared by a range of questions; may be empty or None.
    :return: new list in which grouped questions are replaced by one dict
        with "slave", "type", "que_num", "topic_num" and the merged stem.
    """
    new_res_dict = []
    groups_data = item_groups["groups_data"]
    start_no = sorted(groups_data)

    ans_keys = list(ans_groups) if ans_groups else []
    ans_start_no = sorted(ans_keys, key=lambda key: int(key.split("-")[0]))
    contained_no = set()
    for ans_no in ans_start_no:
        bounds = ans_no.split("-")
        contained_no.update(range(int(bounds[0]) - 1, int(bounds[-1])))
    not_contained_no = set(range(len(res_list))) - contained_no

    added_nos = []  # index of the next item not yet placed in the output

    # Everything before the first real group is plain, single-level content.
    while start_no and groups_data[start_no[0]] == "fei":
        del start_no[0]
    lead_end = start_no[0] if start_no else len(res_list)
    new_res_dict.extend(res_list[:lead_end])

    print("start_no:", start_no)
    for n, group_no in enumerate(start_no):
        is_last = n == len(start_no) - 1
        next_no = None if is_last else start_no[n + 1]
        item = res_list[group_no]
        span = groups_data[group_no]

        if "com_stem" not in item:  # no common stem on this item
            if is_last and span == "fei":  # picked up by the tail handling
                continue
            new_res_dict.append(item)
            added_nos.append(group_no + 1)
            continue

        # item carries a common stem
        com_stem = item.pop("com_stem")
        alone_item_nos = []
        if "-" in span:  # range announced by the stem
            slave = _known_range_slave(res_list, span, group_no, next_no,
                                       added_nos, ans_keys, new_res_dict)
        elif span == "fei":
            if not is_last:
                new_res_dict.append(item)
                added_nos.append(group_no + 1)
            continue
        else:  # range unknown
            slave = _unknown_range_slave(res_list, group_no, next_no,
                                         added_nos, ans_keys, alone_item_nos)

        one_group = {"com_stem": com_stem, "slave": slave}
        one_group["type"] = slave[0]["type"] if slave else ""
        one_group["que_num"] = len(slave)
        if not slave:
            one_group["topic_num"] = ""
        elif slave[-1]["topic_num"] != slave[0]["topic_num"]:
            one_group["topic_num"] = "{}-{}".format(slave[0]["topic_num"],
                                                    slave[-1]["topic_num"])
        else:
            one_group["topic_num"] = slave[0]["topic_num"]

        if ans_start_no:
            _attach_group_answers(one_group, ans_groups, ans_start_no)
        else:
            _split_trailing_parse(one_group)

        new_res_dict.append(one_group)
        for first, stop in alone_item_nos:
            new_res_dict.extend(res_list[first:stop])

    if added_nos and added_nos[-1] < len(res_list):
        new_res_dict.extend(res_list[added_nos[-1]:])

    # Questions not covered by a shared answer block: push their own
    # answer/analysis down into their sub-questions.
    for one_no in not_contained_no:
        for idx, one_res in enumerate(new_res_dict):
            if one_no + 1 == one_res["topic_num"]:
                new_res_dict[idx] = parse_split2group(one_res)

    for one_res in new_res_dict:
        _finalize_item(one_res)
    return new_res_dict


def suojin(item_str):
    """Wrap every line of ``item_str`` in an indented paragraph.

    When the first line is a "read the material" instruction it is left
    unwrapped and only the following lines are indented.

    :param item_str: text whose paragraphs are separated by newlines.
    :return: the text with paragraph markup applied.
    """
    con_list = re.split(r"\n+", item_str.strip())
    joiner = _P_CLOSE + _INDENT_OPEN
    if len(con_list) > 1 and re.search(_MATERIAL_RE, con_list[0]):
        new_con = con_list[0] + _INDENT_OPEN + joiner.join(con_list[1:]) + _P_CLOSE
    else:
        new_con = _INDENT_OPEN + joiner.join(con_list) + _P_CLOSE
    # Do not indent paragraphs that contain nothing but whitespace.
    # NOTE(review): the pattern had unbalanced parentheses in the reviewed
    # copy; `(\s*)($|</p>)` is the reading that matches the `\1\n\2` template.
    new_con = re.sub(re.escape(_INDENT_OPEN) + r'(\s*)($|</p>)', r'\1\n\2',
                     new_con, flags=re.S).strip()
    return new_con


def parse_split2group(item_list):
    """Move an item's own answer/analysis into its sub-questions ("slave").

    Only done when at least one sub-question has neither answer nor
    analysis and the number of "(n)" sections equals the number of
    sub-questions.  A group that wraps exactly one sub-question is handled
    through that sub-question.

    :param item_list: one question dict (despite the name); modified in place.
    :return: the processed dict (a shallow copy of the wrapper in the
        single-sub-question case).
    """
    raw_item_list = item_list.copy()
    nested = ("com_stem" in item_list and "slave" in item_list
              and len(item_list["slave"]) == 1)
    if nested:
        item_list = item_list["slave"][0]

    if "slave" in item_list \
            and (item_list.get("key") or item_list.get("parse")) \
            and any(not (s.get("key", "") + s.get("parse", "")).strip()
                    for s in item_list["slave"]):
        # analysis
        parse_parts = re.split(_SUB_ITEM_LOOSE_RE,
                               "\n" + item_list.get("parse", "").strip())
        _distribute(parse_parts, item_list, "parse")
        # answer: "(1)、(1)" style duplicated markers are collapsed first
        ans = re.sub(r"([(（]\s*\d\s*[)）])\s*[、.．､,，:：]\s*(\1)", r"\2",
                     item_list.get("key", ""))
        _distribute(re.split(_SUB_ITEM_LOOSE_RE, "\n" + ans.strip()), item_list, "key")

    # 2021-12-21: fold the common stem into the stem
    # NOTE(review): the separator appears as a bare line break in the reviewed
    # copy (possibly an HTML break originally), and running this step outside
    # the branch above is inferred.
    if "com_stem" in item_list:
        if "stem" in item_list:
            item_list["stem"] = item_list["com_stem"].strip() + "\n" + item_list["stem"]
        else:
            item_list["stem"] = item_list["com_stem"]
        del item_list["com_stem"]

    if nested:
        raw_item_list["slave"] = [item_list]
        item_list = raw_item_list
    return item_list


def regroup_old(res_list, item_groups):
    """Regroup questions that share one common stem (older input format).

    :param res_list: questions after splitting into single items; each dict
        must carry "type".
    :param item_groups: ``{"m-n": common_stem, ..., "pos": [...]}``; every
        "m-n" key names the 1-based question range of a group.  ``pos[i]`` is
        read as the 1-based number of the first question of group ``i``
        (presumably where the stem was found - confirm with the caller).
    :return: ``res_list`` itself when no group exists, otherwise a new list
        with each group replaced by a dict holding "common_stem", "slave",
        "type" and "que_num".
    """
    start_no = [key for key in item_groups.keys() if key != "pos"]
    if not start_no:
        return res_list
    start_no.sort(key=lambda key: int(key.split("-")[0]))
    print(start_no)

    pos = item_groups.get("pos", [])
    new_res_dict = []
    added_nos = []  # index of the next item not yet placed in the output
    for n, group_no in enumerate(start_no):
        st, end = (int(part) for part in group_no.split("-"))  # real numbers
        next_pos = pos[n + 1] if n + 1 < len(pos) else None

        if not added_nos:  # first group
            new_res_dict.extend(res_list[:st - 1])  # questions before it
            if next_pos is not None and next_pos <= end:
                # announced end is wrong: stop before the next group
                slave = res_list[st - 1:next_pos - 1]
                added_nos.append(next_pos - 1)
            else:
                slave = res_list[st - 1:end]
                added_nos.append(end)
        elif st <= added_nos[-1]:  # announced start is wrong
            done = added_nos[-1]
            if next_pos is not None:  # not the last group
                stop = end if end < next_pos else next_pos - 1
            elif end >= done:
                stop = end
            else:  # end is wrong too: stop where the question type changes
                stop = _type_change_index(res_list, done)
                if stop is None:
                    stop = len(res_list)
            slave = res_list[done:stop]
            added_nos.append(stop)
        else:
            # standalone questions between two groups go out first
            if st > added_nos[-1] + 1:
                new_res_dict.extend(res_list[added_nos[-1]:st - 1])
            slave = res_list[st - 1:end]
            added_nos.append(end)

        new_res_dict.append({
            "common_stem": item_groups[group_no],
            "slave": slave,
            "type": slave[0]["type"] if slave else "",
            "que_num": len(slave),
        })

    if added_nos[-1] < len(res_list):
        new_res_dict.extend(res_list[added_nos[-1]:])
    return new_res_dict