123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400 |
#!/usr/bin/env python
- # -*- coding:utf-8 -*-
- import re
- from pprint import pprint
def regroup(res_list, item_groups, ans_groups):
    """
    Regroup questions that share one common stem into parent items with "slave" sub-items.

    ``item_groups["groups_data"]`` maps the 0-based position of a question to a
    marker string:

    * ``'fei'`` - the question is not a multi-part question;
    * ``'a-b'`` - the 1-based question numbers a..b form one group;
    * ``''``    - a common stem starts here, but its range is unknown.

    Example::

        {'is_groups': 1, 'groups_data':
            {0: 'fei', 5: '', 8: '', 11: 'fei', 12: '', 15: '', 19: '20-21', 23: 'fei'}}

    When an entry of ``ans_groups`` has the same ``'a-b'`` key as a group's
    ``topic_num``, its ``key`` / ``parse`` texts are split per question number and
    written onto the sub-items; if the split does not line up they are stored on
    the group itself.  Finally every item gets indentation markup, a string
    ``topic_num`` and a normalised ``type``.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-stripped listing; spots where the nesting was ambiguous are
    marked below - please confirm them against the upstream file.

    :param res_list: flat list of question dicts.  The dicts are mutated in place
        (their ``com_stem`` is removed, answers are filled in).
    :param item_groups: dict holding the ``groups_data`` mapping described above.
    :param ans_groups: dict keyed by ``'a-b'`` ranges whose values hold the shared
        ``key`` and ``parse`` strings; may be ``None`` or empty.
    :return: new list in which grouped questions are replaced by parent dicts
        carrying ``slave``, ``type``, ``que_num`` and ``topic_num``.
    """
    missing_msg = "本题缺少答案和解析"
    translation_re = r"\n\s*[【参考]*?译文\s*[】::]"
    translation_split_re = r"\n\s*([【参考]*?译文\s*[】::])"
    sub_no_re = r"(?<=[\s\n])[((]\s*\d{1,2}[))]"
    ind_label = '<p style="text-indent: 2em">'

    new_res_dict = []
    groups_data = item_groups["groups_data"]
    start_no = sorted(groups_data)
    ans_groups = ans_groups or {}  # callers may pass None

    def takefirst(elem):
        return int(elem.split("-")[0])

    def type_run_end(start):
        """Index of the first item after *start* whose type differs from res_list[start], else None."""
        for idx in range(start + 1, len(res_list)):
            if res_list[idx]["type"] != res_list[start]["type"]:
                return idx
        return None

    ans_start_no = sorted(ans_groups, key=takefirst)
    # Every key is preceded by "#", so "#N-" only matches a range that STARTS at N.
    ans_keys_joined = "#" + "#".join(ans_groups)

    def ans_range_end(first_no):
        """End number (as str) of the answer range that starts at *first_no*, else None."""
        marker_text = "#{}-".format(first_no)
        if marker_text not in ans_keys_joined:
            return None
        return ans_keys_joined.split(marker_text)[-1].split("#", maxsplit=1)[0]

    def clear_missing_flag(item):
        """Drop the "answer and analysis missing" message once an answer was attached."""
        if missing_msg in item["errmsgs"]:
            item["errmsgs"] = item["errmsgs"].replace(missing_msg, "")

    contained_no = set()
    for ans_no in ans_start_no:
        st1, ed1 = ans_no.split("-")
        contained_no.update(range(int(st1) - 1, int(ed1)))
    not_contained_no = set(range(len(res_list))) - contained_no

    added_nos = []  # index just past the last question that has already been placed

    # Leading 'fei' keys: the paper starts with plain, single-level questions.
    temp_no = -1
    while start_no and groups_data[start_no[0]] == "fei":
        new_res_dict.append(res_list[start_no[0]])
        temp_no = start_no[0]
        del start_no[0]
    if not start_no:
        # No group left (empty mapping or only 'fei' keys): keep the rest flat.
        new_res_dict.extend(res_list[temp_no + 1:])
    elif start_no[0] > 0:
        new_res_dict.extend(res_list[temp_no + 1:start_no[0]])

    alone_item_nos = []
    print("start_no:", start_no)
    for n, group_no in enumerate(start_no):
        marker = groups_data[group_no]
        next_start = start_no[n + 1] if n + 1 < len(start_no) else None
        is_last = next_start is None

        if "com_stem" not in res_list[group_no]:
            # A trailing 'fei' item is left to the tail handling after the loop.
            if not (is_last and marker == "fei"):
                new_res_dict.append(res_list[group_no])
                added_nos.append(group_no + 1)
            continue

        # The question carries a common stem.
        # NOTE(review): the stem is removed from the question even when the
        # marker is 'fei' and no group is built, so that text is dropped.
        one_group = {"com_stem": res_list[group_no]["com_stem"]}
        del res_list[group_no]["com_stem"]

        if "-" in marker:  # the range of the group is stated
            st, end = marker.split("-")
            st, end = int(st), int(end)
            if not added_nos:  # first group
                if next_start is not None and next_start <= end:
                    # The stated end overlaps the next group: trust the next key.
                    slave = res_list[st - 1:next_start]
                    added_nos.append(next_start)
                else:
                    added_nos.append(end)
                    if len(res_list) < end:
                        # Question numbers do not start at 1: rebase on the first one.
                        offset = int(res_list[0]["topic_num"])
                        st = st - offset + 1
                        end = end - offset + 1
                    slave = res_list[st - 1:end]
            elif st <= added_nos[-1]:  # the stated start number is wrong
                prev_end = added_nos[-1]
                if next_start is not None:
                    if end < next_start:
                        slave = res_list[prev_end:end]
                        added_nos.append(end)
                    else:  # the end number is wrong as well
                        slave = res_list[prev_end:next_start]
                        added_nos.append(next_start)
                elif end >= prev_end:
                    slave = res_list[prev_end:end]
                    added_nos.append(end)
                else:  # the end number is wrong: ask the answers, then the types
                    ans_end = ans_range_end(group_no + 1)
                    if ans_end is not None:
                        slave = res_list[group_no:int(ans_end)]
                        added_nos.append(int(ans_end))
                    else:
                        run_end = type_run_end(prev_end)
                        if run_end is not None:
                            slave = res_list[prev_end:run_end]
                            added_nos.append(run_end)
                        else:
                            slave = res_list[group_no:]
                            # Record the consumed range so the tail is not re-added.
                            added_nos.append(len(res_list))
            else:
                # Stand-alone questions between the previous group and this one
                # have to be emitted BEFORE the end marker moves on.
                prev_end = added_nos[-1]
                if st > prev_end + 1:
                    new_res_dict.extend(res_list[prev_end:st - 1])
                added_nos.append(end)
                slave = res_list[st - 1:end]
        else:  # the range of the group is unknown
            if marker == "fei":
                if not is_last:
                    new_res_dict.append(res_list[group_no])
                    added_nos.append(group_no + 1)
                continue
            ans_end = ans_range_end(group_no + 1)
            if ans_end is not None:  # trust the numbering of the answers
                slave = res_list[group_no:int(ans_end)]
                added_nos.append(int(ans_end))
                if next_start is not None and int(ans_end) < next_start:
                    # Stand-alone questions between this group and the next key.
                    alone_item_nos.append([int(ans_end), next_start])
                    added_nos.append(next_start)
            elif next_start is not None:
                slave = res_list[group_no:next_start]
                added_nos.append(next_start)
            else:
                run_end = type_run_end(added_nos[-1]) if added_nos else None
                if run_end is not None:
                    slave = res_list[added_nos[-1]:run_end]
                    added_nos.append(run_end)
                else:
                    slave = res_list[group_no:]
                    added_nos.append(len(res_list))

        one_group["slave"] = slave
        one_group["type"] = slave[0]["type"] if slave else ""
        one_group["que_num"] = len(slave)
        if not slave:
            one_group["topic_num"] = ""
        elif slave[-1]["topic_num"] != slave[0]["topic_num"]:
            one_group["topic_num"] = "{}-{}".format(slave[0]["topic_num"], slave[-1]["topic_num"])
        else:
            one_group["topic_num"] = slave[0]["topic_num"]

        # NOTE(review): this if/else is read as "answer groups exist / do not
        # exist"; the flattened source would also allow a for-else reading.
        if ans_start_no:
            k = one_group["topic_num"]
            if k in ans_start_no:
                st1, end1 = k.split("-")  # real question numbers of the group
                expected = int(end1) - int(st1) + 1
                shared_parse = ans_groups[k]["parse"]
                if len(re.findall("【详解】", shared_parse)) > 1:
                    parse_list = re.split("【详解】", shared_parse)[1:]
                else:
                    t_seq_no = [str(no) for no in range(int(st1), int(end1) + 1)]
                    if any(len(no) > 1 for no in t_seq_no):
                        # Multi-digit numbers need an alternation; the captured
                        # numbers sit on the even positions and are skipped.
                        parse_list = re.split(r"(?<=[】\n])\s*(" + "|".join(t_seq_no) + r")\s*[、..、]",
                                              "\n" + shared_parse)[1:]
                        parse_list = parse_list[1::2]
                    else:
                        parse_list = re.split(r"(?<=[】\n])\s*[" + "".join(t_seq_no) + r"]\s*[、..、]",
                                              "\n" + shared_parse)[1:]
                if len(parse_list) > 1:
                    ans_list = re.split(r"(?<=[】\s])\d{1,2}\s*[、..、]|^\d{1,2}\s*[、..、]",
                                        ans_groups[k]["key"])[1:]
                    if len(parse_list) == expected:
                        for i, raw_parse in enumerate(parse_list):
                            pr = raw_parse.strip()
                            if i == len(parse_list) - 1 and re.search(translation_re, pr):
                                # A trailing translation belongs to the whole group.
                                pr, hd, rest = re.split(translation_split_re, pr, maxsplit=1)
                                one_group["parse"] = hd + rest
                            sub_item = slave[i]
                            sub_item["parse"] = pr
                            clear_missing_flag(sub_item)
                            if "slave" in sub_item:  # split the analysis once more
                                pieces = re.split(sub_no_re, "\n" + raw_parse.strip())
                                if len(pieces) - 1 == len(sub_item["slave"]):
                                    for piece, nested in zip(pieces[1:], sub_item["slave"]):
                                        nested["parse"] = piece.strip()
                                    sub_item["parse"] = pieces[0].strip()
                    else:
                        # Keep the analyses of all questions together on the group.
                        one_group["parse"] = shared_parse
                    if len(ans_list) == expected:
                        for j, raw_ans in enumerate(ans_list):
                            sub_item = slave[j]
                            sub_item["key"] = raw_ans.strip()
                            clear_missing_flag(sub_item)
                            if "slave" in sub_item:  # split the answer once more
                                slave_ans = re.sub(r"([((]\s*\d\s*[))])\s*[、..、,,::]\s*\1", r"\1", raw_ans)
                                pieces = re.split(sub_no_re, "\n" + slave_ans.strip())
                                if len(pieces) - 1 == len(sub_item["slave"]):
                                    for piece, nested in zip(pieces[1:], sub_item["slave"]):
                                        nested["key"] = piece.strip()
                                    sub_item["key"] = pieces[0].strip()
                    else:
                        one_group["key"] = ans_groups[k]["key"]
                    ans_start_no.remove(k)
                else:
                    one_group["key"] = ans_groups[k]["key"]
                    one_group["parse"] = shared_parse
                    for sub_item in slave:
                        clear_missing_flag(sub_item)
        else:
            # Answers follow each group in the paper, e.g.
            # 【答案】1.xx 2.xx \n【解析】1.xx 2.xx \n【答案】3.xx 4.xx \n【解析】3.xx 4.xx
            # so the last sub-question holds the analysis of the whole group.
            k = one_group["topic_num"]
            if (slave and isinstance(k, str) and "-" in k
                    and slave[0]["parse"] in ["略", ""] and slave[-1]["parse"]):
                st1, end1 = k.split("-")  # real question numbers of the group
                expected = int(end1) - int(st1) + 1
                t_seq_no = [str(no) for no in range(int(st1), int(end1) + 1)]
                parse_list = re.split(r"(?<=[】\n])\s*(" + "|".join(t_seq_no) + r")\s*[、..、]",
                                      "\n" + slave[-1]["parse"])[1:]
                parse_list = [pr.strip() for pr in parse_list[1::2]]
                if len(parse_list) == expected:
                    for ni, pr in enumerate(parse_list):
                        if ni == expected - 1:
                            pr = re.sub(r"\n\s*【答案】$", "", pr)
                            if re.search(translation_re, pr):
                                pr, hd, rest = re.split(translation_split_re, pr, maxsplit=1)
                                one_group["parse"] = hd + rest
                        slave[ni]["parse"] = pr

        new_res_dict.append(one_group)
        for alone_no in alone_item_nos:
            new_res_dict.extend(res_list[alone_no[0]:alone_no[1]])
        alone_item_nos = []

    if added_nos and added_nos[-1] < len(res_list):
        new_res_dict.extend(res_list[added_nos[-1]:])

    # Questions not covered by any answer group: push an outer answer/analysis
    # down into their own sub-questions.
    for one_no in sorted(not_contained_no):
        for idx, one_res in enumerate(new_res_dict):
            if one_no + 1 == one_res["topic_num"]:
                new_res_dict[idx] = parse_split2group(one_res)

    for one_res in new_res_dict:
        if "com_stem" in one_res:  # blanks inside a common stem are not counted for now
            # Adds <p style="text-indent: 2em"> paragraphs to the common stem.
            new_com_stem = suojin(one_res["com_stem"])
            # NOTE(review): as written this replace is a no-op; one of the two
            # literals was presumably a non-breaking space or an HTML entity.
            new_com_stem = new_com_stem.replace(" ", " ")
            one_res["stem"] = new_com_stem + "\n" + one_res["stem"] if "stem" in one_res else new_com_stem
            del one_res["com_stem"]
        elif "slave" in one_res:
            one_res["stem"] = suojin(one_res["stem"])
        one_res["topic_num"] = str(one_res["topic_num"])
        if "slave" in one_res:
            one_res['type'] = '小题多问类'
        elif "options" in one_res:
            one_res['type'] = '选择类'
        else:
            one_res['type'] = '解答类'
            # NOTE(review): the essay / material formatting below is nested under
            # this branch because groups often carry no 'key'; confirm upstream.
            key_text = one_res.get('key') or ""
            if "【范文】" in key_text:  # essay writing
                anss = re.split("\n+", key_text)
                ids = next(i for i, a in enumerate(anss) if "【范文】" in a)
                may_title = anss[ids].replace("【范文】", "").strip()
                if not may_title and ids + 1 < len(anss):
                    # The title may sit on the line after the 【范文】 heading.
                    ids += 1
                    may_title = anss[ids].strip()
                if 0 < len(may_title) < 5:
                    # Short title: centre it, indent only the paragraphs after it.
                    new_ans = "\n".join(anss[:ids]) + '<p style="text-align:center">' + anss[ids] + "</p>"
                    body = anss[ids + 1:]
                    if body:
                        new_ans += ind_label + ('</p>' + ind_label).join(body) + "</p>"
                else:
                    new_ans = ind_label + '</p><p style="text-indent: 2em">'.join(anss) + "</p>"
                one_res['key'] = new_ans
            elif re.search(r"(阅读|针对).{,4}[资材]料|(\n|^)\s*材料一\s", one_res['stem']) \
                    and "text-indent: 2em" not in one_res['stem']:
                one_res['stem'] = suojin(one_res['stem'])
    return new_res_dict
def suojin(item_str):
    """
    Wrap the lines of a text in indented ``<p style="text-indent: 2em">`` paragraphs.

    The text is split on runs of newlines.  When there is more than one line
    and the first line is a reading instruction (it matches
    ``(阅读|针对).{,4}[资材]料``), that first line is left unwrapped.
    Paragraphs that start with an ``<img `` tag are unwrapped again so that
    images are not indented; each is followed by a newline instead.

    :param item_str: raw text whose lines are separated by newlines
    :return: the text with paragraph markup applied, stripped of outer whitespace
    """
    ind_label = '<p style="text-indent: 2em">'
    con_list = re.split(r"\n+", item_str.strip())
    if len(con_list) > 1 and re.search("(阅读|针对).{,4}[资材]料", con_list[0]):
        new_con = con_list[0] + ind_label + ('</p>' + ind_label).join(con_list[1:]) + "</p>"
    else:
        new_con = ind_label + ('</p>' + ind_label).join(con_list) + "</p>"
    # The following opening tag is only looked at, not consumed: otherwise the
    # second of two consecutive image paragraphs could never match.
    new_con = re.sub(r'<p style="text-indent: 2em">(\s*<img .+?)</p>(?=$|<p style="text-indent: 2em">)',
                     r'\1\n', new_con, flags=re.S).strip()
    return new_con
def parse_split2group(item_list):
    """
    Move the outer answer / analysis of a question into its "slave" sub-questions.

    The outer ``parse`` and ``key`` texts are split on sub-question numbers such
    as ``(1)`` / ``(2)``.  A text is only distributed when the number of pieces
    equals the number of sub-questions; whatever precedes the first number
    stays on the outer question.  Nothing is split unless the question has an
    outer answer or analysis and at least one sub-question has neither.

    A question with ``com_stem`` and exactly one sub-question is treated as a
    nested wrapper: the single sub-question is processed and a shallow copy of
    the wrapper is returned.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-stripped listing.  The ``com_stem`` merge below is placed at
    function level (it runs whether or not anything was split) - confirm.

    :param item_list: one question dict (despite the name, not a list); it is
        modified in place
    :return: the processed question dict
    """
    sub_no_re = r"(?<=[\s\n】])[((]\s*[\dl]{1,2}\s*[))]"
    raw_item_list = item_list.copy()
    flag = 0
    if "com_stem" in item_list and "slave" in item_list and len(item_list["slave"]) == 1:  # nested
        item_list = item_list["slave"][0]
        flag = 1

    if "slave" in item_list:
        slaves = item_list["slave"]
        # Missing or None 'key' / 'parse' count as empty instead of raising.
        outer_key = item_list.get("key") or ""
        outer_parse = item_list.get("parse") or ""
        has_unanswered = any(
            not ((s.get("key") or "") + (s.get("parse") or "")).strip() for s in slaves)
        if (outer_key or outer_parse) and has_unanswered:
            # analysis
            parse_list = re.split(sub_no_re, "\n" + outer_parse.strip())
            if len(parse_list) - 1 == len(slaves):
                for piece, sub_item in zip(parse_list[1:], slaves):
                    sub_item["parse"] = piece.strip()
                item_list["parse"] = parse_list[0].strip()
            # answer: collapse a repeated number such as "(1)、(1)" first
            ans = re.sub(r"([((]\s*\d\s*[))])\s*[、..、,,::]\s*(\1)", r"\2", outer_key)
            ans_list = re.split(sub_no_re, "\n" + ans.strip())
            if len(ans_list) - 1 == len(slaves):
                for piece, sub_item in zip(ans_list[1:], slaves):
                    sub_item["key"] = piece.strip()
                item_list["key"] = ans_list[0].strip()

    # 2021-12-21: fold a remaining common stem into the stem itself.
    if "com_stem" in item_list:
        item_list["stem"] = item_list["com_stem"].strip() + "<br/>" + item_list["stem"] \
            if "stem" in item_list else item_list["com_stem"]
        del item_list["com_stem"]
    if flag:
        raw_item_list["slave"] = [item_list]
        item_list = raw_item_list
    return item_list
def regroup_old(res_list, item_groups):
    """
    Regroup questions that share one common stem into parent items (older variant).

    Every key of ``item_groups`` other than ``"pos"`` is an ``'a-b'`` range of
    1-based question numbers whose value is the common stem of that group.
    ``item_groups["pos"]`` is a list; entry ``n + 1`` is used as the number of
    the first question of the group following group ``n`` and overrides a
    stated end number that reaches into it.

    NOTE(review): the indentation of this function was reconstructed from a
    whitespace-stripped listing - confirm the nesting against the upstream file.

    :param res_list: flat list of question dicts, already split per question
    :param item_groups: mapping of ``'a-b'`` ranges to common stems plus ``"pos"``
    :return: ``res_list`` itself when there is no group, otherwise a new list in
        which grouped questions are replaced by dicts with ``common_stem``,
        ``slave``, ``type`` and ``que_num``
    """
    new_res_dict = []
    start_no = [i for i in item_groups.keys() if i != "pos"]
    if not start_no:
        return res_list

    def takefirst(elem):
        return int(elem.split("-")[0])

    start_no.sort(key=takefirst)
    print(start_no)
    pos = item_groups.get("pos") or []
    added_nos = []  # index just past the last question that has already been grouped
    for n, group_no in enumerate(start_no):
        one_group = {"common_stem": item_groups[group_no]}
        st, end = group_no.split("-")  # real question numbers of the group
        st, end = int(st), int(end)
        # The last group has no successor in `pos`.
        next_pos = pos[n + 1] if n + 1 < len(pos) else None
        if not added_nos:  # first group
            if next_pos is not None and next_pos <= end:
                # The end number stated with the common stem is wrong.
                slave = res_list[st - 1:next_pos - 1]
                added_nos.append(next_pos - 1)
            else:
                added_nos.append(end)
                slave = res_list[st - 1:end]
        elif st <= added_nos[-1]:  # the stated start number is wrong
            prev_end = added_nos[-1]
            if next_pos is not None:  # not the last group
                if end < next_pos:
                    slave = res_list[prev_end:end]
                    added_nos.append(end)
                else:  # the end number is wrong too: trust pos
                    slave = res_list[prev_end:next_pos - 1]
                    added_nos.append(next_pos - 1)
            elif end >= prev_end:
                slave = res_list[prev_end:end]
                added_nos.append(end)
            else:  # the end number is wrong: stop where the question type changes
                endp = [m for m, j in enumerate(res_list[prev_end:])
                        if j["type"] != res_list[prev_end]["type"]]
                if endp:
                    slave = res_list[prev_end:endp[0] + prev_end]
                    added_nos.append(endp[0] + prev_end)
                else:
                    # No type change found: the group runs to the end.
                    slave = res_list[prev_end:]
                    added_nos.append(len(res_list))
        else:
            # Stand-alone questions between the previous group and this one
            # have to be emitted BEFORE the end marker moves on.
            prev_end = added_nos[-1]
            if st > prev_end + 1:
                new_res_dict.extend(res_list[prev_end:st - 1])
            added_nos.append(end)
            slave = res_list[st - 1:end]
        one_group["slave"] = slave
        one_group["type"] = slave[0]["type"] if slave else ""
        one_group["que_num"] = len(slave)
        new_res_dict.append(one_group)
    if added_nos[-1] < len(res_list):
        new_res_dict.extend(res_list[added_nos[-1]:])
    return new_res_dict
|