#!/usr/bin/env python
# -*- coding:utf-8 -*-
# The original header lists the following functions for this file:
#   stem_ans_split:       further split one extracted question into content / answer / analysis
#   correct_wrong_no:     correct, or report, wrongly split question numbers
#   stems_structure_byno: split by question number
#   dati2slave:           split a large question that has sub-questions by sub-question
#   split2little_con:     further divide a fill-in-the-blank or free-response question that has
#                         sub-questions by sub-question (the sub-questions are already split)
#   get_options_arrange:  determine how many options are laid out per line in the Word document
import re
from washutil import table_label_cleal
from ans_structrue import only_parse_split, get_ans_from_parse
from pprint import pprint
from collections import Counter

# Punctuation classes used inside regex character classes.  The patterns as found
# contained duplicated ASCII characters (e.g. "[::]"), which is what full-width
# punctuation collapses to under Unicode compatibility normalisation.  The
# full-width forms are therefore accepted explicitly, written as escapes so they
# survive any further normalisation of this file.
_COLON = r":\uff1a"                    # ":" and the full-width colon
_ITEM_NO_SEP = r".\uff0e\u3001\uff64"  # ".", full-width stop, ideographic comma (full/half width)
_CHOICE_CHARS = r"A-Z;\uff1b和与\u3001\uff64\s"

_PARSE_KEYWORDS = "解析|解答|分析|详解|点评|点睛"
# Keywords that are put back into the analysis text wrapped in 【】.
_WRAPPED_TITLES = ('解答', '分析', '解析', '详解', '点评', '点睛')

# Layout without an answer keyword: every analysis-like heading is a split point.
_PARSE_ONLY_SPLIT = r"【(" + _PARSE_KEYWORDS + r"|考点|专题)】\n*?"

# Layout with an answer keyword (bracketed headings only) - used by stem_ans_split.
_ANSWER_SPLIT = r"【答案】\n?"
_PARSE_SPLIT = r"【(" + _PARSE_KEYWORDS + r")】\n?"

# Same, additionally accepting "答案:" / "解析:" style headings - used by stem_ans_split2.
_ANSWER_SPLIT_LOOSE = r"【答案】\n?|答案\s*[" + _COLON + r"]\n?"
_PARSE_SPLIT_LOOSE = (r"【(" + _PARSE_KEYWORDS + r")】\n?|(" + _PARSE_KEYWORDS
                      + r")\s*[" + _COLON + r"]")

# "故选 X" at the end of a choice-question analysis.  The code reads group(1) and
# group(2), so two capturing alternatives are required.  The first alternative as
# found had lost its capturing part; it is reconstructed here from the sibling
# pattern that was left in a comment inside stem_ans_split (an <img> tag whose
# data-latex attribute holds the option letters).
# NOTE(review): confirm the <img> alternative against version control.
_CHOICE_ANSWER_RE = re.compile(
    r'故选[' + _COLON + r']?<img[^>]+?data-latex="\$?([' + _CHOICE_CHARS + r']+)\$?"[^>]*>'
    r'|故选[' + _COLON + r']?([' + _CHOICE_CHARS + r']+)'
)
# "故答案为 ... ." followed by a line break, for non-choice questions.
_FILL_ANSWER_RE = re.compile(
    r'故\s*[' + _COLON + r']?\s*答案分?别?[为是]?\s*[' + _COLON + r']?\s*(.+?)[.\uff0e]\s*\n'
)


def _strip_item_no(content):
    """Remove one question number such as ``12.`` or ``3、`` from the first five characters."""
    # count=1: only the first hit can be the question number; without it a leading
    # decimal such as "1. 3.5" would lose its "3." as well.
    return re.sub(r"[1-9][0-9]?\s*[" + _ITEM_NO_SEP + r"]", "", content[:5], count=1) + content[5:]


def _split_parse_only(text):
    """Split text that has no answer keyword into ``(content, parse)``."""
    cleaned = []
    for pos, piece in enumerate(re.split(_PARSE_ONLY_SPLIT, text)):
        # Test for None explicitly: stringifying and deleting "None" would also
        # delete the literal word from the question text.
        piece = "" if piece is None else piece.strip()
        # Odd positions hold the captured heading keyword.
        if pos % 2 == 1 and piece in _WRAPPED_TITLES:
            piece = "【" + piece + "】"
        cleaned.append(piece)
    content = cleaned[0]
    # No heading found -> empty analysis instead of a KeyError.
    parse_title = cleaned[1] if len(cleaned) > 1 else ""
    parse = parse_title + "\n".join(cleaned[2:]).replace("\n\n", "\n")
    return content, parse


def _split_answer_and_parse(text, answer_pattern, parse_pattern):
    """Split text that has an answer keyword into ``(content, answer, parse)``."""
    head = re.split(answer_pattern, text, maxsplit=1)
    content = head[0]
    tail = head[1] if len(head) > 1 else ""  # answer keyword missing -> empty answer
    pieces = re.split(parse_pattern, tail, maxsplit=1)
    answer = pieces[0]
    if len(pieces) == 1:
        return content, answer, ""
    # re.split returns one entry per capture group between the two text parts;
    # exactly one of them is set, whichever alternative matched.
    title = next((group for group in pieces[1:-1] if group), "")
    parse = "【" + title + "】" + pieces[-1]
    parse = re.sub(r"^\s*【解析】", "", parse)
    return content, answer, parse


def _answer_from_parse(parse, item_type):
    """Derive an answer from the analysis text (fallback used by stem_ans_split2)."""
    if item_type in ["单选题", "多选题", "选择题"]:
        found = _CHOICE_ANSWER_RE.search(parse.replace("$", "").replace(" ", ""))
        if not found:
            return ""
        return found.group(1) if found.group(1) is not None else found.group(2)
    found = _FILL_ANSWER_RE.search(parse)
    return found.group(1) if found else "见解析"


def stem_ans_split(one_item_dict, case):
    """
    Split one extracted question into content, answer and analysis.

    :param one_item_dict: preliminary structure of a single question; the keys read
        here are "content" and "item_topic_name" (the original docstring also lists
        "item_id" and "errmsgs")
    :param case: 'case0' when the text has no 【答案】 keyword; any other value is
        handled as the layout that has one
    :return: {"content": ..., "answer": ..., "parse": ...}
    """
    one_item = one_item_dict["content"]
    item_type = one_item_dict["item_topic_name"]
    text = table_label_cleal(one_item)
    if case == 'case0':  # no answer keyword
        content, parse = _split_parse_only(text)
        parse = re.sub(r"^\s*【解析】", "", parse)
        answer = ""
    else:  # answer keyword present; the answer is assumed to precede the analysis
        content, answer, parse = _split_answer_and_parse(text, _ANSWER_SPLIT, _PARSE_SPLIT)
    content = _strip_item_no(content)
    # No explicit answer: let the shared helper derive one from the analysis.
    if not answer:
        answer = get_ans_from_parse(parse, item_type, content)
    return {"content": content, "answer": answer, "parse": parse}


def stem_ans_split2(one_type_list, idx1, idx2, item_type, case):
    """
    Split one question, given as a slice of lines, into content, answer and analysis.

    :param one_type_list: list of text lines belonging to one question type
    :param idx1: index of the first line of the question (inclusive)
    :param idx2: index of the first line of the next question (exclusive);
        -1 means "up to the end of the list"
    :param item_type: question type name
    :param case: 'case1' when the text has no answer keyword (note that
        stem_ans_split uses 'case0' for this); any other value is handled as the
        layout that has one
    :return: {"content": ..., "answer": ..., "parse": ..., "item_topic_name": ...}
    """
    one_item = one_type_list[idx1:] if idx2 == -1 else one_type_list[idx1:idx2]
    text = table_label_cleal("\n".join(one_item))
    if case == 'case1':  # no answer keyword
        # NOTE(review): the collapsed source does not show whether a leading 【解析】
        # marker was stripped in this branch as well; it is left in place here.
        content, parse = _split_parse_only(text)
        answer = ""
    else:
        content, answer, parse = _split_answer_and_parse(
            text, _ANSWER_SPLIT_LOOSE, _PARSE_SPLIT_LOOSE)
    content = _strip_item_no(content)
    # An explicit answer is kept, as in stem_ans_split; otherwise derive one.
    if not answer:
        answer = _answer_from_parse(parse, item_type)
    topic_name = item_type if re.sub(r"[(\uff08]", "", item_type) != '本大题' else "解答题"
    return {"content": content, "answer": answer, "parse": parse,
            "item_topic_name": topic_name}


# Disabled legacy helper, preserved as found (only line breaks and indentation restored).
# def correct_wrong_no(con_list, items_no, item_no_type):
#     """
#     针对分错的题号进行纠正 ;;带解析的划分题目最好按关键字拆分!!!!
#     题号划分错误有:题号重复,题号遗漏,题号偏离很远的错误如88.等
#     无题型行时,con_list中每个元素代表每一行
#     有题型行时,con_list中每个元素代表每个题型中的所有题目
#     items_no:初步找到的所有题号
#     :return: con_list
#     """
#     # items_no = [1,2,3,4,5,6,7, 8, 9, 10, 11, 6, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
#     seq_no = find_seq_num(items_no)  # 找到连续的分组
#     print("items_no:", items_no)
#     print("seq_no:", seq_no)
#
#     err_no_idx = {}  # 分错的分组序号和错误题号,主要针对2个以内成组的序号
#     double_no = []  # 针对2个以上成组,且重复序号 分错的情况
#     omit_no = []  # 因没有换行或无题号导致 没有 切分出来的题号
#     right_no_list = []
#     if len(seq_no) > 1:  # 存在分断或分错的地方
#         print('按题号切分的过程中,存在分断或分错的地方')
#         right_no = [i for i in seq_no if len(i) > 2]
#         if len(find_seq_num(sum(right_no, []))) == 1:  # 2个以上成的所有组是连续的
#             # 题号序列异常值判断
#             right_seq = del_exception_value(items_no)  # 主要去掉异常的大值
#             # print("right_seq:",right_seq)
#             right_max_v = -1
#             if not right_seq:
#                 right_max_v = max(items_no)
#             else:
#                 right_max_v = right_seq[-1]
#             # print("right_max_v:", right_max_v)
#             if sum(right_no, [])[0] == 1 and sum(right_no, [])[-1] == right_max_v:  # 题号从1开始
#                 # [1,2,3,4,5,6,7, 8, 9, 10, 11, 6, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
#                 right_no_list.extend([i for k, i in enumerate(seq_no) if len(i) > 2])
#                 err_no_idx.update({k: i for k, i in enumerate(seq_no) if len(i) <= 2})  # 出现重复题号
#             else:  # 说明左右两边有遗漏
#                 # [[1, 2], [4, 5], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]]
#                 # [[1, 2, 3, 4,
#                 # 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [20, 21]]
#                 # [[1, 2], [4, 5], [7, 8, 9, 10, 11], [6], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]]
#                 # todo_no = [i for i in seq_no if len(i) <= 2]
#                 right_no_idx = [k for k, i in enumerate(seq_no) if len(i) > 2]
#                 if seq_no[:right_no_idx[0]]:  # k>=1 左边有遗漏
#                     que_no = set(range(1, sum(right_no, [])[0])) - set(sum(seq_no[:right_no_idx[0]], []))
#                     omit_no.extend(list(que_no))
#                 elif len(right_no_idx) == 1 and seq_no[right_no_idx[0]+1:]:  # 右边有遗漏
#                     que_no = set(range(sum(right_no, [])[-1]+1, right_max_v)) - set(sum(seq_no[right_no_idx[0]+1:], []))
#                     omit_no.extend(list(que_no))
#                 # print("omit_no:",omit_no)
#                 # 既遗漏又有重复的错误不同时考虑!!!!,先报遗漏错误,教师修改后再对重复部分进行纠正
#         else:
#             # 存在题号错误:一种是与正确的重复,另一种是与序号偏离的很远,如81,目前是暂定取99内的数字作为序号
#             # [[1, 2], [4, 5, 6, 7, 8, 9, 10, 11], [13, 14], [16, 17, 18, 19, 20, 21]]
#             # [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [13, 14], [16, 17, 18, 19, 20, 21]]
#             num_count = Counter(items_no)
#             # print("num_count:",num_count)
#             if len(set(num_count.values())) > 1:
#                 print("存在{题号重复}的切分错误")
#                 for k, v in num_count.items():
#                     if v >= 2:  # 重复2次以上
#                         # print(items_no.index(k))  # 只能获取第一个元素的索引值
#                         v2_index = [index for (index, value) in enumerate(items_no) if value == k][1:]  # 重复序号的索引
#                         # 判断重复序号哪个是错误的,这里没有考虑题号遗漏的情况
#                         if v2_index[0]+items_no[0] > k:  # 位置 > 序号, 一般要求题号从1开始
#                             for subi in v2_index:
#                                 # print(subi, k)
#                                 double_no.append((k, 'xiao'))
#                                 del items_no[subi]
#                         if v2_index[0]++items_no[0] < k:  # 位置 < 序号
#                             for subi in v2_index:
#                                 double_no.append((k, 'da'))
#                                 del items_no[subi]
#
#             else:  # 存在题号遗漏
#                 print("存在题号遗漏")
#                 for k, i in enumerate(right_no):
#                     if k == 0:
#                         if i[0] == 2:
#                             omit_no.append(1)
#                         if i[0] > 2:
#                             omit_no.append("1~"+str(i[0]-1))
#                     if 0 < k < len(right_no):
#                         omit_no.extend(list(range(right_no[k-1][-1]+1, i[0])))
#
#     # if omit_no:
#     #     return "第" + ",".join(map(str, omit_no)) + "题的格式是否正确,不要放在表格中,且要求题号从1开始并连续;" \
#     #            "若格式正确,请将第" + ",".join(map(str, omit_no)) + "题的题号(包括题号后的标点符号)重新手输且与上一题重新换行"
#
#     if double_no and len(find_seq_num(items_no)) == 1:
#         # 在分错题号前加标识
#         all_con = "@@\n" + "@@\n".join(con_list)
#         for db in double_no:
#             may_no_st = re.search(r"\n\s*" + str(db[0]) + r'\s*([..、、].+?)',
#                                   all_con, re.S).start()  # 分错位置在全文中的索引
#             if item_no_type == 2:
#                 may_no_st = re.search(r"\n\s*[((]\s*" + str(db[0]) + r'\s*[))]\s*([..、、]?.+?)',
#                                       all_con, re.S).start()  # 分错位置在全文中的索引
#             if db[1] == 'xiao':  # 重复的切分错误的序号在正确的后面,第一个匹配到的是正确的
#                 # all_con = all_con[:may_no_st] + re.sub(r"\s+((?!src).)+?", r"\1", all_con[may_no_st:][:15]) + all_con[may_no_st:][15:]
#                 # 该正则表示空格后面是src字符串时,空格保留;最开始时图片已做过替换,这里也可以去掉图片信息中的空格
#
#                 err_no_st = re.search(r"\n\s*" + str(db[0]) + r'\s*([..、、].+?)',
#                                       all_con[may_no_st+10:], re.S).start()  # 分错位置在全文中的索引
#                 if item_no_type == 2:
#                     err_no_st = re.search(r"\n\s*[((]\s*" + str(db[0]) + r'\s*[))]\s*([..、、]?.+?)',
#                                           all_con[may_no_st + 10:], re.S).start()  # 分错位置在全文中的索引
#                 # print("err_no_st:", err_no_st, all_con[may_no_st + err_no_st+10:may_no_st + err_no_st+20])
#
#                 all_con = all_con[:may_no_st + err_no_st + 11] + "【fei】" \
#                           + all_con[may_no_st + err_no_st + 11:]  # 在分错题号前加标识
#
#             if db[1] == 'da':  # 重复的切分错误的序号在正确的前面,第一个匹配到的是错误的
#                 all_con = all_con[:may_no_st + 1] + "【fei】" \
#                           + all_con[may_no_st + 1:]  # 在分错题号前加标识
#         # print("all_con:",all_con)
#         con_list = all_con.split("@@\n")[1:]
#
#     # 针对2个以内成组的序号 加错误标识
#     sorted_idx = sorted(err_no_idx.keys(), reverse=False)  # 对字典按索引位置排序
#     print("err_no_idx:", err_no_idx, "sorted_idx:", sorted_idx)
#     if err_no_idx:
#         if sorted_idx[0] > 0:
#             all_con = "@@\n" + "@@\n".join(con_list)
#             st_flag = str(seq_no[sorted_idx[0] - 1][-1])  # 分错位置的前一个题号
#             # 分错位置的前一个题号在全文中的索引
#             # if err_no_idx[sorted_idx[0]][0] == int(st_flag):
#             #     return st_flag + "题题号出现重复"
#             st_flag_index = re.search(r"\n+\s*" + st_flag + r'\s*([..、、].+?)', all_con, re.S).start()
#             if item_no_type == 2:
#                 st_flag_index = re.search(r"\n+\s*[((]\s*" + st_flag + r'\s*[))]\s*([..、、]?.+?)', all_con, re.S).start()
#             for k in sorted_idx:  # 遍历键
#                 for subk in err_no_idx[k]:  # 遍历 键
#                     # 的值
#                     # print('*****************')
#                     # print("st_flag:", st_flag, '---subk:', subk)
#                     # print("st_flag_index:",st_flag_index)
#                     err_no_st = re.search(r"\n\s*" + str(subk) + r'\s*([..、、].+?)',
#                                           all_con[st_flag_index:], re.S).start()  # 分错位置在全文中的索引
#                     if item_no_type == 2:
#                         err_no_st = re.search(r"\n\s*[((]\s*" + str(subk) + r'\s*[))]\s*([..、、]?.+?)',
#                                               all_con[st_flag_index:], re.S).start()  # 分错位置在全文中的索引
#                     all_con = all_con[:st_flag_index + err_no_st + 1] + "【fei】" \
#                               + all_con[st_flag_index + err_no_st + 1:]  # 在分错题号前加标识
#             con_list = all_con.split("@@\n")[1:]
#         else:  # 拿到了前面不是题号的序号 [27, 27, 1, 2, 3, 4, 5, 6, 7]
#             all_con = "@@\n" + "@@\n".join(con_list)
#             if items_no.count(1) == 1:
#                 con_1 = re.split(r"@@\n\s*1\s*[..、、]", all_con)[1]
#                 con_list = ("1、"+con_1).split("@@\n")
#     right_no_list = sum(right_no_list, [])
#     # right_no_list = str(right_no_list).replace("[", "").replace("]", "").replace(" ", "").split(",")
#     #
#     # con_list = re.split(r"\n\s*("+ r"|".join(right_no_list) + ")\s*[..、、]", all_con)[1:]
#     # if len(con_list) > 1:
#     #     con_list = [con for k, con in enumerate(con_list) if k % 2 == 1]
#     return con_list


def split2one_item(con_list):
    """
    Split a paper of the first format into individual questions (currently disabled).

    The whole implementation below is commented out, so calling this function
    does nothing and returns ``None``.

    Notes carried over from the original docstring:

    * first paper format: teacher's edition, containing the answer and analysis
      keywords;
    * the input is an HTML file, and the document is first divided by section;
    * splitting idea: split on blank lines - first delete the blank line in front
      of 【答案】 and 【解析】, then split on the separator (the separator text
      itself is missing from the docstring as found);
    * format requirements: each question is numbered like ``21.`` (digits plus an
      ASCII period); each section is numbered with a Chinese numeral (一二三四)
      followed by the Chinese enumeration comma.

    :param con_list: unused while the implementation is disabled; the disabled
        code treats it as a sequence of text lines
    :return: ``None``; the disabled code returned ``res, item_no_type`` or an
        error-message string
    """
    # Disabled legacy implementation, preserved as found (only line breaks and
    # indentation restored).
    # item_no_type = 1
    # # all_con = table_label_cleal("\n" + "\n".join(con_list))
    # # item_no = [int(no) for no in re.findall(r'\n+\s*([1-9][0-9]?)\s*[..、、]', all_con)]
    # # if len(item_no) <= 2:
    # #     item_no_type = 2
    # #     item_no = [int(no) for no in re.findall(r'\n+\s*[((]\s*([1-9][0-9]?)\s*[))]\s*[..、、]?', all_con)]
    # #     if len(item_no) > 3:
    # #         all_con = re.sub(r'\n\s*\(([1-9][0-9]?)\)\s*[..、、]?', "\n" + r"【@\1、", all_con)
    # # con_list = all_con.replace("【@", "").split("\n")[1:]
    # # ----------------------------------------------------------------------------
    # # 去掉多余空格,作用不大
    # con2 = ["【delete】" if (k < len(con_list) - 1 and v.strip() == "" and (
    #     re.match(r"【(答案|解析)】|(答案|解析)\s*[::]| 0 and v.strip() == "" and (
    #     re.match(r"【(答案|解析)】$|(答案|解析)\s*[::]", con_list[k - 1].strip()) or
    #     re.match(r"[a-z<>/\s]*[一二三四五六七八九十]\s*[、..、]\s*[^必考基础综合中等]{2,4}题",
    #              con_list[k - 1].strip())))
    #         else v for k, v in enumerate(con_list)]
    # con3 = list(filter(lambda x: x != "【delete】", con2))
    # while len(con3) > 0:
    #     if con3[-1].strip() == "":
    #         del con3[-1]
    #     if con3[0].strip() == "":
    #         del con3[0]
    # con3.append("")  # 不然最后一个题就漏掉了
    #
    # # 开头没用信息处理
    # con3[0] = re.sub(r"([一二三四五六七八九十]\s*[、..、]\s*[^必考基础综合中等]{2,4}题)", r"\n\1", con3[0])
    # while con3 and (re.search(r"[\u4e00-\u9fa5]", con3[0]) is None
    #                 or re.search(r"[一二三四五六七八九十]\s*[、..、]\s*[^必考基础综合中等]{2,4}题", con3[0]) is None):
    #     del con3[0]
    #
    # # ----------------------解析 方案【1】-------------------------------------------------------------
    # # 根据大题型分,再按【答案|解析】初步拆分题目,再在‘解析’和‘答案’间细分‘题干’和‘解析’
    # # 1、获取题型行信息、按题型行切分
    # con4, all_type_info, all_type, each_item_score, each_item_score2, select_type_id, choice_class \
    #     = get_item_head_info("\n" + "\n".join(con3))
    #
    # # 2、据是否有题型行分两步进行
    # res = []
    # if not all_type:
    #     print("不存在大题题型行或题型行格式有问题")
    #     return "不存在大题题型行或题型行格式有问题,请检查"  # 放第【2】种方案中进行处理
    # else:
    #     if len(all_type) != len(con4):
    #         print("存在题型行没有换行")
    #         return "存在题型行末尾没有换行,请在所有题型行末尾重新换行"  # 放第【2】种方案中进行处理
    #     else:
    #         # if "非选择题" in all_type:
    #         #     return "第" + str(all_type.index("非选择题")+1) + "大题的题型不明确"
    #         index = 0
    #         for num, one_type in enumerate(con4):
    #             count = 1
    #             if len(re.findall(r"\n\s*【答案】", one_type)) == len(re.findall(r"\n\s*【解析】", one_type)):
    #                 subcon = re.split(r"((?<=\n)\s*【答案】|(?<=\n)\s*【解析】)\n?", one_type.strip())
    #                 # index根据第一道题的题号进行纠正
    #                 st_pat = re.match(r"([1-9]|[1-6][0-9])\s*[..、、].+?", subcon[0].strip())
    #                 if st_pat and num == 0:
    #                     st_id = st_pat.group(1)
    #                     if int(st_id) != 1:
    #                         index = int(st_id) - 1
    #
    #                 if len(subcon) == 5:  # 只有1道题
    #                     dd = dict(zip(["content", "answer", "parse"],
    #                                   re.split(r"(?<=\n)\s*【答案】|(?<=\n)\s*【解析】", table_label_cleal(one_type))))
    #                     dd["item_topic_name"] = all_type[num]
    #                     dd["content"] = re.sub(r"\d+\s*[..、、]", "", dd["content"][:5]) + dd["content"][5:]
    #                     dd["score"] = each_item_score[num]
    #                     dd["errmsgs"] = []
    #                     dd["item_id"] = count + index
    #                     if not dd["score"] and each_item_score2 and str(dd["item_id"]) in each_item_score2.keys():
    #                         dd["score"] = each_item_score2[str(dd["item_id"])]
    #                     if select_type_id and dd["item_id"] in select_type_id:
    #                         dd['is_optional'] = 'true'
    #                     res.append(dd)
    #                     # count += 1
    #                 else:
    #                     # ------在下一题【解析】在本题【答案】之间找到下一题【content】的位置--------
    #                     for id in range(len(subcon)):
    #                         if re.match(r"\n*\s*【解析】", subcon[id]) and id < len(subcon) - 2:  # 不是最后一个解析,倒数第二个是最后一个解析
    #                             count += 1
    #                             ssub = subcon[id + 1].strip().split("\n")  # 首尾空行先去掉
    #                             blank_line = [i for i, v in enumerate(ssub) if v.strip() == ""]  # 空格索引
    #                             # 索引to题号字典
    #                             con_id_line_dict = {i: re.match(r"([1-9]|[1-6][0-9])\s*[..、、]", v.strip()).group(1)
    #                                                 for i, v in enumerate(ssub)
    #                                                 if re.match(r"([1-9]|[1-6][0-9])\s*[..、、]", v.strip())}
    #                             # print("con_id_line_dict",con_id_line_dict)
    #                             con_id_line = list(con_id_line_dict.keys())  # 行索引,第几行
    #                             topicno = list(con_id_line_dict.values())  # 题号序列
    #                             topicno_line_idx = dict(zip(topicno, con_id_line))  # 题号to行索引字典
    #                             if len(con_id_line) != len(topicno_line_idx):
    #                                 return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                     index + count) + "题)的题文和上一题的解析之间出现【多个相同的题目序号】,请重新确认!"
    #                             else:
    #                                 if len(blank_line) == 1 and len(con_id_line) == 1:  # 一般情况只有一个空行
    #                                     if con_id_line[0] > blank_line[0]:
    #                                         ssub.insert(con_id_line[0], "【content】")
    #                                     else:
    #                                         if str(count + index) == topicno[0]:  # 该题的序号正确,优先按序号拆
    #                                             ssub.insert(con_id_line[0], "【content】")
    #                                         else:
    #                                             ssub[blank_line[0]] = "【content】"  # 该题序号不对时再考虑空行
    #                                 elif len(blank_line) != 1:
    #                                     if len(con_id_line) >= 1:  # 优先考虑题目序号,多个序号时
    #                                         # ssub.insert(con_id_line[-1], "【content】")  # 默认最后一个,很粗糙
    #                                         if str(count + index) in topicno:
    #                                             ssub.insert(topicno_line_idx[str(count + index)], "【content】")
    #                                         else:
    #                                             return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                                 index + count) + "题)的题文和上一题的解析之间出现【题目序号不连续】,请检查该题目序号并重新手输!"
    #                                     elif len(blank_line) > 1:  # 题目序号有误,多个空行时
    #                                         # ssub[blank_line[-1]] = "【content】"
    #                                         return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                             index + count) + "题)的题文和上一题的解析之间出现【题目序号有误】,请将题目序号重新手输!"
    #                                     else:  # 无序号,无空行
    #                                         return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                             index + count) + "题)的题文和上一题的解析之间出现【题目序号或空行都有误】,请将题目序号重新手输并查看空行!"
    #                                     # 如果存在空行有误,且题目序号有误时,那基本就会拆分错误
    #                                 else:  # len(con_id_line)!=1
    #                                     if not con_id_line:  # 一个空行,没有序号时
    #                                         # ssub[blank_line[0]] = "【content】"
    #                                         return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                             index + count) + "题)的题文和上一题的解析之间出现【题目序号有误】,请将题目序号重新手输!"
    #                                     else:  # 1个空行,多个序号时
    #                                         print(all_type[num], "第", count, "道题的题文和上一题的解析之间存在【多个题目序号】")
    #                                         if str(count + index) in topicno:
    #                                             ssub.insert(topicno_line_idx[str(count + index)], "【content】")
    #                                         else:
    #                                             return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                                 index + count) + "题)的题文和上一题的解析之间出现【题目序号不连续】,请检查该题目序号并重新手输!"
    #                                         # ssub.insert(con_id_line[-1], "【content】")  # 须优化
    #                             subcon[id + 1] = "\n".join(ssub)
    #                     # ----------------------------------------------------------------
    #                     all_item = re.split(r"【content】", "\n".join(subcon).strip())
    #                     for idk, one_item in enumerate(all_item):
    #                         dd = dict(zip(["content", "answer", "parse"],
    #                                       re.split(r"(?<=\n)\s*【答案】\n?|(?<=\n)\s*【解析】\n?",
    #                                                table_label_cleal(one_item))))
    #                         dd["item_topic_name"] = all_type[num]
    #                         dd["content"] = re.sub(r"\d+\s*[..、、]", "", dd["content"][:5]) + dd["content"][5:]
    #                         dd["score"] = each_item_score[num]
    #                         dd["errmsgs"] = []
    #                         dd["item_id"] = idk + 1 + index
    #                         if choice_class:
    #                             for k, v in choice_class.items():
    #                                 if count + index in v:
    #                                     dd["item_topic_name"] = k + "选题"
    #                                 elif len(choice_class) == 1:
    #                                     dd["item_topic_name"] = "多选题" if k == "单" else "单选题"
    #                         if not dd["score"] and each_item_score2 and str(dd["item_id"]) in each_item_score2.keys():
    #                             dd["score"] = each_item_score2[str(dd["item_id"])]
    #                         if select_type_id and dd["item_id"] in select_type_id:
    #                             dd['is_optional'] = 'true'
    #                         res.append(dd)
    #                     # pprint(res)
    #                     # print('------------------')
    #             else:
    #                 # print("【答案】或【解析】格式有误")
    #                 return "第" + str(num + 1) + "大题《" + all_type[num] + "》中【答案】或【解析】格式有误或其中某道题中出现多个相同关键字或漏关键字"
    #             index += count
    # return res, item_no_type


# Disabled local copy of only_parse_split, preserved as found (only line breaks
# and indentation restored).
# def only_parse_split(one_item_ans, item_type, reparse_n = 1):
#     """
#     拆分出答案和解析
#     :one_item: 一道题的答案解析部分,
#     :return:{'answer': ,"parse": }
#     """
#     dd = {'parse': one_item_ans, 'answer': ""}
#     simp_item = re.sub("(【([解分][析答]|详解|点[评睛])】|答案|解析|详解)\s*[::]?", "", one_item_ans)
#     simp_item = re.sub("[^\u4e00-\u9fa5∵∴]", "", simp_item)
#     if len(simp_item) < 10 and re.search("因为?|因此|所以|根据|依据|若|假设", simp_item) is None:
#         dd['parse'] = ""
#
#     if re.search(r"【(解析|解答|分析|详解|点评|点睛)】\n?|(解析|解答|分析|详解|点评|点睛)\s*[::]", one_item_ans):
#         dd1 = dict(zip(["answer", "parse_title", "parse"],
#                        re.split(r"【(解析|解答|分析|详解|点评|点睛)】\n?", one_item_ans, maxsplit=1)))
#         dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
#         del dd1["parse_title"]
#
#     if item_type in ["单选题", "多选题", "选择题", "单项选择", "多项选择"]:
#         ans = re.search(r'故选\s*[::]\s*]+?data-latex="([A-Z;;和与、、\s]+)".+?/>|故选\s*[::]?\s*([A-Z;;和与、、\s]+)',
#                         dd["parse"].replace("$", ""))
#         if ans:
#             dd["answer"] = ans.group(1) if ans.group(1) is not None else ans.group(2)
#         elif not dd['answer']:
#             dd['answer'] = one_item_ans.strip()
#         dd['answer'] = re.sub("[.;;.]", "", dd['answer'])
#     else:
#         ans1 = re.search(r'故\s*[::]?\s*(答案分?别?[为是]?|填)\s*[::]?\s*(.+?)[..]\s*(\n|$)', dd["parse"])
#         ans2 = re.search(r'故\s*[::]?\s*(答案分?别?[为是]?|填)\s*[::]?\s*()[..]?\s*(\n|$)', dd["parse"])
#         if reparse_n != 2 and "【答案】" not in one_item_ans and \
#                 len(re.findall(r"[((]\d[))]|[\n::;;。】]([((](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[))]|[①②③④])",
#                                one_item_ans.replace(" ", ""))) > 1:
#             dd["answer"] = "见解析"
#         elif ans1:
#             dd["answer"] = ans1.group(2)
#         elif ans2:
#             dd["answer"] = ans2.group(2)
#         elif not dd['parse']:
#             dd['answer'] = one_item_ans.strip()
#         else:
#             dd["answer"] = "见解析"
#
#     return dd