#!/usr/bin/env python
# -*- coding:utf-8 -*-

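# This file contains the following functions:
# stem_ans_split: further split one already-separated question into stem / answer / analysis
# correct_wrong_no: correct, or report an error for, question numbers that were split wrongly
# stems_structure_byno: split the text by question number
# dati2slave: split a major question that has sub-questions by sub-question
# split2little_con: further divide a fill-in-the-blank or free-response question that has
#                   sub-questions by sub-question (the sub-questions are already split)
# get_options_arrange: determine how many options are laid out per line in the Word document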


import re
from washutil import table_label_cleal
from ans_structrue import only_parse_split, get_ans_from_parse
from pprint import pprint
from collections import Counter


def stem_ans_split(one_item_dict, case):
    """
    Split one already-isolated question into stem, answer and analysis.

    :param one_item_dict: preliminary structure of a single question; this
        function reads only ``"content"`` (the raw question text) and
        ``"item_topic_name"`` (the question type).
    :param case: ``'case0'`` when the text carries no 【答案】 keyword; any
        other value is treated as "the 【答案】 keyword is present".
    :return: dict with the keys ``"content"``, ``"answer"`` and ``"parse"``.
        ``"parse"`` is ``""`` when no analysis marker is found.  When no
        answer could be cut out of the text, ``"answer"`` is whatever
        ``get_ans_from_parse`` derives from the analysis.
    """
    item_type = one_item_dict["item_topic_name"]
    cleaned = table_label_cleal(one_item_dict["content"])

    if case == 'case0':  # no 【答案】 keyword: only analysis-style markers
        pieces = re.split(r"【(解析|解答|分析|详解|点评|点睛|考点|专题)】\n*?", cleaned)
        # Re-wrap the captured marker names in brackets; everything else is
        # trimmed.  NOTE(review): replace('None', '') also strips a literal
        # "None" from real text - kept as-is, confirm whether still needed.
        pieces = ['【' + a + '】' if str(a).strip() in ['解答', '分析', '解析', '详解', '点评', '点睛']
                  else str(a).replace('None', '').strip() for a in pieces]
        dd = {"content": pieces[0]}
        if len(pieces) > 1:
            parse = str(pieces[1]) + "\n".join(pieces[2:]).replace("\n\n", "\n")
            # A leading 【解析】 title is redundant and dropped.
            dd["parse"] = re.sub(r"^\s*【解析】", "", parse)
        else:
            # No marker at all: previously a KeyError on "parse_title".
            dd["parse"] = ""
        dd["answer"] = ""
    else:  # 【答案】 keyword expected; it normally precedes the analysis
        head_tail = re.split(r"【答案】\n?", cleaned, maxsplit=1)
        dd = {"content": head_tail[0]}
        # Missing 【答案】 used to raise KeyError; treat it as "no answer".
        answer_and_parse = head_tail[1] if len(head_tail) > 1 else ""
        sub = re.split(r"【(解析|解答|分析|详解|点评|点睛)】\n?", answer_and_parse, maxsplit=1)
        dd["answer"] = sub[0]
        if len(sub) == 3:  # [answer, marker name, analysis body]
            dd["parse"] = re.sub(r"^\s*【解析】", "", "【" + sub[1] + "】" + sub[2])
        else:
            dd["parse"] = ""

    # Drop a question number such as "12." found within the first 5 characters.
    dd["content"] = re.sub(r"[1-9][0-9]?\s*[.．、､]", "", dd["content"][:5]) + dd["content"][5:]

    # Fall back to deriving the answer from the analysis text.
    if not dd["answer"]:
        dd["answer"] = get_ans_from_parse(dd["parse"], item_type, dd["content"])

    return dd


def stem_ans_split2(one_type_list, idx1, idx2, item_type, case):
    """
    Split one question, given as a slice of lines, into stem, answer and analysis.

    :param one_type_list: list of text lines belonging to one question type
    :param idx1: index of the question's first line (inclusive)
    :param idx2: index of the next question's first line (exclusive);
        ``-1`` means "up to the end of the list"
    :param item_type: question type name
    :param case: ``'case1'`` when the text carries no answer keyword; any
        other value is treated as "an answer keyword is present"
    :return: dict with the keys ``"content"``, ``"parse"``, ``"answer"`` and
        ``"item_topic_name"``
    """
    # -1 is a sentinel for "to the end", not "drop the last line".
    one_item = one_type_list[idx1:] if idx2 == -1 else one_type_list[idx1:idx2]
    cleaned = table_label_cleal("\n".join(one_item))

    if case == 'case1':  # no answer keyword: only analysis-style markers
        pieces = re.split(r"【(解析|解答|分析|详解|点评|点睛|考点|专题)】\n*?", cleaned)
        # NOTE(review): replace('None', '') also strips a literal "None" from
        # real text - kept as-is, confirm whether still needed.
        pieces = ['【' + a + '】' if str(a).strip() in ['解答', '分析', '解析', '详解', '点评', '点睛']
                  else str(a).replace('None', '').strip() for a in pieces]
        dd = {"content": pieces[0]}
        if len(pieces) > 1:
            dd["parse"] = str(pieces[1]) + "\n".join(pieces[2:]).replace("\n\n", "\n")
        else:
            # No marker at all: previously a KeyError on "parse_title".
            dd["parse"] = ""
        dd["answer"] = ""
    else:
        head_tail = re.split(r"【答案】\n?|答案\s*[：:]\n?", cleaned, maxsplit=1)
        dd = {"content": head_tail[0],
              "answer": head_tail[1] if len(head_tail) > 1 else ""}
        # The marker pattern has two alternative groups, so re.split would
        # return four items (one group is always None).  Use search and slice
        # around the match instead, so the analysis body is not lost.
        marker = re.search(r"【(解析|解答|分析|详解|点评|点睛)】\n?|(解析|解答|分析|详解|点评|点睛)\s*[：:]",
                           dd["answer"])
        if marker:
            title = marker.group(1) or marker.group(2)
            parse = "【" + title + "】" + dd["answer"][marker.end():]
            dd["parse"] = re.sub(r"^\s*【解析】", "", parse)
            dd["answer"] = dd["answer"][:marker.start()]
        else:
            dd["parse"] = ""

    # Drop a question number such as "12." found within the first 5 characters.
    dd["content"] = re.sub(r"[1-9][0-9]?\s*[.．、､]", "", dd["content"][:5]) + dd["content"][5:]
    dd["item_topic_name"] = item_type if re.sub('[(（]', "", item_type) != '本大题' else "解答题"

    # Only derive the answer from the analysis when none was cut out of the
    # text; an explicit answer must not be overwritten.
    if not dd["answer"].strip():
        if item_type in ["单选题", "多选题", "选择题"]:
            ans = re.search(r'故选[：:]?<imgsrc\d+data-latex="([A-Z;；和与、､\s]+)"/>|故选[：:]?([A-Z;；和与、､\s]+)',
                            dd["parse"].replace("$", "").replace(" ", ""))
            if ans:
                dd["answer"] = ans.group(1) if ans.group(1) is not None else ans.group(2)
            else:
                dd["answer"] = ""
        else:
            dd["answer"] = "见解析"
            ans = re.search(r'故\s*[：:]?\s*答案分?别?[为是]?\s*[：:]?\s*(.+?)[.．]\s*\n', dd["parse"])
            if ans:
                dd["answer"] = ans.group(1)

    return dd


# def correct_wrong_no(con_list, items_no, item_no_type):
#     """
#     针对分错的题号进行纠正  ；；带解析的划分题目最好按关键字拆分！！！！
#     题号划分错误有：题号重复，题号遗漏，题号偏离很远的错误如88.等
#     无题型行时，con_list中每个元素代表每一行
#     有题型行时，con_list中每个元素代表每个题型中的所有题目
#     items_no：初步找到的所有题号
#     :return:  con_list
#     """
#     # items_no = [1,2,3,4,5,6,7, 8, 9, 10, 11, 6, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
#     seq_no = find_seq_num(items_no)  # 找到连续的分组
#     print("items_no:", items_no)
#     print("seq_no:", seq_no)
#
#     err_no_idx = {}  # 分错的分组序号和错误题号,主要针对2个以内成组的序号
#     double_no = []   # 针对2个以上成组，且重复序号 分错的情况
#     omit_no = []  # 因没有换行或无题号导致 没有 切分出来的题号
#     right_no_list = []
#     if len(seq_no) > 1:  # 存在分断或分错的地方
#         print('按题号切分的过程中,存在分断或分错的地方')
#         right_no = [i for i in seq_no if len(i) > 2]
#         if len(find_seq_num(sum(right_no, []))) == 1:  # 2个以上成的所有组是连续的
#             # 题号序列异常值判断
#             right_seq = del_exception_value(items_no)  # 主要去掉异常的大值
#             # print("right_seq：",right_seq)
#             right_max_v = -1
#             if not right_seq:
#                 right_max_v = max(items_no)
#             else:
#                 right_max_v = right_seq[-1]
#             # print("right_max_v:", right_max_v)
#             if sum(right_no, [])[0] == 1 and sum(right_no, [])[-1] == right_max_v:  # 题号从1开始
#                 # [1,2,3,4,5,6,7, 8, 9, 10, 11, 6, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
#                 right_no_list.extend([i for k, i in enumerate(seq_no) if len(i) > 2])
#                 err_no_idx.update({k: i for k, i in enumerate(seq_no) if len(i) <= 2})  # 出现重复题号
#             else:  # 说明左右两边有遗漏
#                 # [[1, 2], [4, 5], [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]]
#                 # [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], [20, 21]]
#                 # [[1, 2], [4, 5], [7, 8, 9, 10, 11], [6], [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]]
#                 # todo_no = [i for i in seq_no if len(i) <= 2]
#                 right_no_idx = [k for k, i in enumerate(seq_no) if len(i) > 2]
#                 if seq_no[:right_no_idx[0]]:  # k>=1 左边有遗漏
#                     que_no = set(range(1, sum(right_no, [])[0])) - set(sum(seq_no[:right_no_idx[0]], []))
#                     omit_no.extend(list(que_no))
#                 elif len(right_no_idx) == 1 and seq_no[right_no_idx[0]+1:]:  # 右边有遗漏
#                     que_no = set(range(sum(right_no, [])[-1]+1, right_max_v)) - set(sum(seq_no[right_no_idx[0]+1:], []))
#                     omit_no.extend(list(que_no))
#                 # print("omit_no:",omit_no)
#                 # 既遗漏又有重复的错误不同时考虑！！！！，先报遗漏错误，教师修改后再对重复部分进行纠正
#         else:
#             # 存在题号错误：一种是与正确的重复，另一种是与序号偏离的很远，如81，目前是暂定取99内的数字作为序号
#             # [[1, 2], [4, 5, 6, 7, 8, 9, 10, 11], [13, 14], [16, 17, 18, 19, 20, 21]]
#             # [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [13, 14], [16, 17, 18, 19, 20, 21]]
#             num_count = Counter(items_no)
#             # print("num_count:",num_count)
#             if len(set(num_count.values())) > 1:
#                 print("存在{题号重复}的切分错误")
#                 for k, v in num_count.items():
#                     if v >= 2:  # 重复2次以上
#                         # print(items_no.index(k))  # 只能获取第一个元素的索引值
#                         v2_index = [index for (index, value) in enumerate(items_no) if value == k][1:]  # 重复序号的索引
#                         # 判断重复序号哪个是错误的，这里没有考虑题号遗漏的情况
#                         if v2_index[0]+items_no[0] > k:  # 位置 > 序号, 一般要求题号从1开始
#                             for subi in v2_index:
#                                 # print(subi, k)
#                                 double_no.append((k, 'xiao'))
#                                 del items_no[subi]
#                         if v2_index[0]++items_no[0] < k:  # 位置 < 序号
#                             for subi in v2_index:
#                                 double_no.append((k, 'da'))
#                                 del items_no[subi]
#
#             else:  # 存在题号遗漏
#                 print("存在题号遗漏")
#                 for k, i in enumerate(right_no):
#                     if k == 0:
#                         if i[0] == 2:
#                             omit_no.append(1)
#                         if i[0] > 2:
#                             omit_no.append("1~"+str(i[0]-1))
#                     if 0 < k < len(right_no):
#                         omit_no.extend(list(range(right_no[k-1][-1]+1, i[0])))
#     # if omit_no:
#     #     return "第" + ",".join(map(str, omit_no)) + "题的格式是否正确，不要放在表格中,且要求题号从1开始并连续；" \
#     #            "若格式正确,请将第" + ",".join(map(str, omit_no)) + "题的题号（包括题号后的标点符号）重新手输且与上一题重新换行"
#
#     if double_no and len(find_seq_num(items_no)) == 1:
#         # 在分错题号前加标识
#         all_con = "@@\n" + "@@\n".join(con_list)
#         for db in double_no:
#             may_no_st = re.search(r"\n\s*" + str(db[0]) + r'\s*([.．、､].+?)',
#                                   all_con, re.S).start()  # 分错位置在全文中的索引
#             if item_no_type == 2:
#                 may_no_st = re.search(r"\n\s*[(（]\s*" + str(db[0]) + r'\s*[)）]\s*([.．、､]?.+?)',
#                                       all_con, re.S).start()  # 分错位置在全文中的索引
#             if db[1] == 'xiao':  # 重复的切分错误的序号在正确的后面，第一个匹配到的是正确的
#                 # all_con = all_con[:may_no_st] + re.sub(r"\s+((?!src).)+?", r"\1", all_con[may_no_st:][:15]) + all_con[may_no_st:][15:]
#                 # 该正则表示空格后面是src字符串时，空格保留;最开始时图片已做过替换，这里也可以去掉图片信息中的空格
#
#                 err_no_st = re.search(r"\n\s*" + str(db[0]) + r'\s*([.．、､].+?)',
#                                   all_con[may_no_st+10:], re.S).start()  # 分错位置在全文中的索引
#                 if item_no_type == 2:
#                     err_no_st = re.search(r"\n\s*[(（]\s*" + str(db[0]) + r'\s*[)）]\s*([.．、､]?.+?)',
#                                           all_con[may_no_st + 10:], re.S).start()  # 分错位置在全文中的索引
#                 # print("err_no_st:", err_no_st, all_con[may_no_st + err_no_st+10:may_no_st + err_no_st+20])
#
#                 all_con = all_con[:may_no_st + err_no_st + 11] + "【fei】" \
#                            + all_con[may_no_st + err_no_st + 11:]  # 在分错题号前加标识
#
#             if db[1] == 'da':  # 重复的切分错误的序号在正确的前面，第一个匹配到的是错误的
#                 all_con = all_con[:may_no_st + 1] + "【fei】" \
#                           + all_con[may_no_st + 1:]  # 在分错题号前加标识
#         # print("all_con:",all_con)
#         con_list = all_con.split("@@\n")[1:]
#
#     # 针对2个以内成组的序号 加错误标识
#     sorted_idx = sorted(err_no_idx.keys(), reverse=False)  # 对字典按索引位置排序
#     print("err_no_idx:", err_no_idx, "sorted_idx:", sorted_idx)
#     if err_no_idx:
#         if sorted_idx[0] > 0:
#             all_con = "@@\n" + "@@\n".join(con_list)
#             st_flag = str(seq_no[sorted_idx[0] - 1][-1])  # 分错位置的前一个题号
#             # 分错位置的前一个题号在全文中的索引
#             # if err_no_idx[sorted_idx[0]][0] == int(st_flag):
#             #     return st_flag + "题题号出现重复"
#             st_flag_index = re.search(r"\n+\s*" + st_flag + r'\s*([.．、､].+?)', all_con, re.S).start()
#             if item_no_type == 2:
#                 st_flag_index = re.search(r"\n+\s*[(（]\s*" + st_flag + r'\s*[)）]\s*([.．、､]?.+?)', all_con, re.S).start()
#             for k in sorted_idx:  # 遍历键
#                 for subk in err_no_idx[k]:  # 遍历 键 的值
#                     # print('*****************')
#                     # print("st_flag:", st_flag, '---subk:', subk)
#                     # print("st_flag_index:",st_flag_index)
#                     err_no_st = re.search(r"\n\s*" + str(subk) + r'\s*([.．、､].+?)',
#                                           all_con[st_flag_index:], re.S).start()  # 分错位置在全文中的索引
#                     if item_no_type == 2:
#                         err_no_st = re.search(r"\n\s*[(（]\s*" + str(subk) + r'\s*[)）]\s*([.．、､]?.+?)',
#                                               all_con[st_flag_index:], re.S).start()  # 分错位置在全文中的索引
#                     all_con = all_con[:st_flag_index + err_no_st + 1] + "【fei】" \
#                               + all_con[st_flag_index + err_no_st + 1:]  # 在分错题号前加标识
#             con_list = all_con.split("@@\n")[1:]
#         else:  # 拿到了前面不是题号的序号 [27, 27, 1, 2, 3, 4, 5, 6, 7]
#             all_con = "@@\n" + "@@\n".join(con_list)
#             if items_no.count(1) == 1:
#                 con_1 = re.split(r"@@\n\s*1\s*[.．、､]", all_con)[1]
#                 con_list = ("1､"+con_1).split("@@\n")            # right_no_list = sum(right_no_list, [])
#             # right_no_list = str(right_no_list).replace("[", "").replace("]", "").replace(" ", "").split(",")
#
#             # con_list = re.split(r"\n\s*("+ r"|".join(right_no_list) + ")\s*[.．、､]", all_con)[1:]
#             # if len(con_list) > 1:
#             #     con_list = [con for k, con in enumerate(con_list) if k % 2 == 1]
#     return con_list


def split2one_item(con_list):
    """
    Stub for splitting a teacher's-edition paper into individual questions.

    The whole implementation that follows this docstring is commented out,
    so the function currently ignores its argument and returns ``None``.

    Notes carried over from the original description:
      * Paper format 1: teacher's edition, containing the answer and
        analysis keywords.
      * Input is an HTML file; the document is first divided by major
        question.
      * Splitting idea: split on blank lines. First delete the empty
        ``<p> </p>`` lines in front of 【答案】, 【解析】 and
        ``<img src="files/image\\d+.png">``, then split directly on
        ``<p></p>``.
      * Format requirements: each question is numbered like ``21.``
        (digits + ASCII full stop); major questions use Chinese numerals
        (一二三四) + the Chinese enumeration comma.

    :param con_list: presumably the document's lines; unused while the
        body is disabled
    :return: None
    """
    return None
    # item_no_type = 1
    # # all_con = table_label_cleal("\n" + "\n".join(con_list))
    # # item_no = [int(no) for no in re.findall(r'\n+\s*([1-9][0-9]?)\s*[.．、､]', all_con)]
    # # if len(item_no) <= 2:
    # #     item_no_type = 2
    # #     item_no = [int(no) for no in re.findall(r'\n+\s*[(（]\s*([1-9][0-9]?)\s*[)）]\s*[.．、､]?', all_con)]
    # #     if len(item_no) > 3:
    # #         all_con = re.sub(r'\n\s*\(([1-9][0-9]?)\)\s*[.．、､]?', "\n" + r"【@\1､", all_con)
    # #         con_list = all_con.replace("【@", "").split("\n")[1:]
    # # ----------------------------------------------------------------------------
    # # 去掉多余空格，作用不大
    # con2 = ["【delete】" if (k < len(con_list) - 1 and v.strip() == "" and (
    #         re.match(r"【(答案|解析)】|(答案|解析)\s*[：:]|<imgsrc\d+|\s+", con_list[k + 1].strip()) or
    #         re.match(r"(([1-9]|[1-4][0-9])\s*[.．、､]|[一二三四五六七八九十]\s*[、.．､]\s*[^必考基础综合中等]{2,4}题)",
    #                  con_list[k + 1].strip()) is None))
    #                       or (k > 0 and v.strip() == "" and (
    #         re.match(r"【(答案|解析)】$|(答案|解析)\s*[：:]", con_list[k - 1].strip()) or
    #         re.match(r"[a-z<>/\s]*[一二三四五六七八九十]\s*[、.．､]\s*[^必考基础综合中等]{2,4}题",
    #                  con_list[k - 1].strip())))
    #         else v for k, v in enumerate(con_list)]
    # con3 = list(filter(lambda x: x != "【delete】", con2))
    # while len(con3) > 0:
    #     if con3[-1].strip() == "":
    #         del con3[-1]
    #     if con3[0].strip() == "":
    #         del con3[0]
    # con3.append("")  # 不然最后一个题就漏掉了
    #
    # # 开头没用信息处理
    # con3[0] = re.sub(r"([一二三四五六七八九十]\s*[、.．､]\s*[^必考基础综合中等]{2,4}题)", r"\n\1", con3[0])
    # while con3 and (re.search(r"[\u4e00-\u9fa5]", con3[0]) is None
    #                 or re.search(r"[一二三四五六七八九十]\s*[、.．､]\s*[^必考基础综合中等]{2,4}题", con3[0]) is None):
    #     del con3[0]
    #
    # # ----------------------解析 方案【1】-------------------------------------------------------------
    # # 根据大题型分，再按【答案|解析】初步拆分题目，再在‘解析’和‘答案’间细分‘题干’和‘解析’
    # # 1、获取题型行信息、按题型行切分
    # con4, all_type_info, all_type, each_item_score, each_item_score2, select_type_id, choice_class \
    #     = get_item_head_info("\n" + "\n".join(con3))
    #
    # # 2、据是否有题型行分两步进行
    # res = []
    # if not all_type:
    #     print("不存在大题题型行或题型行格式有问题")
    #     return "不存在大题题型行或题型行格式有问题，请检查"  # 放第【2】种方案中进行处理
    # else:
    #     if len(all_type) != len(con4):
    #         print("存在题型行没有换行")
    #         return "存在题型行末尾没有换行，请在所有题型行末尾重新换行"  # 放第【2】种方案中进行处理
    #     else:
    #         # if "非选择题" in all_type:
    #         #     return "第" + str(all_type.index("非选择题")+1) + "大题的题型不明确"
    #         index = 0
    #         for num, one_type in enumerate(con4):
    #             count = 1
    #             if len(re.findall(r"\n\s*【答案】", one_type)) == len(re.findall(r"\n\s*【解析】", one_type)):
    #                 subcon = re.split(r"((?<=\n)\s*【答案】|(?<=\n)\s*【解析】)\n?", one_type.strip())
    #                 # index根据第一道题的题号进行纠正
    #                 st_pat = re.match(r"([1-9]|[1-6][0-9])\s*[.．、､].+?", subcon[0].strip())
    #                 if st_pat and num == 0:
    #                     st_id = st_pat.group(1)
    #                     if int(st_id) != 1:
    #                         index = int(st_id) - 1
    #
    #                 if len(subcon) == 5:  # 只有1道题
    #                     dd = dict(zip(["content", "answer", "parse"],
    #                                   re.split(r"(?<=\n)\s*【答案】|(?<=\n)\s*【解析】", table_label_cleal(one_type))))
    #                     dd["item_topic_name"] = all_type[num]
    #                     dd["content"] = re.sub(r"\d+\s*[.．、､]", "", dd["content"][:5]) + dd["content"][5:]
    #                     dd["score"] = each_item_score[num]
    #                     dd["errmsgs"] = []
    #                     dd["item_id"] = count + index
    #                     if not dd["score"] and each_item_score2 and str(dd["item_id"]) in each_item_score2.keys():
    #                         dd["score"] = each_item_score2[str(dd["item_id"])]
    #                     if select_type_id and dd["item_id"] in select_type_id:
    #                         dd['is_optional'] = 'true'
    #                     res.append(dd)
    #                     # count += 1
    #                 else:
    #                     # ------在下一题【解析】在本题【答案】之间找到下一题【content】的位置--------
    #                     for id in range(len(subcon)):
    #                         if re.match(r"\n*\s*【解析】", subcon[id]) and id < len(subcon) - 2:  # 不是最后一个解析,倒数第二个是最后一个解析
    #                             count += 1
    #                             ssub = subcon[id + 1].strip().split("\n")  # 首尾空行先去掉
    #                             blank_line = [i for i, v in enumerate(ssub) if v.strip() == ""]  # 空格索引
    #                             #  索引to题号字典
    #                             con_id_line_dict = {i: re.match(r"([1-9]|[1-6][0-9])\s*[.．、､]", v.strip()).group(1)
    #                                                 for i, v in enumerate(ssub)
    #                                                 if re.match(r"([1-9]|[1-6][0-9])\s*[.．、､]", v.strip())}
    #                             # print("con_id_line_dict",con_id_line_dict)
    #                             con_id_line = list(con_id_line_dict.keys())  # 行索引,第几行
    #                             topicno = list(con_id_line_dict.values())  # 题号序列
    #                             topicno_line_idx = dict(zip(topicno, con_id_line))  # 题号to行索引字典
    #                             if len(con_id_line) != len(topicno_line_idx):
    #                                 return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                     index + count) + "题)的题文和上一题的解析之间出现【多个相同的题目序号】,请重新确认！"
    #                             else:
    #                                 if len(blank_line) == 1 and len(con_id_line) == 1:  # 一般情况只有一个空行
    #                                     if con_id_line[0] > blank_line[0]:
    #                                         ssub.insert(con_id_line[0], "【content】")
    #                                     else:
    #                                         if str(count + index) == topicno[0]:  # 该题的序号正确，优先按序号拆
    #                                             ssub.insert(con_id_line[0], "【content】")
    #                                         else:
    #                                             ssub[blank_line[0]] = "【content】"  # 该题序号不对时再考虑空行
    #                                 elif len(blank_line) != 1:
    #                                     if len(con_id_line) >= 1:  # 优先考虑题目序号，多个序号时
    #                                         # ssub.insert(con_id_line[-1], "【content】")   # 默认最后一个，很粗糙
    #                                         if str(count + index) in topicno:
    #                                             ssub.insert(topicno_line_idx[str(count + index)], "【content】")
    #                                         else:
    #                                             return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                                 index + count) + "题)的题文和上一题的解析之间出现【题目序号不连续】,请检查该题目序号并重新手输！"
    #                                     elif len(blank_line) > 1:  # 题目序号有误，多个空行时
    #                                         # ssub[blank_line[-1]] = "【content】"
    #                                         return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                             index + count) + "题)的题文和上一题的解析之间出现【题目序号有误】,请将题目序号重新手输！"
    #                                     else:  # 无序号，无空行
    #                                         return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                             index + count) + "题)的题文和上一题的解析之间出现【题目序号或空行都有误】,请将题目序号重新手输并查看空行！"
    #                                     # 如果存在空行有误，且题目序号有误时，那基本就会拆分错误
    #                                 else:  # len(con_id_line)!=1
    #                                     if not con_id_line:  # 一个空行，没有序号时
    #                                         # ssub[blank_line[0]] = "【content】"
    #                                         return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                             index + count) + "题)的题文和上一题的解析之间出现【题目序号有误】,请将题目序号重新手输！"
    #                                     else:  # 1个空行，多个序号时
    #                                         print(all_type[num], "第", count, "道题的题文和上一题的解析之间存在【多个题目序号】")
    #                                         if str(count + index) in topicno:
    #                                             ssub.insert(topicno_line_idx[str(count + index)], "【content】")
    #                                         else:
    #                                             return all_type[num] + "第" + str(count) + "道题(在整篇文档中为第" + str(
    #                                                 index + count) + "题)的题文和上一题的解析之间出现【题目序号不连续】,请检查该题目序号并重新手输！"
    #                                         # ssub.insert(con_id_line[-1], "【content】")  # 须优化
    #                             subcon[id + 1] = "\n".join(ssub)
    #                     # ----------------------------------------------------------------
    #                     all_item = re.split(r"【content】", "\n".join(subcon).strip())
    #                     for idk, one_item in enumerate(all_item):
    #                         dd = dict(zip(["content", "answer", "parse"],
    #                                       re.split(r"(?<=\n)\s*【答案】\n?|(?<=\n)\s*【解析】\n?",
    #                                                table_label_cleal(one_item))))
    #                         dd["item_topic_name"] = all_type[num]
    #                         dd["content"] = re.sub(r"\d+\s*[.．、､]", "", dd["content"][:5]) + dd["content"][5:]
    #                         dd["score"] = each_item_score[num]
    #                         dd["errmsgs"] = []
    #                         dd["item_id"] = idk + 1 + index
    #                         if choice_class:
    #                             for k, v in choice_class.items():
    #                                 if count + index in v:
    #                                     dd["item_topic_name"] = k + "选题"
    #                                 elif len(choice_class) == 1:
    #                                     dd["item_topic_name"] = "多选题" if k == "单" else "单选题"
    #                         if not dd["score"] and each_item_score2 and str(dd["item_id"]) in each_item_score2.keys():
    #                             dd["score"] = each_item_score2[str(dd["item_id"])]
    #                         if select_type_id and dd["item_id"] in select_type_id:
    #                             dd['is_optional'] = 'true'
    #                         res.append(dd)
    #                 # pprint(res)
    #                 # print('------------------')
    #             else:
    #                 # print("【答案】或【解析】格式有误")
    #                 return "第" + str(num + 1) + "大题《" + all_type[num] + "》中【答案】或【解析】格式有误或其中某道题中出现多个相同关键字或漏关键字"
    #             index += count
    # return res, item_no_type
    #


# def only_parse_split(one_item_ans, item_type, reparse_n = 1):
#     """
#     拆分出答案和解析
#     :one_item: 一道题的答案解析部分,
#     :return:{'answer': ,"parse": }
#     """
#     dd = {'parse': one_item_ans, 'answer': ""}
#     simp_item = re.sub("(【([解分][析答]|详解|点[评睛])】|答案|解析|详解)\s*[：:]?", "", one_item_ans)
#     simp_item = re.sub("[^\u4e00-\u9fa5∵∴]", "", simp_item)
#     if len(simp_item) < 10 and re.search("因为？|因此|所以|根据|依据|若|假设", simp_item) is None:
#         dd['parse'] = ""
#
#     if re.search(r"【(解析|解答|分析|详解|点评|点睛)】\n?|(解析|解答|分析|详解|点评|点睛)\s*[：:]", one_item_ans):
#         dd1 = dict(zip(["answer", "parse_title", "parse"],
#                        re.split(r"【(解析|解答|分析|详解|点评|点睛)】\n?", one_item_ans, maxsplit=1)))
#         dd["parse"] = "【" + dd1["parse_title"] + "】" + dd1["parse"]
#         del dd1["parse_title"]
#
#     if item_type in ["单选题", "多选题", "选择题", "单项选择", "多项选择"]:
#         ans = re.search(r'故选\s*[：:]\s*<img src=[^>]+?data-latex="([A-Z;；和与、､\s]+)".+?/>|故选\s*[：:]?\s*([A-Z;；和与、､\s]+)',
#                         dd["parse"].replace("$", ""))
#         if ans:
#             dd["answer"] = ans.group(1) if ans.group(1) is not None else ans.group(2)
#         elif not dd['answer']:
#             dd['answer'] = one_item_ans.strip()
#         dd['answer'] = re.sub("[.;；．]", "", dd['answer'])
#     else:
#         ans1 = re.search(r'故\s*[：:]?\s*(答案分?别?[为是]?|填)\s*[：:]?\s*(.+?)[.．]\s*(\n|$)', dd["parse"])
#         ans2 = re.search(r'故\s*[：:]?\s*(答案分?别?[为是]?|填)\s*[：:]?\s*(<img src=.+?/>)[.．]?\s*(\n|$)', dd["parse"])
#         if reparse_n != 2 and "【答案】" not in one_item_ans and \
#                 len(re.findall(r"[（(]\d[)）]|[\n:：;；。】]([（(](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[)）]|[①②③④])",
#                                one_item_ans.replace(" ", ""))) > 1:
#             dd["answer"] = "见解析"
#         elif ans1:
#             dd["answer"] = ans1.group(2)
#         elif ans2:
#             dd["answer"] = ans2.group(2)
#         elif not dd['parse']:
#             dd['answer'] = one_item_ans.strip()
#         else:
#             dd["answer"] = "见解析"
#
#     return dd