#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Origin: web paste "cdZWj / new_tiku_structure_2021" (page header and line-number gutter residue removed)

import re
from pprint import pprint
from washutil import table_label_cleal, find_seq_num


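# This file is meant to contain the following functions.
# NOTE(review): in the visible source these are commented out or absent, and some
# names differ from the commented-out definitions -- confirm before relying on this list.
# con_ans_split: split a single question (including its answer or analysis) into stem, answer and analysis
# ans_structure_total: overall structuring of the answer section
# ans_structure: split the answers and fill answer/analysis into the already-split questions (item_res)
# stem_ans_struc_combine: merge the structured stems with the structured answers
# manyans_oneline_split(item_str,  one_type_num): split a line that holds several answers
# only_parse_split: split out the answer and the analysis, mainly for each question's entry on an answer page
# get_ans_from_parse: pick the answer out of an already-known analysis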


# def ans_structure_step1(anss, item_type_classify, item_res):
#     """
#     针对答案部分解析结构化汇总
#     anss : 整个答案部分
#     :return:  dd = {'parse': , 'answer': }
#     """
#     anss = [k for k in anss if k.strip()]
#     ans_label = [k for k, a in enumerate(anss) if re.match("【答案】", a.strip())]
#     parse_label = [k for k, a in enumerate(anss) if re.match("【解析】", a.strip())]
#     if len(ans_label) == 1 and len(parse_label) == 1:
#         ans1 = anss[ans_label[0] + 1: parse_label[0]]
#         parse1 = anss[parse_label[0]+1:]
#         res_ans, flag1 = ans_structure_step2(ans1, item_type_classify, item_res,'group_ans')
#         res_parse, flag2 = ans_structure_step2(parse1, item_type_classify, item_res, 'group_parse')
#         if flag1 == flag2 == 1:
#             for idx, item_r in enumerate(item_res):
#                 if not res_ans[idx]['answer']:
#                     if not res_parse[idx]['answer']:
#                         item_res[idx]['answer'] = "见解析"
#                     else:
#                         item_res[idx]['answer'] = res_parse[idx]['answer']
#                 else:
#                     item_res[idx]['answer'] = res_ans[idx]['answer']
#
#                 if not res_ans[idx]['parse']:
#                     item_res[idx]['parse'] = res_parse[idx]['parse']
#                 else:  # 解析中的parse肯定有
#                     item_res[idx]['parse'] = res_ans[idx]['parse']+"<br/>【解析】"+res_parse[idx]['parse']
#             return item_res
#         elif flag1 == 2:
#             return "【答案】组中题型数量与题目中不一致,请重点检查题目序号,重新手输题目序号"
#         elif flag2 == 2:
#             return "【解析】组中题型数量与题目中不一致,请重点检查题目序号,重新手输题目序号"
#         else:
#             return '【答案】组和【解析】组中题型数量与题目中均不一致,请重点检查题目序号,重新手输题目序号'
#     else:
#         res_ans, flag1 = ans_structure_step2(anss, item_type_classify, item_res)
#         if flag1 == 1:
#             for idx, item_r in enumerate(item_res):
#                 item_res[idx]['answer'] = res_ans[idx]['answer']
#                 item_res[idx]['parse'] = res_ans[idx]['parse']
#         else:
#             # return "答案中题目数量与题目中不一致,①请重点检查题目序号,重新手输题目序号;②将参考答案开头没用的信息去掉;" \
#             #        "③是否有遗漏答案或答案格式不对；④答案中若存在一行多个答案时，保证每个题的答案间要留有多个空格！"
#             return res_ans
#         return item_res
#
#
# def ans_structure_step2(anss, item_type_classify, item_res, *group):
#     """
#     拆分答案，并根据已拆分好的题目item_res 补上答案和解析
#     有的答案放在表格里，如选择题、填空题、判断题，有的一行多个答案
#     思路：1.先按一行没有多个题答案的情况取答案，数量与题干不同 时 >>>> 2.再按一行多个答案的情况取答案：
#               1）先判断表格，拿到表格的答案；2）一行多个答案
#     anss: 一组按所有不重复题号的答案
#     item_type_classify: 题目中对各题型的统计
#     :return: [{'parse': , 'answer': },{},{}]
#     """
#     while not anss[0]:
#         anss = anss[1:]
#     if re.match(".+?省.+?试[卷题]|[^a-zA-Z]*?【专题】", anss[0]):
#         anss = anss[1:]
#
#     # 预处理： 对答案部分的题号进行处理， 将(\d)类型的题号改为\d、类型
#     sub_item_no = [int(no[0]+no[2]) for no in
#                    re.findall(r'\n\s*([1-9]|[1-4][0-9])\s*[.．、､]|\n\s*([1-9]|[1-4][0-9])\s*[.．、､].+?\s+([1-9]|[1-4][0-9])\s*[.．、､].+?',
#                               "\n" + "\n".join(anss))]
#     if len(sub_item_no) <= 2:
#         sub_item_no = [int(no[0]+no[2]) for no in re.findall(r'\n\s*\(([1-9]|[1-4][0-9])\)\s*[.．、､]?'
#                                         r'|\n\s*\(([1-9]|[1-4][0-9])\)\s*[.．、､]?.+?\s+\(([1-9]|[1-4][0-9])\)\s*[.．、､]?.+?',
#                                                              "\n" + "\n".join(anss))]
#         if len(sub_item_no) > 3:
#             anss = re.sub(r'\n\s*\(([1-9]|[1-4][0-9])\)\s*[.．、､]?', "\n" + r"【@\1､", "\n" + "\n".join(anss))
#             anss = re.sub(r'(\n【@([1-9]|[1-4][0-9])､.+?\s+)\(([1-9]|[1-4][0-9])\)\s*[.．、､]?', r"\1【@\3､", anss)
#             anss = anss.replace("【@", "").split("\n")[1:]
#
#     # --------- 一行多个答案的情况----存在一行中有选择题和填空题答案，填空题答案尽量每题占一行----------
#     all_item_ans = []
#     table_ans = []
#     ans_no = []
#     while anss and "table" in anss[0]:  # 答案以表格形式呈现, 表格应放在前两行位置，不要插在答案中间
#         row_list = []                   # 要求表格形式为 横纵分明 ，不存在合并
#         for tt in re.finditer('<tr>(((?!(</?tr>)).)*)</tr>', anss[0], re.S):  # 先划分每行
#             tt_list = re.split(r'</p></td>|<td><p>|</td><td>|</td>|<td>', tt.group(1))  # 再划分每列
#             # row_list.append([col for col in tt_list if col.strip()])  # 也有可能答案为空
#             row_list.append(tt_list)
#         if row_list:
#             print("^^^^^^存在答案放在表格里的情况！^^^^^^^")
#             if len(row_list) % 2 != 0:
#                 print('表格形式呈现的答案不是偶数行')
#             else:
#                 # print("row_list:", row_list)
#                 for k, v in enumerate(row_list):
#                     # print('-----',v)
#                     if (k + 1) % 2 == 1:  # 奇数行==》答案序号行
#                         item_no = [int(i) if re.sub(r"[^\d]", "", i) else -1 for i in v]
#                         item_no_st = [num for num, i in enumerate(item_no) if i != -1]   # 可能开头是-1
#                         ans_no.extend([i for i in item_no if i != -1])  # 表格序号
#                         table_ans.extend(row_list[k + 1][item_no_st[0]: item_no_st[-1] + 1])   # 表格答案
#         anss = anss[1:]
#     # 先按一行没有多个题答案的情况取答案
#     anss_str = table_label_cleal("\n" + "\n".join(anss))
#     if re.search("<table>.+?</table>", anss_str) is None:
#         anss_str = anss_str.split("</table>")[-1].replace("</div>", "")
#     anss_str = re.sub(r"([A-H])\s*[.．](\s*([1-4][0-9]|[1-9])\s*[.．、､])", r"\1  \2", anss_str)
#     anss_str = re.sub(r"([；;])(\s*([1-4][0-9]|[1-9])\s*[．、､])", r"\1  \2", anss_str)
#
#     rest_item_split = re.split(r'\n+\s*[1-4][0-9]\s*[.．、､]|\n+\s*[1-9]\s*[.．、､]', anss_str)
#     if not rest_item_split[0]:
#         rest_item_split = rest_item_split[1:]
#     all_item_ans.extend(table_ans)
#     all_item_ans.extend(rest_item_split)
#     print("表格答案：", table_ans)
#     pprint(all_item_ans)
#     # ------------先按没有一行多个答案的情况-------------------
#     if item_type_classify and len(all_item_ans) == sum(list(item_type_classify.values())):
#         res1 = []
#         for num1, one_ans in enumerate(all_item_ans):
#             parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
#             res1.append(parse)
#         return res1, 1
#     elif not item_type_classify and len(all_item_ans) == len(item_res):
#         res1 = []
#         for num1, one_ans in enumerate(all_item_ans):
#             parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
#             res1.append(parse)
#         return res1, 1
#     else:  # 答案个数与题目不一致时,再按一行多个答案处理(题目个数正常，答案个数比题目少时)
#         print('-----存在一行多个答案的情况-----')
#         all_item_ans = []
#         all_item_ans.extend(table_ans)
#         # 再按一行多个答案的情况取答案
#         manyans_oneline_split = re.split(r'\n\s*[1-4][0-9]\s*[.．、､]|\n\s*[1-9]\s*[.．、､]'
#                                          r'|(?<![：:.．、､+\-*/=])\s[1-4][0-9]\s*[.．、､]|(?<![：:.．、､+\-*/=])\s[1-9]\s*[.．、､]'
#                                          r'|\s{2,}[1-4][0-9]\s*[.．、､]|\s{2,}[1-9]\s*[.．、､]', anss_str)
#
#         temp_no = re.findall(r'\n\s*([1-4][0-9]|[1-9])\s*[.．、､]'
#                                          r'|(?<![：:.．、､+\-*/=])\s([1-4][0-9]|[1-9])\s*[.．、､]|\s{2,}([1-4][0-9]|[1-9])\s*[.．、､]', anss_str)
#         temp_no = [int("".join(i)) for i in temp_no]
#         # print("temp_no:",temp_no)
#         # print('manyans_oneline_split:', manyans_oneline_split, len(manyans_oneline_split))
#         if not temp_no and not all_item_ans:  # 没有表格答案的情况，如1~10 ACBBD...
#             row_ans = re.findall("[A-Z](?<!\))", manyans_oneline_split[0].strip())
#             all_item_ans.extend(row_ans)
#             temp_no = re.findall("(\d)-(\d{1,2})", manyans_oneline_split[0])
#             for t in temp_no:
#                 ans_no.extend(list(range(int(t[0]), int(t[1])+1)))
#             if row_ans:
#                 manyans_oneline_split = []
#         elif temp_no and not manyans_oneline_split[0]:
#             manyans_oneline_split = manyans_oneline_split[1:]
#             ans_no.extend(temp_no)
#         elif re.match("A-Z", manyans_oneline_split[1].strip()) is None and \
#                 len(re.findall("[A-Z](?<!\))", manyans_oneline_split[0].strip())) == len(item_res) - (len(manyans_oneline_split)-1):
#             print('第一行答案不是以题号形式一个个给出')
#             row_ans = re.findall("[A-Z](?<!\))", manyans_oneline_split[0].strip())
#             all_item_ans.extend(row_ans)
#             manyans_oneline_split = manyans_oneline_split[1:]
#             if temp_no and temp_no[0] > len(row_ans):
#                 ans_no.extend(list(range(temp_no[0]-len(row_ans), temp_no[0])))
#                 ans_no.extend(temp_no)
#             else:
#                 print("答案序号有问题！！")
#                 ans_no.extend(['']*len(row_ans))
#                 ans_no.extend(temp_no)
#         # print("manyans_oneline_split:************")
#         # pprint(manyans_oneline_split)
#         print("ans_no:", ans_no)
#         all_item_ans.extend(manyans_oneline_split)
#         combine_res = stem_ans_struc_combine(item_type_classify, item_res, all_item_ans, ans_no, group)
#         # if not combine_res:
#         #     return '答案数量与题干数量不一致，请检查题干和答案中的题号,是否有遗漏答案或答案格式不对；' \
#         #            '答案中若存在一行多个答案时，保证每个题的答案间要留有多个空格！', 2
#
#         return combine_res


# def stem_ans_struc_combine(item_type_classify, item_res, all_item_ans, ans_no, group):
#     """
#     题干结构化与答案结构化的合并
#     :return:
#     """
#     print("item_type_classify:", item_type_classify)
#     print("题干中的题目数量：", len(item_res))
#     print("答案中的题目数量：", len(all_item_ans))
#     if item_type_classify and len(all_item_ans) == sum(list(item_type_classify.values())):
#         res1 = []
#         for num1, one_ans in enumerate(all_item_ans):
#             parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
#             res1.append(parse)
#         return res1, 1
#     elif not item_type_classify and len(all_item_ans) == len(item_res):
#         res1 = []
#         for num1, one_ans in enumerate(all_item_ans):
#             parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
#             res1.append(parse)
#         return res1, 1
#     else:
#         print('答案数量与题干数量不一致，请检查题干和答案中的题号,是否有遗漏答案或答案格式不对；',
#               '答案中若存在一行多个答案时，保证每个题的答案间要留有多个空格！', 2)
#         print("试题个数：", len(item_res))
#         print("答案中的题号：", ans_no)
#         # ----------------------是否正确对上序号还需进一步验证！！！！！！！！！！-------------------------------
#         res1 = []; simp_res = []
#         err_n = 0  # 与题目id没对上号的个数， 默认答案一般也是从前往后排序
#         for k, one_item in enumerate(item_res):  # 以题目为主
#             search_range = ans_no
#             if k+3-err_n <= len(ans_no):
#                 search_range = ans_no[k-err_n:k+3-err_n]
#             elif k-err_n < len(ans_no):
#                 search_range = ans_no[k-err_n:]
#             # print("答案的搜索范围search_range:",search_range)
#             if one_item['item_id'] in search_range:  # 在对应位置前
#                 ans_no_st = [k1+k-err_n for k1, v1 in enumerate(search_range) if v1 == one_item['item_id']]  # 默认取第一个作为对应答案
#                 # print("答案的位置{0}：{1}, ----对应题目id:{2}".format(ans_no_st, all_item_ans[ans_no_st[0]],one_item['item_id']))
#                 parse = only_parse_split(all_item_ans[ans_no_st[0]], one_item["item_topic_name"], one_item['content'])
#                 one_item['answer'] = parse['answer']
#                 one_item['parse'] = parse['parse']
#                 res1.append(one_item)
#                 if group == 'group_ans':
#                     simp_res.append({'parse': "", 'answer': parse['answer'],'item_id':one_item['item_id']})
#                 if group == 'group_parse':
#                     simp_res.append({'parse': parse['parse'], 'answer': parse['answer'],'item_id':one_item['item_id']})
#             else:
#                 err_n += 1
#                 one_item.update({'parse': "", 'answer': ""})
#                 res1.append(one_item)
#                 if group:
#                     simp_res.append({'parse': '', 'answer': '', 'item_id': one_item['item_id']})
#         if simp_res:
#             return simp_res, 1
#
#         return res1, 2


# def one2more_ans_split(item_list, item_type):
#     """
#     对一行多个答案的情况进行拆分
#     :return:
#     """
#     manyans_oneline_split = []
#     while item_list and \
#             len(re.findall(r"(^|\s+)[1-9][0-9]?\s*[.．、､]\s*(【答案】|答案\s*[：:]?)?\s*[A-D]", item_list[0])) > 1:
#         print('选择题存在一行多个答案的情况！！！')  # 主要以选择题的为主
#
#         # 处理 1.xxx    2.xxx    3.xxx
#         ans_line1 = item_list[0]
#         if not item_type or item_type.replace("题", "") in ["单选", "多选", "选择", "单项选择", "多项选择"]:
#             ans_line1 = re.sub(r"[^A-D\d.．、､()]", "", item_list[0])
#
#         if re.match(r"[1-9][0-9]?[.．、､][A-D]", ans_line1):  # 第一个答案为选择题的情况
#             one_ans_split = re.split(r'^\s*[1-9][0-9]?\s*[.．、､]|\s+[1-9][0-9]?\s*[.．、､]', item_list[0])
#             if not one_ans_split[0]:
#                 one_ans_split = one_ans_split[1:]
#             manyans_oneline_split.extend(one_ans_split)
#         else:  # 第一个答案为非选择题或没有序号的情况
#             one_ans_split = re.split(r'^\s*[1-9][0-9]?\s*[.．、､]|\s+[1-9][0-9]?\s*[.．、､]', item_list[0])
#             if not one_ans_split[0]:
#                 one_ans_split = one_ans_split[1:]
#             if manyans_oneline_split and not manyans_oneline_split[-1]:  # 序号和答案跨行的情况
#                 manyans_oneline_split[-1] = one_ans_split[0]
#                 manyans_oneline_split.extend(one_ans_split[1:])
#             else:
#                 manyans_oneline_split.extend(one_ans_split)
#         item_list = item_list[1:]
#         # if item_list:
#         #     ans_line1 = re.sub(r"[^A-D\d.．、､]", "", item_list[0])
#
#     if manyans_oneline_split and not manyans_oneline_split[-1]:
#         print('答案要求：题目序号不在行首时不要与该题答案跨行')
#
#     # 填空题答案也可能一行多个
#     if item_type == '填空题':  # 在题干题型明确时
#         one_type_ans_split = re.split(r'\n\s*[1-9][0-9]?\s*[.．、､]|(?<![：:])\s+[1-9][0-9]?\s*[.．、､](?!png)',
#                                  table_label_cleal("\n"+"\n".join(item_list)))
#         while not one_type_ans_split[0]:
#             del one_type_ans_split[0]
#         one_type_ans_split.extend(one_type_ans_split)
#         return one_type_ans_split

    # 对item_list剩余文本按题号继续拆分，包括一行多个答案的情况
    # while re.match("\n\s*([1-9][0-9]?)\s*[.．、､].+?\s+([1-9][0-9]?)\s*[.．、､]", item_list[0]):
        # item_no_seq = re.findall("[\n\s]\s*([1-9][0-9]?)\s*[.．、､]", "\n" + "\n".join(item_list))
        # item_no_seq = [int(one) for one in item_no_seq]
        # # print("item_no_seq:",find_seq_num(item_no_seq))
        # if len(find_seq_num(item_no_seq)) == 1:
        #     all_type_ans_split = re.split(r'\n\s*[1-9][0-9]?\s*[.．、､]|(?<![：:])\s+[1-9][0-9]?\s*[.．、､](?!png)',
        #                                   table_label_cleal("\n" + "\n".join(item_list)))
        #     while not all_type_ans_split[0].strip():
        #         del all_type_ans_split[0]
        #     manyans_oneline_split.extend(all_type_ans_split)

    # return manyans_oneline_split


# def get_ans_from_parse(item_parse, item_type, res_con):
#     """
#     从已知解析中 挑选 答案
#     :param item_parse: 总解析
#     :param item_type: 题型
#     :return:
#     """
#     item_parse = re.split("【点评】|【点睛】", item_parse)[0].strip()
#     # 将解析中末尾出现的图片去掉
#     while re.search('\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$', item_parse):
#         item_parse = re.sub('\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$', "", item_parse)
#     item_ans = ""
#     if item_type.replace("题", "") in ["单选", "多选", "选择", "单项选择", "多项选择"]:
#         ans = re.search(r'故选\s*[：:]?\s*<imgsrc\d+\sdata-latex="\$?([A-Z;；和与、､\s]+)\$?"/>'
#                         r'|故选\s*[：:]?\s*([A-Z;；和与、､\s]+)', item_parse.replace("$", ""))
#         if ans:
#             item_ans = ans.group(1) if ans.group(1) is not None else ans.group(2)
#             item_ans = re.sub(r"[.;；．]\s*$", "", item_ans)
#         elif not ans:
#             item_ans = "见解析"
#     elif item_type:
#         ans0 = re.search(r'故选\s*[：:]?\s*([A-Z;；和与、､\s]+)[.．；;。]?$', item_parse)  # 试验题中可能还有选择题
#         ans01 = re.search(r'故选\s*[：:]\s*<imgsrc\d+\sdata-latex="\$?([A-Z;；和与、､\s]+)\$?"/>', item_parse)  # 选择题的题型可能前面分错
#         ans1 = re.search(r'(故|因[而此]|所以)\s*[：:]?\s*(答案分?别?[为是填]?|填)\s*[：:]?\s*(((?!(<img)).)+?)[.．]?\s*(\n|$)', item_parse)
#         ans11 = re.search(r'((?<!解)答\s*[：:]|整理得\s*[：:]?)\s*(.+?)([.．；;]?\s*$|[.．]\s*\n)', item_parse)
#         ans2 = re.search(r'(故|因[而此]|所以)\s*[：:]?\s*(答案分?别?[为是填]?|填)\s*[：:]?\s*(<imgsrc.+?/>)[.．]?\s*(\n|$)', item_parse, re.S)
#         ans22 = re.search(r'(故|因[而此]|所以)\s*[：:]?\s*(答案分?别?[为是填]?|填)\s*[：:]?\s*([^∴∵因所故即【】]+?)([.．]\s*(\n|$)|$)', item_parse)
#         ans21 = re.search(r'综上所述\s*[：:]\s*([^∴∵故因所即【】]+?)[.．；;]\s*$', item_parse)
#         ans3 = re.search(r'(故|因[而此]|所以|∴)\s*[：:]?.+?[为是填]\s*[：:]?\s*([^∴∵故因所即【】]+?)([.．；;，,]\s*$|[.．]\s*\n)', item_parse)
#         ans31 = re.search(r'(故|因[而此]|所以|∴)\s*([^当为是填∴∵故因所即则【】]+?)[.．；;]\s*$', item_parse)
#         ans32 = re.search(r'(故|因[而此]|所以)\s*[：:]?[^当为是填∴∵故因所即【】]+?[为是填]\s*[：:]?\s*(<imgsrc.+?/>)[.．]?\s*(\n|$)',
#                           item_parse, re.S)
#         ans4 = re.search(r'\n\s*[＝=]([^＝\n]+?)[.．]?\s*$', item_parse)
#         ans42 = re.search(r'[＝=](?!")(((?!([故＝∴即]|原式|因[而此]|所以|\n|=[^"])).)+?)[.．]?\s*$', item_parse)
#         ans41 = re.search(r'原式\s*[＝=].+?[＝=](?!")(((?!(＝|=[^"])).)+?|\s*<imgsrc.+?/>)([.．]?\s*$|[.．]\s*\n)', item_parse)
#         if not (item_type == '填空题' and len(re.findall(r"_{2,}|_+([^_]*?)_+", res_con)) == 1) and \
#                 len(re.findall(r"[（(]\d[)）]|[\n:：;；。】]([（(](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[)）]|[①②③④]\s*(?![+-]))",
#                                item_parse.replace(" ", ""))) > 1 or "证明" in item_parse:
#             item_ans = "见解析"
#         elif ans0:
#             item_ans = ans0.group(1)
#         elif ans01:
#             item_ans = ans01.group(1)
#         elif ans1 or ans11:
#             item_ans = ans1.group(3) if ans1 else ans11.group(2)
#         elif ans2:
#             item_ans = ans2.group(3)
#         elif ans22:
#             item_ans = ans22.group(3)
#         elif ans21:
#             item_ans = ans21.group(1)
#         elif (ans3 or ans31 or ans32) and '证明' not in item_parse:
#             if ans3:
#                 item_ans = ans3.group(2)
#             if ans31:
#                 item_ans = ans31.group(2)
#             if ans32:
#                 item_ans = ans32.group(2)
#         elif (ans4 or ans41 or ans42) and '证明' not in item_parse:
#             if ans4:
#                 item_ans = ans4.group(1)
#             if ans41:
#                 item_ans = ans41.group(1)
#             if ans42:
#                 item_ans = ans42.group(1)
#         else:
#             item_ans = "见解析"
#     return item_ans

# def con_ans_split(one_item_list, item_topic_name, topic_no):
#     """
#     将传进来的单道题（含答案或解析）按题干、答案、解析 拆分
#     :param one_item_list: [str,str,str] 每个元素为一行数据, ocr一行行识别, 保留每行数据;如果是word,直接用wordbin拿到html格式进行解析
#     :param item_topic_name:题型
#     :param topic_no:题号
#     :return: one_item:{"content":xxxx,"answer":xxx,"parse":xxx}
#     """
#     pattern1 = re.compile(r"【(.*?)\s+(.*?)】|【(.*?)<imgsrc.+?/>(.*?)】")
#     con0 = re.sub(r"】[:：]", "】", "#&#".join(one_item_list))  # 是否换行后面再考虑
#     con0 = re.sub(r"\[来源.*?\]", "", con0).replace("\xa0", " ").replace("\u3000", " ")
#     while re.search(pattern1, con0):
#         con0 = re.sub(pattern1, r"【\1\2】", con0)  # 去掉 【】 中的图片和空格
#
#     big_struc_dict = {"item_topic_name": item_topic_name, "topic_no": topic_no}
#     if re.search("【(答案|[解分][析答]|详解)】", con0):
#         label = re.findall("【(答案|[解分][析答]|详解|点[评睛])】", con0)
#         label_split = re.split(r"【答案】|【[解分][析答]】|【详解】|【点[评睛]】", con0)
#         big_struc_dict1 = dict(zip(label, label_split[1:]))
#         big_struc_dict1['con'] = label_split[0]
#         # 将键值为空的键删掉
#         big_struc_dict.update({k: v for k, v in big_struc_dict1.items() if v.replace("#&#", "").strip() != ""})
#     else:
#         big_struc_dict['con'] = con0
#
#     return big_struc_dict


if __name__ == '__main__':
    # Ad-hoc manual checks, executed only when the module is run as a script.
    #
    # Retained (disabled) sample 1: a multiple-choice item whose stem and options
    # embed <img> tags, previously fed to one_item_parse.
    # oneitem = {'content': '7．化简<img src="files/image7.jpeg" style="width: 28.8pt; height: '
    #            '28.8pt" width="38" height="38" />÷<img src="files/image8.jpeg" '
    #            'style="width: 47.95pt; height: 30.7pt" width="64" height="41" '
    #            '/>的结果是（\u3000\u3000）\n'
    #            'A．<img src="files/image9.jpeg" style="width: 21.6pt; height: '
    #            '28.8pt" width="29" height="38" />B．<img src="files/image10.jpeg" '
    #            'style="width: 21.7pt; height: 26.2pt" width="29" height="35" '
    #            '/>C．<img src="files/image11.jpeg" style="width: 21.6pt; height: '
    #            '28.8pt" width="29" height="38" />D．<img src="files/image12.jpeg" '
    #            'style="width: 21.7pt; height: 26.2pt" width="29" height="35" />\n'}
    # res = one_item_parse(oneitem, '单选题')
    #
    # Retained (disabled) sample 2: a question given line by line together with its
    # answer/analysis labels, previously fed to con_ans_split.
    # oneitem = ["1．下列说法正确的是（　　）","A．射线是高速运动的电子流","B．氢原子可通过吸收光子实现从低能级向高能级跃迁",
    #             "C．太阳辐射能量的主要来源是太阳内部发生的重核裂变","D．的半衰期是3.8天，1克经过7.6天后全部发生衰变",
    #             "【答案】B","【解析】","【分析】","【详解】","A．γ射线是电磁波，故A错误；","B．按照波尔理论，氢原子吸收光子后，将从低能级向高能级跃迁，故B正确;",
    #             "C．太阳辐射能量的主要来源是太阳中发生的轻核聚变，故C错误；","D．的半衰期是3.8天，7.6天是2个半衰期，根据可知，有发生衰",
    #             "变，还剩下克没有衰变，故D错误。","故选B。"]
    # num = 1
    # item_type = '选择题'
    # res = con_ans_split(oneitem, item_type, num)
    # pprint(res)

    # Check 1: run a one-line fill-in-the-blank answer through only_parse_split.
    # NOTE(review): only_parse_split is neither defined nor imported in the visible
    # part of this file -- confirm where it is supposed to come from.
    sample_answer = '故答案为：3．\n'
    split_result = only_parse_split(sample_answer, "填空题")
    print(split_result)

    # a = dict(zip(["2","2","3"], [2,4,6]))
    # print(a)

    # Check 2: apply the "therefore the answer is ..." pattern to a full analysis
    # text and print capture group 3, the answer text itself ('3' for this sample).
    analysis_text = '【分析】 首先根据分式值为零的条件，可得<img src="999image136.png" style="width: 77.35pt; height: 33.85pt" width="103" height="45" />；然后根据因式分解法解一元二次方程的步骤，求出x的值为多少即可．\n【解答】 解：∵分式<img src="999image137.png" style="width: 57.5pt; height: 31.7pt" width="77" height="42" />的值为0，\n<p>∴<img src="999image138.png" style="width: 77.35pt; height: 33.85pt" width="103" height="45" />\n<p>解得x=3，\n<p>即x的值为3．\n<p>故答案为：3．\n【点评】 （1）此题主要考查了分式值为零的条件'
    answer_pattern = re.compile(
        r'(故|因[而此]|所以)\s*[：:]?\s*(答案分?别?[为是填]?|填)\s*[：:]?\s*(((?!(<img)).)+?)[.．]?\s*(\n|$)'
    )
    answer_match = answer_pattern.search(analysis_text)
    print(':', answer_match.group(3))