#!/usr/bin/env python
# -*- coding:utf-8 -*-

import re
from structure.ans_structure import only_parse_split
from structure.option import option_structure
from structure.stems_to_groups import suojin


def get_slave(one_item, con, parse, ans, flag=1):
    """
    Split a question that contains numbered sub-questions into its sub-questions.

    Sub-question labels found in the stem are replaced by the internal marker
    "【ⅳ】" and the stem is split on that marker. When ``flag`` is falsy the answer
    and the analysis are split the same way. The pieces are then passed to
    ``split2little_con``, which attaches them to ``one_item``.

    When the stem holds at most one sub-question label nothing is split; instead
    the blank count, a possible "多选题" type and (via ``option_structure``) the
    options are filled in.

    :param one_item: question dict, must contain the key "stem"; updated and returned
    :param con: stem text (``None`` is treated as an empty string)
    :param parse: analysis text (``None`` is treated as an empty string)
    :param ans: answer text (``None`` is treated as an empty string)
    :param flag: truthy (default) leaves answer and analysis unsplit; falsy splits them as well
    :return: the updated question dict
    """
    marker = "【ⅳ】"
    # Every step below is a regex or str operation, so normalise missing texts first.
    con = con or ""
    parse = parse or ""
    ans = ans or ""

    # Rewrite Roman-numeral labels such as "(Ⅰ)" at the start of a line (or right after "】")
    # to "(1)" so that only one label style has to be handled further down.
    roman_to_arabic = {"Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}
    roman_label = r"([\n】])\s*[(（]\s*(" + "|".join(roman_to_arabic) + r")\s*[)）]"

    def to_arabic(match):
        return match.group(1) + roman_to_arabic[match.group(2)]

    con = re.sub(roman_label, to_arabic, con)
    parse = re.sub(roman_label, to_arabic, parse)

    # Put a label that directly follows a tag (or a "(n分)" score note in the stem) on its own line.
    con = re.sub(r"(<[/a-z]+>|[(（]\s*\d+\s*分\s*[）)])\s*([(（]\s*\d\s*[）)])", r"\1" + "\n" + r"\2", con)
    parse = re.sub(r"(<[/a-z]+>)\s*([(（]\s*\d\s*[）)])", r"\1" + "\n" + r"\2", parse)

    # Label patterns that are used more than once.
    digit_label = r"[（(]\d[)）]"
    circle_label = r"\n[（(](i{1,3}|[ⅰⅱⅲⅳ①②③④⑤])[)）]|\n[①②③④⑤]\s*(?![+-])"
    digit_split = r"((?<=[\n:：;；。])|^)\s*([(（]\s*\d\s*[)）])"
    any_split = r"((?<=[\n:：;；。])|^)\s*([(（]\s*(\d|i{1,3}|[ⅰⅱⅲⅳ①②③④⑤])\s*[)）]|[①②③④⑤]\s*(?![+-]))"

    con_compact = con.replace(" ", "")
    kuo_num = len(re.findall(digit_label, con_compact))
    circle_num = len(re.findall(circle_label, con_compact))

    if len(re.findall(digit_label + "|" + circle_label, con_compact)) <= 1:
        # No sub-questions: treat the item as a single question.
        blanks = re.findall(r"_{2,}", one_item["stem"])
        if blanks:
            one_item["blank_num"] = len(blanks)
        if flag and re.search(r"^[A-Z]{2,}$", re.sub(r"\W", "", ans)):
            one_item["type"] = "多选题"
        if len(re.findall(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[.．、､]", one_item["stem"])) >= 3:
            # At least three option-like labels in the stem: let option_structure split them.
            one_item = option_structure(one_item, con, ans, 1)
        return one_item

    # True: answers are taken per sub-question label; False: answers are taken per blank,
    # because the answer text was only separated by semicolons.
    by_sub_item = True

    # ---- stem ----
    if kuo_num > 1:
        con = re.sub(r"((?<=[\n:：;；。求])|^)\s*([(（]\s*\d\s*[)）])\s*(?!小?题?中)", marker, con)
    elif circle_num > 1:
        con = re.sub(r"((?<=[\n:：;；。求])|^)\s*([(（]\s*(\d|i{1,3}|[ⅰⅱⅲⅳ①②③④⑤])\s*[)）]|[①②③④⑤]\s*(?![+-]))",
                     marker, con)
    con_list = con.split(marker)

    # ---- answer and analysis ----
    ans_list = []
    parse_list = []
    syn_list = []
    analy_comment = []
    parse_common = ""
    ans_summarize = ""
    if not flag:
        # Answer: a plain space is not accepted as a separator.
        ans_compact = ans.replace(" ", "")
        if re.search(r"[;；]|\n[（(](\d|i{1,3}|[ⅰⅱⅲⅳ①②③④⑤])[)）]|\n[①②③④⑤]\s*(?![+-])", ans_compact):
            if len(re.findall(digit_label, ans_compact)) > 1:  # "(n)" labels take priority
                ans = re.sub(digit_split, marker, ans)
            elif len(re.findall(circle_label, ans_compact)) > 1:
                ans = re.sub(any_split, marker, ans)
            ans_list = ans.split(marker)
            # Drop empty leading pieces; the emptiness check keeps this from indexing an empty list.
            while ans_list and not ans_list[0]:
                ans_list = ans_list[1:]
            if len(ans_list) < len(con_list) - 1:
                # Fewer labelled answers than sub-questions: fall back to semicolon-separated answers.
                ans_list = re.split(r"[;；](?! height)", ans)
                by_sub_item = False

        # Analysis
        if parse:
            if re.search('【(详解|解析|解答)】', parse):
                temp_parse = re.split('【详解】|【解析】|【解答】', parse)
                parse = temp_parse[1]
                # If the part before the heading is itself split by sub-question, split it separately.
                if len(re.findall(digit_label, temp_parse[0].replace(" ", ""))) > 1:
                    syn = re.sub(r"((?<=[\n:：;；。】])|^)\s*([(（]\s*\d\s*[)）])", marker, temp_parse[0])
                    syn_list.extend(syn.split(marker))
                syn_list.append(temp_parse[0])  # the whole part before the heading

                if re.search("【(点评|点睛)】", parse):
                    comment = re.split('(【点评】|【点睛】)', parse)
                    analy_comment.append(comment[-2] + comment[-1])
                    parse = comment[0]

            # Mark the sub-question labels inside the analysis.
            if len(re.findall(digit_label, parse.replace(" ", ""))) > 1:
                parse = re.sub(digit_split, marker, parse)
                parse = re.sub(r"(/>)\s*([(（]\s*\d\s*[)）])", r"\1【ⅳ】", parse)
            else:
                parse = re.sub(any_split, marker, parse)
                parse = re.sub(r"(/>)\s*([(（]\s*(\d|i{1,3}|[ⅰⅱⅲⅳ①②③④⑤])\s*[)）]|[①②③④⑤]\s*(?![+-]))", r"\1【ⅳ】", parse)

            # A closing "故答案为: ..." sentence that lists several sub-answers is cut out of the
            # analysis and remembered as [text, index] so that split2little_con can put it back.
            ans_s = re.search(r'(\n.*?|^.*?|<p>)((故|因[而此]|所以)\s*[：:]?\s*答案分?别?([为是]|填)?\s*[：:]\s*(.+?))(\n|$)', parse)
            if ans_s:
                summary_body = ans_s.group(5)
                if summary_body and summary_body.count(marker) > 1:
                    summary_text = ans_s.group(2)
                    prefix = ans_s.group(1)
                    if prefix == '<p>' or not prefix.strip():
                        summary_index = parse.index(summary_text)
                    else:
                        summary_index = parse.index(prefix)
                    ans_summarize = [summary_text, summary_index]
                    parse = parse.replace(summary_text, "")
                elif summary_body and marker in summary_body:
                    parse = parse.replace(summary_body, summary_body.replace(marker, ""))

            # NOTE(review): "\s[:：]" demands exactly one whitespace character before the colon;
            # "\s*" may have been intended - confirm before changing.
            parse = re.sub(r"(【ⅳ】\s*解答?\s[:：])\s*【ⅳ】", r"\1", parse)
            little_parse = parse.split(marker)

            if len(syn_list) - 1 == len(little_parse) and len(little_parse) > 2:
                # e.g. syn_list has 4 entries and little_parse has 3: pair them per sub-question.
                parse_list = ["分析:{}\n解答:{}".format(syn_part, part)
                              for syn_part, part in zip(syn_list[1:], little_parse[1:])]
                parse_common = syn_list[0] + '\n' + little_parse[0]  # part shared by all sub-questions
            else:
                if syn_list and len(re.sub("[^\u4e00-\u9fa5]", "", syn_list[-1])) > 4:  # more than 4 Chinese characters
                    analy_comment.insert(0, syn_list[-1])
                parse_list = list(little_parse)
                if len(parse_list) > 1:
                    parse_common = parse_list[0]
                    parse_list = parse_list[1:]

    one_item = split2little_con(con_list, ans_list, parse_list, one_item, by_sub_item, ans_summarize)
    if not flag and "slave" in one_item and one_item["slave"]:
        # Sub-questions carry their own analysis; the item keeps only the shared part.
        one_item['parse'] = parse_common
    if analy_comment:
        one_item['analy'] = "\n".join(analy_comment)
    return one_item


def split2little_con(con_list, ans_list, parse_list, one_item, is_sub_item, ans_summarize):
    """
    Combine the per-sub-question stem, answer and analysis pieces into ``one_item["slave"]``.

    :param con_list: stem pieces; index 0 is the text before the first sub-question
    :param ans_list: answer pieces (a list), or a plain string such as "见解析"
    :param parse_list: analysis pieces, one per sub-question, or empty
    :param one_item: question dict; "stem" is read, "stem"/"slave"/"slave_no" are written
    :param is_sub_item: ``False`` means the answers are consumed per blank instead of per sub-question
    :param ans_summarize: falsy, or ``[text, index]`` of a summary sentence removed from the analysis
    :return: the updated question dict; it is returned untouched when ``con_list`` has fewer than 2 pieces
    """
    old_con = one_item["stem"]
    if len(con_list) <= 1:
        return one_item

    # An empty first piece means there is no shared stem, only sub-questions.
    one_item["stem"] = "" if con_list[0] == "" else suojin(con_list[0])

    slave = []
    for index, s in enumerate(con_list[1:]):
        blank_num = len(re.findall(r"_{2,}", s))
        # Remove a "(n分)" score note, but only within the first 9 characters.
        s = re.sub(r"[(（]\d+分[)）]", "", s[:9]) + s[9:]
        slave_no = "（%s）" % (index + 1)
        # NOTE(review): ans_list shrinks inside this loop when is_sub_item is False, so this
        # length check can give a different result for later sub-questions - confirm intent.
        ans_by_index = isinstance(ans_list, list) and len(con_list) - len(ans_list) == 1

        one_slave = {}
        if len(con_list) - len(parse_list) == 1:
            # One analysis piece per sub-question: take it by index.
            one_slave = {"slave_no": slave_no,
                         "stem": s,
                         "parse": parse_list[index]}
            if ans_by_index:
                one_slave["key"] = ans_list[index]
        elif not parse_list and ans_by_index:
            one_slave = {"slave_no": slave_no,
                         "stem": s,
                         "key": ans_list[index]}
        elif (not ans_list or ans_list == "见解析") and not parse_list:
            one_slave = {"slave_no": slave_no,
                         "stem": s,
                         "key": "",
                         "parse": "",
                         "errmsgs": []}
            may_stem_info = re.search(r"\n材料[一二三四五六七八九十]\s", s)
            if may_stem_info:
                # Text from a "材料X" line onwards is moved to the shared stem.
                one_slave["stem"] = s[:may_stem_info.start()]
                one_item["stem"] += s[may_stem_info.start() + 1:]
            if len(re.findall(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[.．、､]", s)) >= 4:
                one_slave = option_structure(one_slave, s, "", 1)
            # pop() instead of del: the dict returned by option_structure may not carry the key.
            one_slave.pop("errmsgs", None)

        if not one_slave:
            continue

        one_slave["blank_num"] = blank_num
        if ans_list:
            if is_sub_item is False:
                # Answers are taken by number of blanks, but every sub-question gets at least one.
                if blank_num >= 1:
                    one_ans = re.sub(r"(\n|^)\s*[(（]\s*" + str(index + 1) + r"\s*[)）](.+)", r"\2",
                                     ";".join(ans_list[:blank_num]))
                    one_ans = re.sub(r"((?<=[\n;；。])|^)\s*([(（]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④])\s*[)）]|[①②③④]\s*(?![+-]))(.+)",
                                     r"【ⅳ】\4", one_ans)
                    one_slave["key"] = one_ans.replace("【ⅳ】", "")
                    ans_list = ans_list[blank_num:]
                else:
                    one_slave["key"] = ans_list[0]
                    ans_list = ans_list[1:]
            elif isinstance(ans_list, str):
                # No concrete answer, e.g. the answer text is "见解析".
                one_slave["key"] = ans_list

            # Try once more to extract the answer of this sub-question from its analysis.
            if not ans_list or not one_slave.get("key") or one_slave["key"] == '见解析':
                # Slaves built from answers only have no "parse" key; pass "" instead of raising KeyError.
                new_ans = only_parse_split(one_slave.get("parse", ""), one_item["type"], one_slave["stem"],
                                           reparse_n=2)
                if new_ans["key"]:
                    one_slave["key"] = new_ans["key"]
                if not new_ans["parse"]:
                    one_slave["parse"] = ""
        slave.append(one_slave)

    one_item["slave"] = slave
    one_item["slave_no"] = "1-{}".format(len(slave)) if len(slave) > 1 else "1"
    if not slave:
        # Nothing could be combined: restore the stem and put the summary sentence back.
        one_item["stem"] = old_con
        if ans_summarize:
            cut = ans_summarize[1]
            one_item["parse"] = one_item["parse"][:cut] + '\n' + ans_summarize[0] + '\n' + one_item["parse"][cut:]
    return one_item