#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Origin: cdZWj / new_tiku_structure_v3_sci

import re
from structure.ans_structure import only_parse_split
from structure.option import option_structure


def get_slave(one_item, con, parse, ans, parse_split=1):
    """
    Split a question that carries numbered sub-questions into ``slave`` items.

    The stem ``con`` is cut at sub-question labels such as ``(1)`` / ``（2）``.
    If those are not found, Roman labels (``(Ⅰ)`` ...) are rewritten to
    ``(1)`` ... and the check is repeated; ``(i)`` / ``①`` style labels are the
    last fallback.  When ``parse_split`` is truthy, the answer ``ans`` and the
    analysis ``parse`` are cut the same way.  The pieces are then passed to
    ``split2little_con``, which stores them under ``one_item["slave"]``.

    When no sub-questions are detected, nothing is split and only
    ``one_item["answer_type"]`` (plus ``blank_num`` / ``key`` / ``options``
    where applicable) is filled in.

    :param one_item: dict describing the question; modified in place.  The
        single-question path reads "stem" and may read "type", "key" and
        "com_stem"; the split path reads the optional "topic_num"/"item_id".
    :param con: stem text of the question
    :param parse: analysis text (may be an empty string)
    :param ans: answer text
    :param parse_split: truthy -> answer and analysis are split per
        sub-question as well; falsy -> only the stem is split
    :return: ``one_item``
    """
    mark = "【ⅳ】"  # placeholder written where a sub-question starts

    # The template asks teachers to number sub-questions (1) (2); Roman
    # numerals are mapped onto that form.
    th1 = {"(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
           "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}

    # Put "(n)" on its own line when it directly follows a tag or a score mark.
    con = re.sub(r"(<[/a-z]+>|[(（]\s*\d+\s*分\s*[）)])\s*([(（]\s*([1-9]|1[0-9])\s*[）)])",
                 r"\1" + "\n" + r"\2", con)
    parse = re.sub(r"(<[/a-z]+>)\s*([(（]\s*([1-9]|1[0-9])\s*[）)])", r"\1" + "\n" + r"\2", parse)

    # Sub-stems are split on "\n(n)" first; other label styles are only tried
    # when that fails.
    pattern1 = re.compile(r"(?<=[\n:：;；。])\s*([(（]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[)）]"
                          r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
    pattern12 = re.compile(r"\n([(（](i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])[)）]|[①②③④⑤⑥⑦⑧⑨⑩])(?![+-])")
    pattern13 = re.compile(r"(?<=[\n:：;；。求])\s*([(（]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[)）]"
                           r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
    pattern2 = re.compile(r"((?<=[\n:：;；。])\s*|\s{2,})([(（]\s*([1-9]|1[0-9])\s*[)）])\s*(?!小?[题问]中)")
    pattern22 = re.compile(r"(?<=[\n:：;；。求])\s*([(（]\s*([1-9]|1[0-9])\s*[)）])\s*(?!小?[题问]中)")

    # A line that starts with "(d)" and is not a reference like "(1)小题中".
    numbered_line = r"\n\s*[（(]\s*\d\s*[)）]\s*(?!小?[题问]中)"

    # The keys are inserted unescaped, so the parenthesised keys act as regex
    # groups; the matched text is always one of the bare-numeral keys.
    roman_label = re.compile(r"([\n】])\s*[(（]\s*(" + "|".join(th1.keys()) + r")\s*[)）]")

    def to_arabic(m):
        return m.group(1) + th1[m.group(2)]

    split_style = 1
    if len(re.findall(numbered_line, "\n" + con)) > 1:
        con = re.sub(pattern22, mark, "\n" + con)
    else:
        con = roman_label.sub(to_arabic, con)
        parse = roman_label.sub(to_arabic, parse)
        if len(re.findall(numbered_line, "\n" + con)) > 1:
            con = re.sub(pattern22, mark, "\n" + con)
        elif len(re.findall(pattern12, "\n" + con.replace(" ", ""))) > 1:
            con = re.sub(pattern13, mark, "\n" + con)
            split_style = 2
        else:
            split_style = 0

    if split_style:
        con_list = con.split(mark)

        # --------------- split answer and analysis ---------------
        ans_list = []
        parse_list = []
        analy_comment = []  # analysis text that belongs to the whole question
        raw_ans = ans
        ans_summarize = ""
        if parse_split:
            # Answers must carry the same labels as the stem; blanks alone
            # are not accepted as separators.
            if ans.strip() == "见解析":
                ans_list = ["见解析"] * (len(con_list) - 1)
            else:
                if re.search(r"(\n\s*|\s{2,})[（(]\s*\d\s*[)）]", "\n" + ans):
                    if len(re.findall(pattern2, "\n" + ans)) > 1:  # prefer "(d)" labels
                        ans = re.sub(pattern2, mark, "\n" + ans)
                    elif len(re.findall(pattern1, "\n" + ans)) > 1:
                        ans = re.sub(pattern1, mark, "\n" + ans)
                elif len(re.findall(pattern12, "\n" + ans.replace(" ", ""))) > 1:
                    ans = re.sub(pattern1, mark, "\n" + ans)

                ans_pieces = ans.split(mark)
                ans_summarize = ans_pieces[0]
                ans_list = ans_pieces[1:]
                # Drop empty leading pieces produced by adjacent markers.
                while ans_list and not ans_list[0]:
                    ans_list = ans_list[1:]
                if len(con_list) - len(ans_list) != 1:
                    # Counts disagree: retry with a looser split on any "(d)".
                    may_ans_list = re.split(r"[（(]\s*\d\s*[)）]", raw_ans)
                    if len(may_ans_list) == len(con_list):
                        ans_summarize = may_ans_list[0]
                        ans_list = may_ans_list[1:]

            if parse:
                # Text in front of the 【详解】/【解析】/【解答】 heading (or, failing
                # those, the 【分析】 heading) is kept as a whole-question comment.
                if re.search('【(详解|解析|解答|分析)】', parse):
                    temp_parse = re.split('【详解】|【解析】|【解答】', parse, maxsplit=1)
                    if len(temp_parse) == 1:  # only 【分析】 is present
                        temp_parse = re.split('【分析】', parse, maxsplit=1)
                        parse = "【分析】" + temp_parse[1].strip()
                    else:
                        parse = temp_parse[1].strip()
                    analy_comment.append(temp_parse[0])

                # A trailing 【点评】/【点睛】 section also belongs to the whole question.
                if re.search("【(点评|点睛)】", parse):
                    comment = re.split('(【点评】|【点睛】)', parse, maxsplit=1)
                    analy_comment.append(comment[-2] + comment[-1])
                    parse = comment[0]

                # Analysis of the form "<item number>、（1）xxxx": drop the item
                # number and keep whatever precedes it as a comment.
                if "topic_num" in one_item:
                    one_item["item_id"] = one_item["topic_num"]
                item_id = one_item.get("item_id")
                if item_id:
                    prefixed_parse = "\n" + parse
                    other_parse_info = re.search(
                        r"\n\s*" + re.escape(str(item_id)) + r"\s*[、.．､]\s*[（(]\s*1\s*[）)]",
                        prefixed_parse)
                    if other_parse_info:
                        # Slice the searched string itself so the offsets line
                        # up, and take the leading text before parse changes.
                        leading_text = prefixed_parse[:other_parse_info.start()]
                        if leading_text.strip():
                            analy_comment.append(leading_text)
                        parse = "（1）" + prefixed_parse[other_parse_info.end():]

                # ------ mark the sub-questions inside the analysis ------
                if len(re.findall(r"[（(]\d[)）]", parse.replace(" ", ""))) > 1:
                    parse = re.sub(r"(?<=[\n:：;；。])\s*([(（]\s*([1-9]|1[0-9])\s*[)）])", mark, "\n" + parse)
                    parse = re.sub(r"(/>|【解】)\s*([(（]\s*([1-9]|1[0-9])\s*[)）])", r"\1" + mark, parse)
                else:
                    pattern3 = re.compile(r"(/>)\s*([(（]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[)）]"
                                          r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
                    parse = re.sub(pattern1, mark, "\n" + parse)
                    parse = re.sub(pattern3, r"\1" + mark, parse)

                # "<mark>解：<mark>" is one sub-question, not two; any amount of
                # whitespace (including none) may precede the colon.
                parse = re.sub(r"(【ⅳ】\s*解答?\s*[:：])\s*【ⅳ】", r"\1", parse)
                little_parse = parse.split(mark)
                if len(little_parse) > 1:
                    # Text before the first sub-question counts as a comment
                    # only when it is at least 5 characters long.
                    if len(little_parse[0].strip()) >= 5:
                        analy_comment.append(little_parse[0])
                    parse_list = little_parse[1:]

        # --------------- assemble the split pieces ---------------
        # The stem is split even when the analysis is not.
        one_item = split2little_con(con_list, ans_list, parse_list, one_item)
        if parse_split and "slave" in one_item:
            # Once the first sub-question carries its own analysis/answer, the
            # outer fields keep only the whole-question parts.
            if one_item["slave"][0]["parse"].strip():
                one_item['parse'] = "\n".join(analy_comment).strip()
            if one_item["slave"][0]["key"].strip():
                one_item['key'] = ans_summarize.strip()

    else:  # no sub-questions: only classify the answer type
        blank_total = len(re.findall(r"_{2,}", one_item["stem"]))
        if blank_total:
            one_item["blank_num"] = blank_total
            one_item["answer_type"] = "填空题"
        elif len(re.findall(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[.．、､]", one_item["stem"])) >= 3:
            # At least three "A." style markers: structure the options as well.
            one_item = option_structure(one_item, con, ans, 1)
            one_item["answer_type"] = "选择题"
            if 'options' not in one_item:
                one_item["options"] = []
        elif re.search(r"[(（]\s+[）)]\s*$", one_item["stem"]) or one_item["type"] == "判断题":
            one_item["answer_type"] = "判断题"
            # Normalise the various spellings of true/false.
            if re.match(r"【?(对的?|正确的?|[T√])】?$", one_item["key"].strip()):
                one_item["key"] = "正确"
            elif re.match(r"【?(错误?的?|不对的?|不正确的?|[F×])】?$", one_item["key"].strip()):
                one_item["key"] = "错误"
        elif re.search(r"[横划画]线处填写", one_item["stem"]) and "com_stem" in one_item:
            # The blanks to fill are located in the shared stem.
            blank_num = len(re.findall(r"_{2,}", one_item["com_stem"]))
            if blank_num > 0:
                one_item["answer_type"] = "填空题"
                one_item["blank_num"] = blank_num
            else:
                one_item["answer_type"] = "解答题"
        else:
            one_item["answer_type"] = "解答题"

    return one_item


def split2little_con(con_list, ans_list, parse_list, one_item):
    """
    Combine the per-sub-question stem, answer and analysis pieces into
    ``one_item["slave"]``.

    Nothing is changed when ``con_list`` has fewer than two elements.
    Otherwise ``one_item["stem"]`` becomes ``con_list[0]`` (the shared stem,
    possibly empty) and one slave dict is built for every remaining element.
    Answers / analyses are attached by index only when their count equals the
    number of sub-questions; otherwise the slave's "key" / "parse" stay empty.

    :param con_list: stem pieces; element 0 is the shared stem
    :param ans_list: answer pieces, one per sub-question
    :param parse_list: analysis pieces, one per sub-question
    :param one_item: dict of the question; modified in place
    :return: ``one_item``
    """
    if len(con_list) <= 1:
        return one_item

    # An empty first piece means there is no shared stem, only sub-questions.
    one_item["stem"] = con_list[0]

    sub_count = len(con_list) - 1
    use_parses = len(parse_list) == sub_count
    use_answers = isinstance(ans_list, list) and len(ans_list) == sub_count

    blank_re = re.compile(r"_{2,}")
    choice_re = re.compile(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[.．、､]")

    slave = []
    for index, s in enumerate(con_list[1:]):  # the stem pieces drive the loop
        blank_num = len(blank_re.findall(s))
        # Remove a score mark such as "(5分)" found within the first 9 characters.
        s = re.sub(r"[(（]\d+分[)）]", "", s[:9]) + s[9:]
        one_slave = {"slave_no": "（%s）" % (index + 1),
                     "stem": s,
                     "key": ans_list[index] if use_answers else "",
                     "parse": parse_list[index] if use_parses else "",
                     "answer_type": "解答题",
                     "errmsgs": [],
                     }

        # Three or more "A." style markers: try to structure it as a choice
        # question (the final answer type may still end up different).
        if len(choice_re.findall(s)) >= 3:
            raw_ans = one_slave["key"]
            raw_stem = one_slave["stem"]
            one_slave = option_structure(one_slave, s, one_slave["key"], 1, is_slave=1)
            one_slave["answer_type"] = "选择题"
            if not one_slave.get("options"):
                one_slave["key"] = raw_ans  # option parsing failed: restore the answer
            elif blank_num > 1:
                # Several blanks outweigh the options: treat it as fill-in.
                one_slave["answer_type"] = "填空题"
                one_slave["key"] = raw_ans
                one_slave["stem"] = raw_stem
                # pop() tolerates a result that lacks "options_rank".
                one_slave.pop("options", None)
                one_slave.pop("options_rank", None)

        if not one_slave.get("options"):
            if blank_num > 0:
                one_slave["blank_num"] = blank_num
                one_slave["answer_type"] = "填空题"
            elif re.search(r"[(（]\s+[）)]\s*$", s):
                one_slave["answer_type"] = "判断题"
            elif re.search(r"[横划画]线处填写", s):
                # The blanks to fill are located in the shared stem.
                blank_num = len(blank_re.findall(one_item["stem"]))
                if blank_num > 0:
                    one_slave["answer_type"] = "填空题"
                    one_slave["blank_num"] = blank_num
                else:
                    one_slave["answer_type"] = "解答题"

        slave.append(one_slave)

    one_item["slave"] = slave
    one_item["slave_no"] = "1-{}".format(len(slave)) if len(slave) > 1 else "1"
    return one_item