HJ-AI
/
new_tiku_structure_2021


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
#!/usr/bin/env python
# -*- coding:utf-8 -*-


"""
Convert Word field-code equations (域公式) embedded in text to LaTeX.
"""
import re, os
import configs
from pprint import pprint

from func_timeout import func_set_timeout

# Circled-letter glyphs keyed by the plain capital letter; get_latex0 uses
# this table to replace an `\o\ac(○, X)` overlay field with a single character.
SUB = {
    "A": "Ⓐ",
    "V": "Ⓥ",
    "W": "Ⓦ",
    "X": "Ⓧ",
    "G": "Ⓖ",
}


# def get_latex(item):
#     if r"$eq \\f(" in item:
#         item = re.sub(r"\$eq \\\\f\((.+?),(.+?)\)", r"$\\frac{\1}{\2}", item)
#
#     if r"$eq \\r(" in item:
#         item = re.sub(r"\$eq \\\\r\((.+?)\)", r"$\sqrt{\1}", item)
#
#     if "$eq \\\\o\\\\" in item:
#         while re.search(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", item):
#             ss = re.search(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", item)
#             # 将非变量的{}修改成{{}}
#             s1 = "$_{{{sub}}}^{{{sup}}}".format(sub=ss.group(2), sup=ss.group(1))
#             s1 = re.sub("</?su[bp]>|\s", "", s1)
#             if not ss.group(2):
#                 eq_info = re.match(r"\$\s*<sub>(.+?)</sub>", item[ss.end():])
#                 if eq_info:
#                     s1 = "$_{{{sub}}}^{{{sup}}}$".format(sub=eq_info.group(1), sup=ss.group(1))
#                     s1 = re.sub("</?su[bp]>|\s", "", s1)
#                     item = item[:ss.start()] + s1 + item[ss.end()+eq_info.end():]
#                     # return re.sub("</?su[bp]>|\s", "", s1)
#                 else:
#                     item = item[:ss.start()] + s1 + item[ss.end():]
#             else:
#                 item = item[:ss.start()] + s1 + item[ss.end():]
#
#         #     s1 = "$_{{{sub}}}^{{{sup}}}".format(sub=ss.group(2), sup=ss.group(1))
#         #     return re.sub("</?su[bp]>|\s", "", s1)
#         # item = re.sub(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", sub1, item)
#
#         ac_info = re.search(r"\$eq \\\\o\\\\ac\(○,\s*([A-Z])\)", item)
#         if ac_info:
#             if ac_info.group(1) in SUB.keys():
#                 item = item.replace(ac_info.group(0), SUB.get(ac_info.group(1)))
#
#     return item


# @func_set_timeout(3)
def get_latex0(item):
    """Convert legacy ``【域公式】$eq ...$`` field codes in *item* to LaTeX.

    The input is expected to carry doubled backslashes (``\\\\f(``, ``\\\\r(``,
    ``\\\\o\\\\al(``, ``\\\\o\\\\ac(``).  Handled constructs:

    * ``\\f(a,b)``      -> ``\\frac{a}{b}``
    * ``\\r(a)``        -> ``\\sqrt{a}``
    * ``\\o\\al(p,b)``  -> ``_{b}^{p}`` (an empty ``b`` is taken from a
      ``<sub>..</sub>`` that directly follows the closing ``$``)
    * ``\\o\\ac(○,X)``  -> the circled letter from ``SUB``

    :param item: text that may contain field codes.
    :return: the text with every convertible code rewritten and the
        ``【域公式】$eq `` marker collapsed to ``$``.  Codes that cannot be
        matched are left as they are.
    """
    frac_flag = "$eq \\\\f("
    sqrt_flag = "$eq \\\\r("
    overlay_re = r"【域公式】.*?\\o\\"
    markup_noise = r"</?su[bp]>|\s"  # HTML sub/sup tags and whitespace

    while frac_flag in item or sqrt_flag in item or re.search(overlay_re, item):
        before = item

        if frac_flag in item:
            item = re.sub(r"(【域公式】.*?)\\\\f\((((?!\\\\[fr]\().)+?),(((?!\\\\[fr]\().)+?)\)",
                          r"\1\\frac{\2}{\4}", item)

        if sqrt_flag in item:
            # The backslash must be doubled in the template: a bare "\s" is an
            # unknown escape and makes re.sub raise re.error on Python >= 3.7.
            item = re.sub(r"(【域公式】.*?)\\\\r\((((?!\\\\[fr]\().)+?)\)", r"\1\\sqrt{\2}", item)

        if re.search(overlay_re, item):
            while True:
                ss = re.search(r"(【域公式】.*?)\\\\o\\\\al\((.+?),(.*?)\)", item)
                if ss is None:
                    break
                # Doubled braces are literal braces for str.format.
                s1 = "_{{{sub}}}^{{{sup}}}".format(sub=ss.group(3), sup=ss.group(2))
                s1 = re.sub(markup_noise, "", s1)
                tail_start = ss.end()
                if not ss.group(3):
                    # Empty subscript slot: borrow the <sub> that follows the
                    # closing "$" and swallow it together with that "$".
                    eq_info = re.match(r"\$\s*<sub>(.+?)</sub>", item[ss.end():])
                    if eq_info:
                        s1 = "_{{{sub}}}^{{{sup}}}$".format(sub=eq_info.group(1), sup=ss.group(2))
                        s1 = re.sub(markup_noise, "", s1)
                        tail_start = ss.end() + eq_info.end()
                item = item[:ss.start()] + ss.group(1) + s1 + item[tail_start:]

            ac_info = re.search(r"\$eq \\\\o\\\\ac\(○,\s*([A-Z])\)", item)
            if ac_info:
                if ac_info.group(1) in SUB:
                    item = item.replace(ac_info.group(0), SUB.get(ac_info.group(1)))

        if item == before:
            # Nothing could be rewritten although a marker is still present
            # (e.g. a code without the 【域公式】 prefix, or a fraction without a
            # comma).  Another pass would be identical, so stop instead of
            # looping forever.
            break

    return item.replace("【域公式】$eq ", "$")


# @func_set_timeout(5)
def zifu_match_combine(split_eq):
    """Recursively fold bracketed field-code groups of a token list into LaTeX.

    Each call finds the first ``)``, pairs it with the nearest ``(`` to its
    left, rewrites that group according to the token in front of the ``(``
    and recurses on the shortened list:

    * ``\\f(a,b)``                  -> ``\\frac{a}{b}``
    * ``\\r(a)``                    -> ``\\sqrt{a}`` (a leading ``,`` is dropped)
    * ``\\o\\al(p,b)`` / ``\\s(p,b)`` -> ``_{b}^{p}`` with ``<sub>``/``<sup>``
      tags and whitespace removed
    * ``\\x\\to(a)``                -> ``\\bar{a}``
    * anything else                -> the tokens are simply joined

    A converted fraction, root or bar that directly follows another ``(`` is
    wrapped in an extra pair of braces.

    :param split_eq: list of string tokens (commands, brackets, commas, text).
    :return: the reduced token list.  The input list is returned untouched
        when it has fewer than four tokens or lacks either bracket.  ``None``
        is returned when brackets remain but no group could be folded.
    """
    if len(split_eq) < 4 or ")" not in split_eq or "(" not in split_eq:
        return split_eq

    for k, token in enumerate(split_eq):
        if token != ")":
            continue
        # Walk left from the closing bracket to the nearest opening bracket.
        for open_idx in range(k - 1, -1, -1):
            if split_eq[open_idx] != "(":
                continue

            if open_idx == 0:
                # No token precedes the bracket.  Indexing open_idx - 1 would
                # wrap around to the last token and rebuild a list that never
                # shrinks, ending in RecursionError; fold the bare group.
                start, command = 0, None
            else:
                start, command = open_idx - 1, split_eq[open_idx - 1]
            chunk = "".join(split_eq[start:k + 1])
            brace_if_nested = False

            if command == "\\f":
                found = re.search(r"\\f\((.*?),(.*?)\)$", chunk)
                if not found:
                    continue
                new_s = "\\frac{{{one}}}{{{two}}}".format(one=found.group(1), two=found.group(2))
                brace_if_nested = True
            elif command == "\\r":
                found = re.search(r"\\r\((.*?)\)$", chunk)
                if not found:
                    continue
                new_s = "\\sqrt{{{}}}".format(re.sub(r"^\s*,", "", found.group(1)))
                brace_if_nested = True
            elif command in ("\\o\\al", "\\s"):
                found = re.search(r"(\\o\\al|\\s)\((.*?),(.*?)\)$", chunk)
                if not found:
                    continue
                new_s = "_{{{sub}}}^{{{sup}}}".format(sub=found.group(3), sup=found.group(2))
                new_s = re.sub(r"</?su[bp]>|\s", "", new_s)
            elif command == "\\x\\to":
                found = re.search(r"\\x\\to\((.*?)\)$", chunk)
                if not found:
                    continue
                new_s = "\\bar{{{}}}".format(found.group(1))
                brace_if_nested = True
            else:
                new_s = chunk

            rebuilt = split_eq[:start]
            if brace_if_nested and rebuilt and rebuilt[-1] == "(":
                new_s = "{" + new_s + "}"
            rebuilt.append(new_s)
            rebuilt.extend(split_eq[k + 1:])
            return zifu_match_combine(rebuilt)

    # Brackets are present but nothing could be folded (e.g. a ")" without a
    # matching "(").
    return None

# @func_set_timeout(36)
def _html_scripts_to_latex(text):
    """Rewrite ``<sub>x</sub>`` / ``<sup>x</sup>`` as ``_{x}`` / ``^{x}``."""
    text = re.sub("<sub>(.+?)</sub>", lambda m: "_{" + m.group(1) + "}", text)
    return re.sub("<sup>(.+?)</sup>", lambda m: "^{" + m.group(1) + "}", text)


def _field_code_to_latex(raw_eq, strip_dollars):
    """Return ``$latex$`` for one ``【域公式：eq ...】`` code, or None on failure."""
    eq = raw_eq.replace("eq ", "").replace("【域公式：", "").replace("】", "")
    split_eq = [tok for tok in re.split(r"(\\f|\(|\)|\\r|\\o\\al|\\x\\to|\\s|,)", eq) if tok]
    try:
        # The combine step returns None or may raise on malformed input;
        # either way this single equation counts as failed.
        combined = "".join(zifu_match_combine(split_eq))
    except Exception:
        return None
    if combined == "".join(split_eq):
        return None  # nothing was converted
    if re.search(r"\\[a-zA-Z\d]{1,5}\(", combined):
        return None  # a field command is still left: only partly converted
    # MathJax cannot render HTML sub/sup tags inside a formula.
    latex = _html_scripts_to_latex(combined).strip()
    if strip_dollars:
        # Formulas produced by an earlier pass are now nested inside this one;
        # drop their unescaped "$" delimiters.
        latex = re.sub(r"(?<!\\)\$", "", latex)
    return "${}$".format(latex)


def get_latex(item, is_reparse=0, wordid="123456"):
    """Convert ``【域公式：eq ...】`` field codes inside *item* to ``$...$`` LaTeX.

    Codes are converted innermost first: each pass handles every code that
    contains no nested ``【``, so an outer code becomes eligible once its inner
    codes have been replaced.  Roots, fractions, sub/superscripts, ``\\x\\to``
    and ``\\s`` are supported.  A code that cannot be fully converted stays in
    field-code form, with doubled backslashes collapsed and `` \\R`` normalised
    to `` \\r``.

    NOTE: the second channel (rendering unconverted field codes as images) is
    not active here, so *is_reparse* and *wordid* are accepted but unused and
    the second element of the result is always ``""``.

    :param item: text that may contain field codes.
    :param is_reparse: unused, kept for call compatibility.
    :param wordid: unused, kept for call compatibility.
    :return: tuple ``(converted_text, new_item)`` with ``new_item == ""``.
    """
    is_first = True
    # Literal "\uf028"/"\uf029" escape texts stand for brackets.
    item = item.replace("\\uf028", "(").replace("\\uf029", ")")  # 2020-6-21
    new_item = ""

    while True:
        # "[^【]" keeps the match to innermost codes; a nested code cannot be
        # captured completely by this pattern.
        found = re.findall("(【域公式：[^【]*?】)", item)
        if not found:
            break
        all_eqs = list(dict.fromkeys(found))  # unique, first-occurrence order

        new_eqs = []
        fail_n = 0
        for eq in all_eqs:
            raw_eq = eq.replace("\\\\", "\\").replace(" \\R", " \\r")
            latex = _field_code_to_latex(raw_eq, strip_dollars=not is_first)
            if latex is None:
                fail_n += 1
                new_eqs.append(raw_eq)
            else:
                new_eqs.append(latex)

        if fail_n == len(all_eqs):  # no progress possible: avoid an endless loop
            break

        for old, new in zip(all_eqs, new_eqs):
            item = item.replace(old, new)
        is_first = False

    return item, new_item


if __name__ == '__main__':
    # Manual smoke test: run the conversion on one sample answer text that
    # embeds three field codes and print the returned tuple.
    import requests
    import json

    # A smaller alternative input: "【域公式：eq \\f(v<sub>0</sub>,a)】"
    sample_text = "【解】解析 (1)因OB绳处于竖直方向，所以B球处于平衡状态，AB绳上的拉力为零，OB绳对小球的拉力F<sub>OB</sub>=mg. (3分)<br/>(2)A球在重力mg、水平拉力F和OA绳的拉力F<sub>OA</sub>三力作用下平衡，所以OA绳对小球的拉力F<sub>OA</sub>=【域公式：eq \\\\f(mg,cos 60°)】=2mg. (3分)<br/>(3)作用力F=mgtan 60°=【域公式：eq \\\\r(3)】mg. (3分)<br/>答案 (1)mg (2)2mg (3)【域公式：eq \\\\r(3)】mg"
    print(get_latex(sample_text))
    # item = "【域公式：eq \\f(【域公式：eq \\f(6,1)】,3)】geeghe】threthtrh"
    # new_eqs2raw = {}  # 域代码_原始文本
    # for i in re.finditer("【域公式：(.*?)】", item):
    #     print(i.group(0))
    #     if re.search(r"\\sqrt|\\frac|\\bar", i.group(1)) is None:  # 不能包含latex命令
    #         if "【" in i.group(1):  # 嵌套
    #             cout = i.group(1).count("【")
    #             try:  # 根据嵌套的【找到最外层的】
    #                 raw_eq = i.group(0) + "】".join(item[i.end():].split("】")[:cout]) + "】"
    #                 eqs = i.group(1) + "".join(item[i.end():].split("】")[:cout])
    #                 eqs = "eq " + eqs.replace("【域公式：", "").replace("【", "").replace("eq ", "")
    #                 eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", eqs)
    #                 eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
    #                 new_eqs2raw[eqs] = raw_eq
    #             except:
    #                 pass
    #         else:
    #             eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", i.group(1))
    #             eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
    #             new_eqs2raw[eqs] = i.group(0)