# encoding=utf-8
"""Flatten question HTML into a single line of cleaned text.

``get_maplef_items`` is the entry point.  Formula fragments found in the
markup are handed to the project's ``structured`` converter; everything else
is ordinary string/regex clean-up.

NOTE(review): this module was reconstructed from a copy in which newlines
were collapsed, text between ``<`` and ``>`` was stripped, HTML entities were
decoded and full-width punctuation was folded to ASCII.  Every place where
the original text had to be inferred carries its own ``NOTE(review)``.
"""
import logging
import os
import sys

# Presumably needed so the project-local ``main_clear`` package resolves when
# the script is launched from the repository root.
sys.path.append(os.getcwd())
import re
import unicodedata

from bs4 import BeautifulSoup

from main_clear.latex2maple.latex2maple import structured

logger = logging.getLogger(__name__)


def _structured_decoded(text):
    """Undo the ``%20``/``%3C``/``%3E`` escapes in *text*, then call ``structured``."""
    return structured(
        text.replace('%20', ' ').replace('%3C', '<').replace('%3E', '>')
    )


def _split_on_doubled_dollars(s):
    r"""Return ``(s, parts)`` with ``\(``/``\)`` mapped to ``$`` in *s*.

    Every ``$`` is doubled before splitting on ``$``, so each original
    delimiter shows up in *parts* as an empty string, e.g.
    ``'a$x$b' -> ['a', '', 'x', '', 'b']``.
    """
    # "\\(" is the same two-character string the original spelled "\(",
    # without relying on an invalid escape sequence.
    s = s.replace("\\(", "$").replace("\\)", "$")
    return s, s.replace("$", "$$").split('$')


# Special handling for '$' (variant 1), added by tjt: formulas taken straight
# from rendered pages (data-latex) broke the regular cleaning.
def non_data_latex_replace(s):
    """Convert every ``$...$`` span of *s* with ``structured`` (index walk).

    Args:
        s: text that may contain ``$...$`` or ``\\(...\\)`` delimited formulas.

    Returns:
        *s* unchanged when it holds no delimiter; otherwise the text with the
        delimiters removed and each delimited span converted.
    """
    s, parts = _split_on_doubled_dollars(s)
    count = len(parts)
    if count == 1:
        return s
    i = 0
    # ['text', '', 'formula', '', ...]: look for an empty marker whose
    # neighbour is non-empty and is itself followed by another empty marker.
    while i < count:
        if (
            parts[i] == ''
            and i + 2 < count
            and parts[i + 1] != ''
            and parts[i + 2] == ''
        ):
            parts[i + 1] = _structured_decoded(parts[i + 1])
            i += 2  # skip the span that was just converted
        i += 1
    return ''.join(parts)


# Special handling for '$' (variant 2).
def non_data_latex_iter(s):
    """Convert every ``$...$`` span of *s* with ``structured`` (single pass).

    Same contract as ``non_data_latex_replace``; a part is converted when it
    is non-empty and both of its neighbours are empty delimiter markers.
    """
    s, parts = _split_on_doubled_dollars(s)
    last = len(parts) - 1
    if last == 0:
        return s
    # Bounds first, then the neighbour test.  The original guard read
    # ``(i+1)<s_list_len and (i-1)>0``, i.e. 1 < i < last.
    return ''.join(
        _structured_decoded(part)
        if 1 < i < last
        and part != ''
        and parts[i - 1] == ''
        and parts[i + 1] == ''
        else part
        for i, part in enumerate(parts)
    )


# Special handling for '$' (variant 3).
def non_data_latex_regexp(s):
    """Convert every ``$...$`` span of *s* with ``structured`` (regex based).

    Unlike the two split-based variants, each match is passed to
    ``structured`` together with its surrounding ``$`` characters.
    """
    s = s.replace("\\(", "$").replace("\\)", "$")
    # Each match contains at least the two '$', so the original
    # "non-blank after strip()" filter could never reject anything.
    for span in re.findall(r"\$.*?\$", s):
        s = s.replace(span, _structured_decoded(span), 1)
    return s


# Ordered (old, new) pairs applied by ``escape_func``.  Order matters: longer
# commands must be rewritten before the bare ``\left`` / ``\right`` strips.
_ESCAPE_REPLACEMENTS = (
    # Control characters produced by unintended escapes -> literal backslash form.
    ("\a", "\\a"),
    ("\b", "\\b"),
    ("\f", "\\f"),
    ("\n", "\\n"),
    ("\r", "\\r"),
    ("\t", "\\t"),
    ("\v", "\\v"),
    # Special handling for some commands.
    (r'\[', ''),
    (r'\]', ''),
    (r"\lt", "<"),
    (r"\gt", ">"),
    (r'\theta', 'θ'),
    (r'\a*rg', 'arg'),
    (r'\leftrightarrow', '↔'),
    (r'\Leftrightarrow', '⇔'),
    (r'\rightleftharpoons', '⇌'),
    (r'\leftharpoonup', '↼'),
    (r'\rightharpoonup', '⇀'),
    (r'\leftharpoondown', '↽'),
    # Was spelled ``\rightharpoonupdown``: not a LaTeX command, and unreachable
    # because ``\rightharpoonup`` had already been rewritten one entry above.
    (r'\rightharpoondown', '⇁'),
    (r'\leftarrow', '←'),
    (r'\Leftarrow', '⇐'),
    # Raw strings are required here, otherwise the replacement does not match.
    (r'\rightarrow', '→'),
    (r'\Rightarrow', '⇒'),
    (r'\right', ''),
    (r'\left', ''),
    # Vectors: treat both arrow directions alike.
    ("overleftarrow", "overrightarrow"),
    # Fractions.
    ("dfrac", "frac"),
)


# Special handling for escape characters.
def escape_func(s):
    """Re-escape control characters and rewrite selected LaTeX commands in *s*.

    Args:
        s: raw text (HTML or LaTeX).

    Returns:
        The text after applying ``_ESCAPE_REPLACEMENTS`` in order.
    """
    for old, new in _ESCAPE_REPLACEMENTS:
        s = s.replace(old, new)
    return s


# Literal fragments removed/rewritten right after the formula pass.
_LITERAL_CLEANUPS = (
    ('%20', ''),
    ('%3E', '>'),
    ('%3C', '<'),
    ('img', ''),
    ('$', ''),
    ('mathbf', ''),
    ('operatornamem', ''),
    ('beginarra*y', ''),
    ('slant', ''),
    ('endarra*y', ''),
    ('hfill', ''),
    ('#', ''),
)

_TRAILING_EQUALS_RE = re.compile(r"=[ _]*\.?$")
_EMPTY_PARENS_RE = re.compile(r"\(\s*\?*\s*\)")


def get_maplef_items(html):
    """Convert question HTML into one line of cleaned text.

    Steps: drop newlines inside ``latex="..."`` attributes, run
    ``escape_func``, parse with BeautifulSoup, convert ``<img>`` formulas via
    ``structured``, drop all other tags, then apply a fixed sequence of
    literal and regex clean-ups (the order of which is significant).

    Args:
        html: HTML source of the question.

    Returns:
        The cleaned text.
    """
    html = re.sub(
        '(latex=".*?")',
        lambda m: m.group(1).replace("\n", ""),
        html,
        flags=re.S,
    )
    html = escape_func(html)
    soup = BeautifulSoup(html, features="lxml")

    pieces = []
    for line in soup.prettify().split('\n'):
        stripped = line.strip()
        if stripped.startswith('<img'):
            # NOTE(review): the head of this branch (from ``startswith('<``
            # up to the ``else:`` that reads ``src``) was lost in the copy
            # under review.  Parsing the line and preferring a ``data-latex``
            # attribute is a reconstruction -- confirm against the real file.
            img = BeautifulSoup(line, features="lxml").img
            formula = img.get('data-latex')
            if not formula:
                formula = img.get('src')
                if not formula:
                    continue
                if len(formula.split('?')) == 2:
                    # Keep only the part after '?'.
                    formula = formula.split('?')[-1]
                elif 'class="tiankong"' in line:
                    pieces.append('____')
                    formula = ''
                else:
                    formula = 'img'
            pieces.append(_structured_decoded(formula))
        elif stripped.startswith('<'):
            continue  # any other tag line is dropped
        else:
            pieces.append(line)
    s = ''.join(pieces)

    # Added by tjt: formulas taken straight from rendered pages (data-latex).
    # Best effort: fall back to the regex variant, and keep the text as it is
    # when both converters fail.
    try:
        s = non_data_latex_iter(s)
    except Exception:
        logger.debug("non_data_latex_iter failed; trying regex variant",
                     exc_info=True)
        try:
            s = non_data_latex_regexp(s)
        except Exception:
            logger.debug("non_data_latex_regexp failed; text left unconverted",
                         exc_info=True)

    # After this substitution no '\n' / '\r' remains, so the separate
    # newline replaces that used to follow were no-ops and are gone.
    s = re.sub(r'\s', " ", s)
    s = re.sub(r"(begin|end)\{?(gathered|array)", "", s)
    # NOTE(review): the copy under review showed replace("<", "<") and
    # replace(">", ">"), i.e. no-ops; presumably the literals were the HTML
    # entities before extraction decoded them -- confirm.
    s = s.replace("&lt;", "<")
    s = s.replace("&gt;", ">")
    for old, new in _LITERAL_CLEANUPS:
        s = s.replace(old, new)

    # Circled numbers -> "(n)、" (done before NFKC, which would fold them).
    for number, circled in enumerate('①②③④⑤⑥⑦⑧⑨⑩', 1):
        s = s.replace(circled, '({})、'.format(number))

    s = re.sub(r'/images/1-50/[1-9].gif', '( )', s)
    s = re.sub(r'/([0-9a-z/*.]*?)(png|jpg|gif)', 'img', s)
    # Folds full-width punctuation to its ASCII form.
    s = unicodedata.normalize('NFKC', s)

    # NOTE(review): in the copy under review the patterns below contained
    # duplicated characters (``[==]``, ``[??]``, ``=|=``) and an extra
    # ``(\s*[??]*\s*)`` alternative, which look like full-width variants
    # folded to ASCII by the extraction.  After the NFKC step above such
    # variants cannot occur in ``s``, so they are omitted; read literally,
    # the ASCII form would have rewritten every '=' -- confirm.

    # tjt: supports "= ." and "=___." at the end of the text.
    s = _TRAILING_EQUALS_RE.sub("等于多少", s)
    s = _EMPTY_PARENS_RE.sub("", s)
    s = re.sub(r"(为|是|等于|=)img\.?$", "等于多少", s)
    s = re.sub(r"img(为|是|等于|=)\.?$", "等于多少", s)
    s = re.sub(r"[fFGg]\^\(-1\)\*\(.*?\)", "反函数_y", s)
    s = s.replace("图像", "图象")
    s = s.replace("椭圆", "椭椭")
    s = s.replace("⇒", "所以")
    s = re.sub(r'img$', '', s)
    s = re.sub(r"=\s*(\(\s*\)|_{1,5})", "等于多少", s)
    s = _EMPTY_PARENS_RE.sub("", s)
    s = re.sub(r"(为|是|等于)img$", "等于多少", s)
    s = re.sub(r"over(right|left)arrow", "向量", s)
    s = re.sub(r"[Vv]e\*?nn", "韦恩", s)
    s = re.sub(r"\^['′]", "'", s)
    # tjt: multiplication signs are dropped entirely.
    s = s.replace("×", "*")
    s = s.replace("*", '')
    s = s.replace("%%", '')
    # Literal backslash-n / backslash-r left over from escape_func.
    s = s.replace('\\n', '')
    s = s.replace('\\r', '')
    s = s.replace("^°", "°")
    s = re.sub(r"°\^([Cc])", r"°\1", s)
    s = re.sub(r"([0-9])\)°([^Cc])", r"\1°)\2", s)
    # tjt: supports "= ." and "=___." at the end of the text.
    s = _TRAILING_EQUALS_RE.sub("等于多少", s)
    return s


if __name__ == "__main__":
    # print(structured(r'$z = \left( {{m^2} - 5m + 6} \right) + \left( {m - 3} \right)i$'))
    # from requests_toolbelt import MultipartEncoder
    # import requests
    # print(requests.post('http://192.168.1.145:8086/math_data_clean', data={'item': s}).json())

    # Sample inputs kept from earlier debugging sessions; only the last one
    # is processed, exactly as when they were successive ``s = ...`` lines.
    # NOTE(review): their HTML markup was lost in the copy under review.
    demo_inputs = [
        r"""

Have you ever wondered how your favorite NBA team received its famous name? All NBA teams have an interesting story or a history behind their names. Some of the names reflect the city's culture or history, others came from previous owners and many were selected through "Name the Team" contests.
For teams like Los Angeles and Utah, the names were not always a reflection of the city. Even though Los Angeles has no lakes, the Laker name has been a city treasure for almost 40 years. Before going to Los Angeles, the team originated in Minneapolis, Minnesota. In 1948, team officials chose the name for its direct relationship to the state's motto, "The Land of 10,000 Lakes." The team name went unchanged after moving to Los Angeles in 1960.
Because Utah's team originated in New Orleans, Louisiana, it was called the Jazz. In 1974, New Orleans club officials chose the name to represent the city for its reputation as the "jazz capital of the world." The name stayed with the team even after finding a new home in Salt Lake City, Utah in 1979.
The Chicago Bull's original owner, Richard Klein, named the team the Bulls. He picked the name because a fighting bull is relentless, and never quits. Klein, who founded the club in1966, believed these qualities were necessary for a championship team and hoped his Chicago athletes would live up to the team name. A belief that Bulls—winner of the six NBA championships— have definitely followed.
In 1967, the Indian Pacers selected their team name in a different way from most other teams. Their decision was based on what they wanted to accomplish in the NBA. Team officials chose the Pacers name because the organization wanted to set the "pace" in professional basketball.

""",
        """

如图,已知平分,求的度数.

解:______,

______

____________,

平分

____________.

""",
        """

为确保信息安全,信息需加密传输,发送方由明文密文(加密),接收方由密文明文(解密),已知加密规则为:明文对应密文.例如,明文对应密文.当接收文收到密文,解密得到的明文为(   )

""",
        """

""",
        """\n

【题文】如图所示,△ABC为正三角形,在A、B两点放有两点电荷,结果C点的合场强大小为E,方向垂直于CB边斜向上,CD是AB边的垂线,D是垂足,则下列说法正确的是(  )

\n

\n

A.A 为正电荷,B为负电荷,A点电荷的电荷量是B点电荷量的2倍

\n

B.若撤去A点的电荷,则C点的电场强度大小为$\\frac{\\sqrt{3}}{3}$E

\n

C.C点电势高于D点电势,若取无穷远处电势为零,则C点的电势为正

\n

D.将一正点电荷从D点沿直线移到C点,电势能一直减小

\n

【答案】ABD

\n

【解析】

\n

【详解】

\n

A.由于C点的场强是由A、B两点的点电荷产生的场强叠加而成,根据平行四边形定则和点电荷场强的特点可知,如图所示,可以判断EA背离A点,EB指向B点,所以A点为正电荷,B点为负电荷,A,B点电荷在C点产生的场强大小分别为

\n

EA=$\\frac{2\\sqrt{3}}{3}$E

\n

EB=$\\frac{\\sqrt{3}}{3}$E

\n

设三角形的边长为L,由点电荷的场强公式可知

\n

EA=k$\\frac{{q}_{A}}{{L}^{2}}$

\n

EB=k$\\frac{{q}_{B}}{{L}^{2}}$

\n

\n

qA=2qB

\n

A正确;

\n

\n

B.若撤去A点的电荷,则C点的电场强度大小为

\n

EB=$\\frac{\\sqrt{3}}{3}$E

\n

B正确;

\n

C.设qB=﹣q,qA=+2q,将A点的电荷看成两个+q电荷的叠加,则其中一半与B点的电荷是等量异种电荷,还剩一半是正点电荷,因为等量异种电荷中垂线是等势线,延伸到无穷远,由题意可知,中垂线上的电势就相当于A点剩下的+q在中垂线上的电势,为正,且离A点越近,电势越高,C错误;

\n

D.同理,正电荷在中垂线上从D点向C点运动时,相当于只有A点剩下的+q对其做正功,电势能一直减小,D正确。

\n

故选ABD。

\n

【题型】290104

\n

【难度】3

\n

【结束】

\n""",
        """

的对称轴为.

,即

,即

,即,(如图)


综上所述,的值域为,故选B.

""",
    ]
    print(get_maplef_items(demo_inputs[-1]))