# sci_clear.py
  1. # encoding=utf-8
  2. import os
  3. import sys
  4. sys.path.append(os.getcwd())
  5. sys.path.append("/home/cv/workspace/tujintao/document_segmentation")
  6. import os
  7. import re
  8. import requests
  9. import unicodedata
  10. from bs4 import BeautifulSoup
  11. from Utils.main_clear.latex2maple.latex2maple import structured
# Added by tjt: special handling for cleaning errors caused by taking
# data-latex directly from the rendered page.
# Special handling of '$' (method 1)
  14. def non_data_latex_replace(s):
  15. s = s.replace("\(","$")
  16. s = s.replace("\)","$")
  17. s_doll = s.replace("$", "$$")
  18. s_list = s_doll.split('$')
  19. s_list_len = len(s_list)
  20. if s_list_len == 1:
  21. return s
  22. i = 0
  23. # ['文本','','公式','','数字','','','公式','']->找空字符串去处理被''包围"公式"和"数字"
  24. while i < s_list_len:
  25. if s_list[i] == '':
  26. if i+2 < s_list_len and s_list[i+1] != '' and s_list[i+2] == '':
  27. s_list[i+1] = structured(s_list[i+1].replace('%20', ' ').replace('%3C', '<').replace('%3E', '>'))
  28. i += 2
  29. i += 1
  30. return ''.join(s_list)
# Special handling of '$' (method 2)
  32. def non_data_latex_iter(s):
  33. s = s.replace("\(","$")
  34. s = s.replace("\)","$")
  35. s_doll = s.replace("$", "$$")
  36. s_list = s_doll.split('$')
  37. s_list_len = len(s_list)
  38. if s_list_len == 1:
  39. return s
  40. # ['文本','','公式','','数字','','','公式','']->找被空字符''包围的"公式"和"数字"进行处理
  41. # 要先判断索引是否满足长度要求
  42. s_list = [structured(ele.replace('%20', ' ').replace('%3C', '<').replace('%3E', '>'))
  43. if (i+1)<s_list_len and i>0 and ele!='' and s_list[i-1]=='' and s_list[i+1]=='' else ele
  44. for i, ele in enumerate(s_list)]
  45. return ''.join(s_list)
# Special handling of '$' (method 3)
  47. def non_data_latex_regexp(s):
  48. s = s.replace("\(","$")
  49. s = s.replace("\)","$")
  50. re_list = re.findall(r"\$.*?\$", s)
  51. if len(re_list) > 0:
  52. latex_list = [ele for ele in re_list if ele.strip() != '']
  53. if len(latex_list) == len(re_list):
  54. latex_list = [structured(ele.replace('%20', ' ').replace('%3C', '<').replace('%3E', '>'))
  55. for ele in latex_list]
  56. for i in range(len(latex_list)):
  57. s = s.replace(re_list[i], latex_list[i], 1)
  58. return s
# Special handling of escape characters
  60. def escape_func(s):
  61. # 转义字符转换
  62. # s = s.replace("\a", "\\a")
  63. # s = s.replace("\b", "\\b")
  64. # s = s.replace("\f", "\\f")
  65. # s = s.replace("\n", "\\n")
  66. # s = s.replace("\r", "\\r")
  67. # s = s.replace("\t", "\\t")
  68. # s = s.replace("\v", "\\v")
  69. # s = s.replace('<\/', '</')
  70. # s = s.replace('\\\\', '\\')
  71. # 部分指令特殊处理
  72. s = s.replace(r'*-*', '')
  73. # s = s.replace(r'\[', '')
  74. # s = s.replace(r'\]', '')
  75. # s = s.replace(r"\lt", "<")
  76. # s = s.replace(r"\gt", ">")
  77. # s = s.replace(r'\theta', 'θ')
  78. s = s.replace(r'\a*rg', 'arg')
  79. # s = s.replace(r'\leftrightarrow', '↔')
  80. # s = s.replace(r'\Leftrightarrow', '⇔')
  81. # s = s.replace(r'\rightleftharpoons', '⇌')
  82. # s = s.replace(r'\leftharpoonup', '↼')
  83. # s = s.replace(r'\rightharpoonup', '⇀')
  84. # s = s.replace(r'\leftharpoondown', '↽')
  85. # s = s.replace(r'\rightharpoonupdown', '⇁')
  86. # s = s.replace(r'\leftarrow', '←')
  87. # s = s.replace(r'\Leftarrow', '⇐')
  88. # s = s.replace(r'\rightarrow', '→') # 前面要加上r,否则结果显示替换不成功
  89. # s = s.replace(r'\Rightarrow', '⇒')
  90. # s = s.replace(r'\right', '') # 前面要加上r,否则结果显示替换不成功
  91. # s = s.replace(r'\left', '')
  92. # 向量特殊处理
  93. # s = s.replace("overleftarrow", "overrightarrow")
  94. # # 分号(/)特殊处理
  95. # s = s.replace("dfrac", "frac")
  96. # <latex>标签特殊处理
  97. s = s.replace('<latex>', '$').replace('</latex>', '$')
  98. return s
  99. def get_maplef_items(html):
  100. """
  101. # data-latex="xxx", 不要data-latex=\"xxx\"和data-latex=\'xxx\',否则BeautifulSoup会掉内容
  102. """
  103. html = re.sub('(data-latex=".*?")', lambda x: x.group(1).replace("\n", ""), html, flags=re.S)
  104. html = escape_func(html)
  105. soup = BeautifulSoup(html, features="lxml")
  106. s = ''
  107. # print(soup.prettify())
  108. for i in soup.prettify().split('\n'):
  109. if i.strip().startswith('<img'):
  110. s2 = BeautifulSoup(i, features="lxml")
  111. if s2.img:
  112. s3 = s2.img.get('data-latex')
  113. if s3:
  114. # s += structured(s3.replace('%20', ' ').replace('%3C', '<').replace('%3E', '>'))
  115. s3 = re.sub(r"^\\\[(.*?)\\\]$", r"$\1$", s3)
  116. s += s3.replace('%20', ' ').replace('%3C', '<').replace('%3E', '>').strip()
  117. else:
  118. s3 = s2.img.get('src')
  119. if not s3:
  120. continue
  121. elif len(s3.split('?')) == 2:
  122. s3 = s3.split('?')[-1]
  123. else:
  124. if 'class="tiankong"' in i:
  125. s += '____'
  126. s3 = ''
  127. else:
  128. s3 = 'img'
  129. s += structured(s3.replace('%20', ' ').replace('%3C', '<').replace('%3E', '>'))
  130. elif i.strip().startswith('<'):
  131. if re.match("<br\s*/?>|</p>", i.strip()):
  132. s += "\\n"
  133. pass
  134. else:
  135. s += i.strip()
  136. # print(s)
  137. # print("****************************")
  138. # tjt新增修改(针对页面渲染直接取出data-latex导致清洗出错进行特殊处理)
  139. # try:
  140. # s = non_data_latex_iter(s) # 拿到字符串中的latex再转maple
  141. # except Exception as e:
  142. # try:
  143. # s = non_data_latex_regexp(s) # 拿到字符串中的latex再转maple
  144. # except Exception as e:
  145. # pass
  146. # 结果显示替换不成功
  147. s = re.sub(r'\s+', " ", s)
  148. s = re.sub(r'\n+', "\n", s)
  149. s = s.replace('#', '').replace(" ", " ")
  150. # s = re.sub(r"(begin|end){?(gathered|array)", "", s)
  151. s = s.replace("&lt;", "<")
  152. s = s.replace("&gt;", ">")
  153. s = s.replace('%20', '')
  154. s = s.replace('%3E', '>')
  155. s = s.replace('%3C', '<')
  156. # s = s.replace('img', '')
  157. # s = s.replace('$', '')
  158. # s = s.replace('mathbf', '')
  159. # s = s.replace('operatornamem', '')
  160. # s = s.replace('beginarra*y', '')
  161. # s = s.replace('endarra*y', '')
  162. # s = s.replace('slant', '')
  163. # s = s.replace('endarra*y', '')
  164. # s = s.replace('hfill', '')
  165. # 结果显示替换不成功
  166. # s = s.replace('\n', '')
  167. # s = s.replace('\r', '')
  168. # s = s.replace(r'①', '(1)、')
  169. # s = s.replace(r'②', '(2)、')
  170. # s = s.replace(r'③', '(3)、')
  171. # s = s.replace(r'④', '(4)、')
  172. # s = s.replace(r'⑤', '(5)、')
  173. # s = s.replace(r'⑥', '(6)、')
  174. # s = s.replace(r'⑦', '(7)、')
  175. # s = s.replace(r'⑧', '(8)、')
  176. # s = s.replace(r'⑨', '(9)、')
  177. # s = s.replace(r'⑩', '(10)、')
  178. # s = re.sub(r'/images/1-50/[1-9].gif', '( )', s)
  179. # s = re.sub(r'/([0-9a-z/*.]*?)(png|jpg|gif)', 'img', s)
  180. # s = unicodedata.normalize('NFKC', s) # 中文符号转换成英文
  181. # s = s.replace(",", ",").replace(":", ":").replace(";", ";").replace("“", "'").replace("”", "'"). \
  182. # replace("?", "?").replace("!", "!").replace("。", ",").replace("(", "(").replace(")", ")"). \
  183. # replace(".", ".").replace("【", "[").replace("】", "]")
  184. # tjt修改支持"= ."或"=___."处理
  185. # s = re.sub(r"[==][ _]*\.?$", "等于多少", s)
  186. # tjt注释
  187. # s = re.sub(r"[==]\s*(\(\s*\)|_+|(\s*))\.?", "等于多少", s)
  188. # s = re.sub(r"[==]\.?$", "等于多少", s)
  189. # s = re.sub(r"\(\s*[??]*\s*\)|(\s*[??]*\s*)\.?$", "", s)
  190. # s = re.sub(r"(为|是|等于|=|=)img\.?$", "等于多少", s)
  191. # s = re.sub(r"img(为|是|等于|=|=)\.?$", "等于多少", s)
  192. # s = re.sub(r"[fFGg]\^\(-1\)\*\(.*?\)", "反函数_y", s)
  193. # s = s.replace("图像", "图象")
  194. # s = s.replace("椭圆", "椭椭")
  195. # s = s.replace("⇒", "所以")
  196. # s = re.sub(r'img$', '', str(s))
  197. # s = re.sub(r"[==]\s*(\(\s*\)|_{1,5}|(\s*))", "等于多少", s)
  198. # s = re.sub(r"\(\s*[??]*\s*\)|(\s*[??]*\s*)$", "", s)
  199. # s = re.sub(r"(为|是|等于)img$", "等于多少", s)
  200. # # s = re.sub(r"([A-Za-z0-9])'", r"\1", s)
  201. # s = re.sub(r"over(right|left)arrow", "向量", s)
  202. # s = re.sub(r"[Vv]e\*?nn", "韦恩", s)
  203. # s = re.sub(r"\^['′]", "'", s)
  204. # tjt修改
  205. # s = s.replace("×", "*")
  206. # s = s.replace("%%", '')
  207. # s = s.replace('\\n', '')
  208. # s = s.replace('\\r', '')
  209. # s = s.replace("^°", "°")
  210. # s = re.sub(r"°\^([Cc])", r"°\1", s)
  211. # s = re.sub(r"([0-9])\)°([^Cc])", r"\1°)\2", s)
  212. # # tjt修改支持"= ."或"=___."处理
  213. # s = re.sub(r"[==][ _]*\.?$", "等于多少", s)
  214. return s.strip()
  215. if __name__ == "__main__":
  216. # print(structured( r'$z = \left( {{m^2} - 5m + 6} \right) + \left( {m - 3} \right)i$'))
  217. # li = ['<p><img src="/data/word/wordimg/2019/05/5ce6485c56f42.png" style="width: 36pt; height: 21.6pt" data-type="math" data-latex="\\[y = {e^x}\\]" width="48" height="29"/></p>', '<p><img src="/data/word/wordimg/2019/05/5ce6485c58025.png" style="width: 64.8pt; height: 28.8pt" data-type="math" data-latex="\\[y = - {\\log _{\\frac{1}{\\pi }}}x\\]" width="86" height="38"/></p>', '<p><img src="/data/word/wordimg/2019/05/5ce6485c5910a.png" style="width: 43.2pt; height: 21.6pt" data-type="math" data-latex="\\[y = \\sqrt x \\]" width="58" height="29"/></p>', '<p><img src="/data/word/wordimg/2019/05/5ce6485c5a2b1.png" style="width: 57.6pt; height: 28.8pt" data-type="math" data-latex="\\[y = {\\log _{\\frac{1}{2}}}x\\]" width="77" height="38"/></p>']
  218. #
  219. s= r"""<article><p>Have you ever wondered how your favorite NBA team received its famous name? All NBA teams have an interesting story or a history behind their names. Some of the names reflect the city's culture or history, others came from previous owners and many were selected through "Name the Team" contests.<br/>For teams like Los Angeles and Utah, the names were not always a reflection of the city. Even though Los Angeles has no lakes, the Laker name has been a city treasure for almost 40 years. Before going to Los Angeles, the team originated in Minneapolis, Minnesota. In 1948, team officials chose the name for its direct relationship to the state's motto, "The Land of 10,000 Lakes." The team name went unchanged after moving to Los Angeles in 1960.<br/>Because Utah's team originated in New Orleans, Louisiana, it was called the Jazz. In 1974, New Orleans club officials chose the name to represent the city for its reputation as the "jazz capital of the world." The name stayed with the team even after finding a new home in Salt Lake City, Utah in 1979.<br/>The Chicago Bull's original owner, Richard Klein, named the team the Bulls. He picked the name because a fighting bull is relentless, and never quits. Klein, who founded the club in1966, believed these qualities were necessary for a championship team and hoped his Chicago athletes would live up to the team name. A belief that Bulls—winner of the six NBA championships— have definitely followed.<br/>In 1967, the Indian Pacers selected their team name in a different way from most other teams. Their decision was based on what they wanted to accomplish in the NBA. Team officials chose the Pacers name because the organization wanted to set the "pace" in professional basketball.<br/></p></article>"""
  220. # from requests_toolbelt import MultipartEncoder
  221. # import requests
  222. # print(requests.post('http://192.168.1.145:8086/math_data_clean',data = {'item':s}).json())
  223. #
  224. # for s in li:
  225. s="""<p>如图,已知<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/c0b6d5bb7d966f18ef9353138f6352cc.png" style="width: 65.25pt; height: 14.25pt" data-type="math" data-latex="$\angle AOB = 40^\circ $" width="87" height="19" />,<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/5dd55320d1a52d7d34b2fd5e94433c8c.png" style="width: 87.75pt; height: 14.25pt" data-type="math" data-latex="$\angle BOC = 3\angle AOB$" width="117" height="19" />,<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/c1664fb157b79a64d991d7c77af335d7.png" style="width: 21pt; height: 14.25pt" data-type="math" data-latex="$OD$" width="28" height="19" />平分<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/480718502d54647275c7fe8ac58bac75.png" style="width: 36.75pt; height: 14.25pt" data-type="math" data-latex="$\angle AOC$" width="49" height="19" />,求<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/d2d28e3733589e90a99a8391c54171ff.png" style="width: 38.25pt; height: 14.25pt" data-type="math" data-latex="$\angle COD$" width="51" height="19" />的度数.</p><p>解:<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/e18901783e1f03c948bd1a1f617912cf.png" style="width: 72.75pt; height: 14.25pt" data-type="math" data-latex="$\because \angle BOC = 3\angle $" width="97" height="19" />______,<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/47a2d20361dc34d3d372d485f4f8a9f7.png" style="width: 65.25pt; height: 14.25pt" data-type="math" data-latex="$\angle AOB = 40^\circ $" width="87" height="19" />,</p><p><img 
src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/92a7bd71a095ca92577acd42e4cab402.png" style="width: 56.25pt; height: 14.25pt" data-type="math" data-latex="$\therefore \angle BOC = $" width="75" height="19" />______<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/414575fac3ad1e5fb552f948f76c54ac.png" style="width: 8.25pt; height: 12.75pt" data-type="math" data-latex="$^\circ $" width="11" height="17" />,</p><p><img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/c35b0bafb0d67ef84b9d8cc60afdc724.png" style="width: 56.25pt; height: 14.25pt" data-type="math" data-latex="$\therefore \angle AOC = $" width="75" height="19" />______<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/eef861150281f1cf1a5ce7cd013a7995.png" style="width: 11.25pt; height: 11.25pt" data-type="math" data-latex="$ + $" width="15" height="15" />______,</p><p><img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/de740a6139e9045a39fe4820ca34fa2d.png" style="width: 80.25pt; height: 14.25pt" data-type="math" data-latex="$\therefore \angle AOC = 160^\circ $" width="107" height="19" /></p><p><img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/2af5afb7e01160519ad998abe0993af1.png" style="width: 30.75pt; height: 14.25pt" data-type="math" data-latex="$\because OD$" width="41" height="19" />平分<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/b81f079120974bc8ef9736e4c574353c.png" style="width: 36.75pt; height: 14.25pt" data-type="math" data-latex="$\angle AOC$" width="49" height="19" /></p><p><img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/c3551758644fd7c37d9e127b0936448f.png" 
style="width: 57pt; height: 14.25pt" data-type="math" data-latex="$\therefore \angle COD = $" width="76" height="19" />______<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/ca6337eef25ff570fb1de161ef162948.png" style="width: 9.75pt; height: 9pt" data-type="math" data-latex="$ = $" width="13" height="12" />______<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/7c4334acc724beee1f283a162be5e1d6.png" style="width: 8.25pt; height: 12.75pt" data-type="math" data-latex="$^\circ $" width="11" height="17" />.</p><p><img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/teacher/uploadfiles/wording/0/2022/07/28/edf70a69a16fe0a1e9e69467f4a90c7e.png" style="width: 2.270833in; height: 1.246731in" width="218" height="120" /></p>"""
  226. s="""<p><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">为确保信息安全,信息需加密传输,发送方由明文</span><img width="15" height="9" src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694400394379.gif?%20\to" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">密文(加密),接收方由密文</span><img width="15" height="9" src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694400394379.gif?%20\to" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">明文(解密),已知加密规则为:明文</span><img width="54" height="15" src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694401424587.gif?a,b,c,d" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">对应密文</span><img width="179" height="15" src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694401118594.gif?a%20+%202b,2b%20+%20c,2c%20+%203d,4d" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">.例如,明文</span><img width="54" height="15" src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694401275521.gif?1,2,3,4" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">对应密文</span><img width="71" height="15" 
src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694401650019.gif?5,7,18,16" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">.当接收文收到密文</span><img width="79" height="15" src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zsytk/topic/image/2022/08/05/1659694401264806.gif?14,9,23,28" class="gsImgLatex mathType" style="font-variant-ligatures: normal; orphans: 2; white-space: normal; widows: 2; vertical-align: middle;"/><span style="font-variant-ligatures: normal; orphans: 2; widows: 2;">,解密得到的明文为( &nbsp; )</span></p>"""
  227. s="""<p><img src=\"http://tkimgs.zhixinhuixue.net/image/word/2021/05/05/1620215474707966.png\" data-latex=\"${G\\dfrac{{m}_{1}{m}_{2}}{{r}^{2}}}$\" width=\"60\" height=\"29\"/></p>"""
  228. s="""图1是放置在水平地面上的落地式话筒架实物图,图2是其示意图.支撑杆AB垂直于地面l,活动杆CD固定在支撑杆上的点E处.若∠AED=48°,BE=110*cm,DE=80*cm,求活动杆端点D离地面的高度DF.(结果精确到1*cm,参考数据:sin(48)°≈0.74,cos(48)°≈0.67,tan(48)°≈1.11)"""
  229. s="""<p>
  230. 如图,在直三棱柱<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254549901532.png" data-type="math" data-latex="\[ABC - {A_1}{B_1}{C_1}\]" width="92" height="24"/>中,<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254549188750.png" style="width: 36pt; height: 14.4pt" data-type="math" data-latex="\[\Delta ABC\]" width="48" height="19"/>是边长为<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254549243743.png" style="width: 7.2pt; height: 14.4pt" data-type="math" data-latex="\[6\]" width="10" height="19"/>的等边三角形,<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254549171353.png" style="width: 28.8pt; height: 14.4pt" data-type="math" data-latex="\[D,E\]" width="38" height="19"/>分别为<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550721453.png" style="width: 43.2pt; height: 14.4pt" data-type="math" data-latex="\[A{A_1},BC\]" width="58" height="19"/>的中点.</p><p> <img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550474830.png" style="width: 1.877778in; height: 2.767361in" width="180" height="266"/></p><p> (1)证明:<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550485317.png" style="width: 28.8pt; height: 14.4pt" data-type="math" data-latex="\[AE{\rm{//}}\]" width="38" height="19"/>平面<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550391913.png" style="width: 28.8pt; height: 14.4pt" data-type="math" data-latex="\[BD{C_1}\]" width="38" height="19"/>;</p><p> (2)若异面直线<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550855502.png" style="width: 21.6pt; height: 14.4pt" data-type="math" data-latex="\[B{C_1}\]" width="29" 
height="19"/>与<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550178780.png" style="width: 21.6pt; height: 14.4pt" data-type="math" data-latex="$AC$" width="29" height="19"/>所成的余弦值为<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550672200.png" style="width: 21.6pt; height: 36pt" data-type="math" data-latex="$\frac{{\sqrt 3 }}{4}$" width="29" height="48"/>,求<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550663250.png" style="width: 21.6pt; height: 14.4pt" data-type="math" data-latex="\[DE\]" width="29" height="19"/>与平面<img src="http://zxhx-pro-1302712961.cos.ap-beijing.myqcloud.com/zsytk/topic/image/2022/08/10/1660254550449745.png" style="width: 28.8pt; height: 14.4pt" data-type="math" data-latex="\[BD{C_1}\]" width="38" height="19"/>所成角的正弦值.</p>"""
  231. s = '大上传流程测试型<img src=\"http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zyk/uploadfiles/wording/62b41710765759d85567a54b/MJMATH-1684231348148437.png\" width=\"75\" height=\"24\" data-type=\"math\" data-latex="$\\frac{1}{n}\\sum _{i=1}^{n}\\"{x}_{i}$" />解答题_{}99 999<br/>(1)问题一<br/>(2)问题二'
  232. s = r' 1 . 1cm<sup >3</sup>纯水的质量是1g,1 cm<sup >3</sup>水中有3.34×10<sup >22</sup>个水分子,试计算: <br/>(1)1个水分子的质量约为多少千克?<span style="font-family:\'Times New Roman\'" qml-space-size="4">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</span><br/>(2)若水分子的直径约为40nm,将1cm<sup >3</sup>水中的水分子紧密排列成一条直线,共有多长?'
  233. print(non_data_latex_iter(s))
  234. # import json
  235. # f = json.loads(open(r"C:\Users\Administrator\Desktop\62b41710765759d85567a54b__2023_05_16_18_02_29.json",
  236. # 'r',encoding="utf-8").read())
  237. # for i in f["items"]:
  238. # print(get_maplef_items(i["stem"]))