field_eq2latex.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
#!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. """
  4. 域公式转latex
  5. 域公式意义:\al 列内左对齐;\ac 列内居中对齐;\ar 列内右对齐
  6. \r(,):根号;
  7. \s(上标, 下标):设置上下标;
  8. """
  9. import re, os
  10. import configs
  11. from pprint import pprint
  12. from func_timeout import func_set_timeout
  13. SUB = {"A":"Ⓐ",
  14. "V":"Ⓥ",
  15. "W":"Ⓦ",
  16. "X":"Ⓧ",
  17. "G":"Ⓖ",
  18. }
  19. def latex_wash(ltx, is_danti=0):
  20. """
  21. latex格式调整, 为了渲染效果
  22. :param ltx:
  23. :return:
  24. """
  25. # ltx = re.sub(r"(?<!\\)%", "\%", ltx)
  26. # word中phi和varphi的渲染与mathjax相反,需换一下
  27. if not is_danti:
  28. ltx = re.sub(r"\\phi(?!up)", "@#@\\varphi", ltx)
  29. ltx = re.sub(r"(?<!@#@)\\varphi(?!up)", "\\phi", ltx)
  30. ltx = ltx.replace("@#@\\varphi", "\\varphi")
  31. # 处理\left和\right单独出现的情况
  32. ltx = re.sub(r"\$\\left\s*[((]\$", "(", ltx)
  33. ltx = re.sub(r"\$\\right\s*[))]\$", ")", ltx)
  34. def sub1(ss):
  35. if re.search(r"\\left.*?\\right|\\right.*?\\left", ss.group(1)) is None:
  36. # res = re.sub(r"\\left(?!right|arrow)|\\right(?!left|arrow)", "", ss.group(1))
  37. res = re.sub(r"\\left(?![a-z])|\\right(?![a-z])", "", ss.group(1))
  38. return res
  39. return ss.group(1)
  40. ltx = re.sub(r"(\$.*?(\\left(?![a-z])|\\right(?![a-z])).*?\$)", sub1, ltx)
  41. # -----------------------------------------------------------
  42. # \text{}中的 "<" 不换为"\lt"
  43. def get_posi(item, start): # 获取}的正确位置
  44. sign_stack = ["{"]
  45. for n, s in enumerate(item[start:]):
  46. if s == "{":
  47. sign_stack.append("{")
  48. elif s == "}":
  49. sign_stack.pop()
  50. if not sign_stack:
  51. return start + n
  52. return None
  53. res_p = ""
  54. tmp_item = ltx
  55. # 前端也会将小于号<替换成&lt;
  56. while "\\text" in tmp_item and (" \lt " in tmp_item or "\%" in tmp_item):
  57. st = re.search(r"\\text\s*{", tmp_item).end()
  58. ed = get_posi(tmp_item, st)
  59. if ed:
  60. res_p += tmp_item[:st] + tmp_item[st: ed + 1].replace(" \lt ", " &lt; ")\
  61. .replace("\%", "%")
  62. tmp_item = tmp_item[ed + 1:]
  63. else:
  64. break
  65. if res_p:
  66. ltx = res_p
  67. if tmp_item:
  68. ltx += tmp_item
  69. return ltx
  70. # def get_latex(item):
  71. # if r"$eq \\f(" in item:
  72. # item = re.sub(r"\$eq \\\\f\((.+?),(.+?)\)", r"$\\frac{\1}{\2}", item)
  73. #
  74. # if r"$eq \\r(" in item:
  75. # item = re.sub(r"\$eq \\\\r\((.+?)\)", r"$\sqrt{\1}", item)
  76. #
  77. # if "$eq \\\\o\\\\" in item:
  78. # while re.search(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", item):
  79. # ss = re.search(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", item)
  80. # # 将非变量的{}修改成{{}}
  81. # s1 = "$_{{{sub}}}^{{{sup}}}".format(sub=ss.group(2), sup=ss.group(1))
  82. # s1 = re.sub("</?su[bp]>|\s", "", s1)
  83. # if not ss.group(2):
  84. # eq_info = re.match(r"\$\s*<sub>(.+?)</sub>", item[ss.end():])
  85. # if eq_info:
  86. # s1 = "$_{{{sub}}}^{{{sup}}}$".format(sub=eq_info.group(1), sup=ss.group(1))
  87. # s1 = re.sub("</?su[bp]>|\s", "", s1)
  88. # item = item[:ss.start()] + s1 + item[ss.end()+eq_info.end():]
  89. # # return re.sub("</?su[bp]>|\s", "", s1)
  90. # else:
  91. # item = item[:ss.start()] + s1 + item[ss.end():]
  92. # else:
  93. # item = item[:ss.start()] + s1 + item[ss.end():]
  94. #
  95. # # s1 = "$_{{{sub}}}^{{{sup}}}".format(sub=ss.group(2), sup=ss.group(1))
  96. # # return re.sub("</?su[bp]>|\s", "", s1)
  97. # # item = re.sub(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", sub1, item)
  98. #
  99. # ac_info = re.search(r"\$eq \\\\o\\\\ac\(○,\s*([A-Z])\)", item)
  100. # if ac_info:
  101. # if ac_info.group(1) in SUB.keys():
  102. # item = item.replace(ac_info.group(0), SUB.get(ac_info.group(1)))
  103. #
  104. # return item
  105. # @func_set_timeout(3)
  106. def get_latex0(item):
  107. while "$eq \\\\f(" in item or "$eq \\\\r(" in item or re.search("【域公式】.*?\\\\o\\\\", item):
  108. if "$eq \\\\f(" in item:
  109. # item = re.sub(r"\$eq \\\\f\((((?!\\\\[fr]).)+?),(.+?)\)", r"$\\frac{\1}{\2}", item)
  110. item = re.sub(r"(【域公式】.*?)\\\\f\((((?!\\\\[fr]\().)+?),(((?!\\\\[fr]\().)+?)\)",
  111. r"\1\\frac{\2}{\4}", item)
  112. if "$eq \\\\r(" in item:
  113. item = re.sub(r"(【域公式】.*?)\\\\r\((((?!\\\\[fr]\().)+?)\)", r"\1\sqrt{\2}", item)
  114. if re.search("【域公式】.*?\\\\o\\\\", item): # if "$eq \\\\o\\\\" in item:
  115. while re.search(r"【域公式】.*?\\\\o\\\\al\((.+?),(.*?)\)", item):
  116. ss = re.search(r"(【域公式】.*?)\\\\o\\\\al\((.+?),(.*?)\)", item)
  117. # 将非变量的{}修改成{{}}
  118. s1 = "_{{{sub}}}^{{{sup}}}".format(sub=ss.group(3), sup=ss.group(2))
  119. s1 = re.sub("</?su[bp]>|\s", "", s1)
  120. if not ss.group(3):
  121. eq_info = re.match(r"\$\s*<sub>(.+?)</sub>", item[ss.end():])
  122. if eq_info:
  123. s1 = "_{{{sub}}}^{{{sup}}}$".format(sub=eq_info.group(1), sup=ss.group(2))
  124. s1 = re.sub("</?su[bp]>|\s", "", s1)
  125. item = item[:ss.start()] + ss.group(1) + s1 + item[ss.end()+eq_info.end():]
  126. # return re.sub("</?su[bp]>|\s", "", s1)
  127. else:
  128. item = item[:ss.start()] + ss.group(1) + s1 + item[ss.end():]
  129. else:
  130. item = item[:ss.start()] + ss.group(1) + s1 + item[ss.end():]
  131. # s1 = "$_{{{sub}}}^{{{sup}}}".format(sub=ss.group(2), sup=ss.group(1))
  132. # return re.sub("</?su[bp]>|\s", "", s1)
  133. # item = re.sub(r"\$eq \\\\o\\\\al\((.+?),(.*?)\)", sub1, item)
  134. ac_info = re.search(r"\$eq \\\\o\\\\ac\(○,\s*([A-Z])\)", item)
  135. if ac_info:
  136. if ac_info.group(1) in SUB.keys():
  137. item = item.replace(ac_info.group(0), SUB.get(ac_info.group(1)))
  138. return item.replace("【域公式】$eq ", "$")
  139. # @func_set_timeout(5)
  140. def zifu_match_combine(split_eq):
  141. """
  142. 递归函数,将成对括号进行组合,目前先按成对的括号进行转化
  143. :param split_eq:
  144. :return:
  145. """
  146. if len(split_eq) < 4 or ")" not in split_eq or "(" not in split_eq:
  147. return split_eq
  148. for k, i in enumerate(split_eq):
  149. if i == ")":
  150. for subk, j in enumerate(split_eq[:k][::-1]):
  151. if j == "(":
  152. # print(split_eq[k - subk - 1 - 1])
  153. bef_left_kuohao = split_eq[k - subk - 1 - 1]
  154. if bef_left_kuohao == "\\f":
  155. # dou_index = split_eq[k-subk-1-1:k+1].index(',')+k-subk-2
  156. # bb = split_eq[k - subk - 1 - 1:k + 1]
  157. info1 = re.search(r"\\f\((.*?),(.*?)\)$", "".join(split_eq[k - subk - 1 - 1:k + 1]))
  158. if info1:
  159. new_s = "\\frac{{{one}}}{{{two}}}".format(one=info1.group(1), two=info1.group(2))
  160. new_split_eq = split_eq[:k - subk - 1 - 1]
  161. if new_split_eq and new_split_eq[-1] == "(":
  162. new_s = "{" + new_s + "}"
  163. new_split_eq.append(new_s)
  164. new_split_eq.extend(split_eq[k + 1:])
  165. return zifu_match_combine(new_split_eq)
  166. elif bef_left_kuohao == "\\r":
  167. print(':::', "".join(split_eq[k - subk - 1 - 1:k + 1]))
  168. info1 = re.search(r"\\r\((.*?)\)$", "".join(split_eq[k - subk - 1 - 1:k + 1]))
  169. if info1:
  170. if re.search("<sup>\d</sup>\s*,", info1.group(1)):
  171. lft, right = re.split("(?<=</sup>)\s*,", info1.group(1))
  172. new_s = "\sqrt[{}]{{{}}}".format(re.search("<sup>(\d)</sup>", lft).group(1), right)
  173. else:
  174. new_s = "\sqrt{{{}}}".format(re.sub("^\s*,", "", info1.group(1)))
  175. new_split_eq = split_eq[:k - subk - 1 - 1]
  176. if new_split_eq and new_split_eq[-1] == "(":
  177. new_s = "{" + new_s + "}"
  178. new_split_eq.append(new_s)
  179. new_split_eq.extend(split_eq[k + 1:])
  180. return zifu_match_combine(new_split_eq)
  181. elif bef_left_kuohao in ['\\o\\al', '\\s']:
  182. info1 = re.search(r"(\\o\\al|\\s)\((.*?),(.*?)\)$", "".join(split_eq[k - subk - 1 - 1: k + 1]))
  183. if info1:
  184. new_s = "_{{{sub}}}^{{{sup}}}".format(sub=info1.group(3), sup=info1.group(2))
  185. if info1.group(1) == "\\o\\al" and (not info1.group(3) or not info1.group(2)):
  186. temp_s = info1.group(2) + info1.group(3)
  187. temp_s_info1 = re.search("<sub>(.*?)</sub>",temp_s)
  188. temp_s_info2 = re.search("<sup>(.*?)</sup>",temp_s)
  189. if temp_s_info1 and temp_s_info2:
  190. new_s = "_{{{}}}^{{{}}}".format(temp_s_info1.group(1), temp_s_info2.group(1))
  191. new_s = re.sub("</?su[bp]>|\s", "", new_s)
  192. new_split_eq = split_eq[:k - subk - 1 - 1]
  193. new_split_eq.append(new_s)
  194. new_split_eq.extend(split_eq[k + 1:])
  195. return zifu_match_combine(new_split_eq)
  196. elif bef_left_kuohao == '\\x\\to':
  197. info1 = re.search(r"\\x\\to\((.*?)\)$", "".join(split_eq[k - subk - 1 - 1:k + 1]))
  198. if info1:
  199. new_s = "\\bar{{{}}}".format(info1.group(1))
  200. new_split_eq = split_eq[:k - subk - 1 - 1]
  201. if new_split_eq and new_split_eq[-1] == "(":
  202. new_s = "{" + new_s + "}"
  203. new_split_eq.append(new_s)
  204. new_split_eq.extend(split_eq[k + 1:])
  205. return zifu_match_combine(new_split_eq)
  206. else:
  207. new_s = "".join(split_eq[k - subk - 1 - 1: k + 1])
  208. new_split_eq = split_eq[:k - subk - 1 - 1]
  209. new_split_eq.append(new_s)
  210. new_split_eq.extend(split_eq[k + 1:])
  211. return zifu_match_combine(new_split_eq)
  212. # @func_set_timeout(36)
  213. def get_latex(item, is_reparse=0, wordid="123456", must_latex=0):
  214. """
  215. 第一通道:
  216. 将文本中的域代码字符串能转化latex的先转化,不能转化的就暂时用域代码格式
  217. 第二通道:
  218. 再解析时,遇到域代码,将域代码转图片处理
  219. 考虑先转化:根式、分式、上下标、to、\s
  220. :param item:
  221. :return:
  222. """
  223. is_first = 1
  224. item = item.replace("\\uf028", "(").replace("\\uf029", ")") # 2020-6-21
  225. new_item = item
  226. # semi_succ_dict = {}
  227. while re.findall("(【域公式:[^【]*?】)", item):
  228. all_eqs1 = re.findall("(【域公式:[^【]*?】)", item) # 遇到嵌套的域公式,无法获取完整,故加【
  229. all_eqs = list(set(all_eqs1))
  230. all_eqs.sort(key=all_eqs1.index)
  231. print(all_eqs)
  232. new_eqs = []
  233. fail_n = 0
  234. for eq in all_eqs:
  235. raw_eq = eq.replace("\\\\", "\\").replace(" \R", " \\r")
  236. eq = raw_eq.replace("eq ", "").replace("【域公式:", "").replace("】", "")
  237. split_eq = re.split(r"(\\f|\(|\)|\\r|\\o\\al|\\x\\to|\\s|,)", eq)
  238. split_eq = [i for i in split_eq if i]
  239. res_eq = zifu_match_combine(split_eq)
  240. # print(res_eq, split_eq)
  241. try:
  242. if "".join(res_eq) == "".join(split_eq): # 转失败
  243. fail_n += 1
  244. new_eqs.append(raw_eq)
  245. elif re.search(r"\\[a-zA-Z\d]{1,5}\(", "".join(res_eq)): # 没有完全转成功
  246. fail_n += 1
  247. new_eqs.append(raw_eq)
  248. # semi_succ_dict[raw_eq] = "【域公式:eq {}】".format("".join(res_eq))
  249. # new_eqs.append("【域公式:eq {}】".format("".join(res_eq)))
  250. else:
  251. # mathjax不能渲染sub和sup
  252. new_eq = "".join(res_eq)
  253. def deal2(yy):
  254. new_y = yy.group(2)
  255. if yy.group(1) == "<sub>":
  256. new_y = "_{" + yy.group(2) + "}"
  257. if yy.group(1) == "<sup>":
  258. new_y = "^{" + yy.group(2) + "}"
  259. return new_y
  260. new_eq = re.sub("(<sub>)(.+?)</sub>", deal2, new_eq)
  261. new_eq = re.sub("(<sup>)(.+?)</sup>", deal2, new_eq).strip()
  262. if not is_first: # 如果不是第一轮转化,则将前面转化后的$去掉
  263. new_eq = re.sub(r"(?<!\\)\$", "", new_eq)
  264. new_eqs.append("${}$".format(new_eq))
  265. except:
  266. fail_n += 1
  267. new_eqs.append(raw_eq)
  268. if fail_n == len(all_eqs): # 防止死循环
  269. break
  270. eq_repl_dict = dict(zip(all_eqs, new_eqs))
  271. print('-------------',eq_repl_dict)
  272. for k, v in eq_repl_dict.items():
  273. # v = latex_wash(v)
  274. # print(v)
  275. item = item.replace(k, v)
  276. is_first = 0
  277. # 对于转latex失败的域公式走第二通道:转图片
  278. # 嵌套的情况,里层域公式转latex成功,外层转失败,怎么办
  279. if must_latex:
  280. return item, ""
  281. if is_reparse and "【域公式" in item:
  282. file_path = configs.IMG_FOLDER + '/' + str(wordid) + '/' + "field_eq"
  283. if not os.path.exists(file_path):
  284. os.makedirs(file_path)
  285. new_eqs2raw = {} # 域代码_原始文本
  286. for i in re.finditer("【域公式:(.*?)】", item):
  287. if re.search(r"\\sqrt|\\frac|\\bar", i.group(1)) is None: # 不能包含latex命令
  288. if "【" in i.group(1): # 嵌套,则按上面提取的域公式不完整
  289. cout = i.group(1).count("【") # 统计【个数
  290. try: # 根据嵌套的“【”找到最外层的“】”
  291. raw_eq = i.group(0)+"】".join(item[i.end():].split("】")[:cout])+"】" # 拿到完整样式
  292. eqs = i.group(1) + "".join(item[i.end():].split("】")[:cout])
  293. eqs = "eq " + eqs.replace("【域公式:", "").replace("【", "").replace("eq ", "")
  294. eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", eqs)
  295. eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
  296. new_eqs2raw[eqs]=raw_eq
  297. except:
  298. pass
  299. else:
  300. eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", i.group(1))
  301. eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
  302. new_eqs2raw[eqs] = i.group(0)
  303. else:
  304. print("域公式中含latex表达式!!!")
  305. new_eqs = list(new_eqs2raw.keys())
  306. new_eqs.append(file_path)
  307. eqcode = "】【".join(new_eqs)
  308. try:
  309. requests.get(r"http://localhost:9001/FieldEq/Eq2Png/?eqcode=" + eqcode, timeout=3)
  310. except:
  311. pass
  312. # 在生成图片的文件夹中对应判断图片再进行替换
  313. eq_imgs = os.listdir(file_path)
  314. if eq_imgs:
  315. raw_eqs2img = {}
  316. for img in eq_imgs:
  317. w_h_info = str(img.replace(".png", "").split("__")[-1]).split("_")
  318. w = int(int(w_h_info[0])/1.27+1)
  319. h = int(int(w_h_info[1])/1.27+1)
  320. name = str(img.replace(".png", "").split("__")[0])
  321. idn = int(name.split("_")[-1])
  322. new_name = name + ".png"
  323. os.rename(file_path + "/" + img, file_path + "/" + new_name)
  324. eq_img = '<img src="{}/{}/field_eq/{}" width="{}px" height="{}px" eq-code="{}" />'\
  325. .format(configs.new_img_ip, wordid, new_name, w, h, new_eqs[idn-1])
  326. raw_eqs2img[new_eqs2raw[new_eqs[idn-1]]] = eq_img
  327. if raw_eqs2img:
  328. for k, v in raw_eqs2img.items():
  329. item = item.replace(k, v)
  330. new_item = new_item.replace(k, v)
  331. else:
  332. new_item = ""
  333. else:
  334. new_item = ""
  335. return item, new_item
  336. if __name__ == '__main__':
  337. import requests,json
  338. # f = "t=【域公式】$eq \\\\f(v<sub>0</sub>,a)$=【域公式】$eq \\\\f(6,1)$ s=6s, $eq \\\\r(6)$ "
  339. # print(re.sub(r"\\\\o\\\\al\((.+?),.+?\)", r"\1",f))
  340. # p1 = r"C:\Users\Python\Desktop\test\24\25.html"
  341. # html = open(p1, 'r', encoding='utf-8').read()
  342. # # print(html)
  343. # print(get_latex(html))
  344. # f = "eq \\f(\\f(1,2)×0.82,0.2×10)】【eq \\f(6,1)】【eq \\f(\\x\\to(OC)-\\x\\to(OA),2T)】【C:/Users/Python/Desktop/test/temp"
  345. # res = requests.get(r"http://localhost:9001/FieldEq/Eq2Png/?eqcode=" + f, timeout=30).text
  346. # print(json.loads(res).replace("\r\n", ""))
  347. # f = "【解】解析 (1)因OB绳处于竖直方向,所以B球处于平衡状态,AB绳上的拉力为零,OB绳对小球的拉力F<sub>OB</sub>=mg. (3分)<br/>(2)A球在重力mg、水平拉力F和OA绳的拉力F<sub>OA</sub>三力作用下平衡,所以OA绳对小球的拉力F<sub>OA</sub>=【域公式:eq \\\\f(mg,cos 60°)】=2mg. (3分)<br/>(3)作用力F=mgtan 60°=【域公式:eq \\\\r(3)】mg. (3分)<br/>答案 (1)mg (2)2mg (3)【域公式:eq \\\\r(3)】mg"
  348. # f = "B.【域公式:eq \\r(<sup>3</sup>,\\f(1,4))】"
  349. # f1 = "由动能定理得-W<sub>克</sub><sub>f</sub>-mgh=0-【域公式:eq \\f(1,2)】mv【域公式:eq \\o\\al(<sub>B</sub><sup>2</sup>,)】"
  350. # aa = get_latex(f1, 1)
  351. # print(aa)
  352. # tt = r"${ } _ { n H C H O } \rightarrow f H _ { 2 } C - O _ { n }$"
  353. # def sub1(ss):
  354. # if re.search(r"\\left.*?\\right|\\right.*?\\left", ss.group(1)) is None:
  355. # # res = re.sub(r"\\left(?!right|arrow)|\\right(?!left|arrow)", "", ss.group(1))
  356. # res = re.sub(r"\\left(?![a-z])|\\right(?![a-z])", "", ss.group(1))
  357. # return res
  358. # return ss.group(1)
  359. # tt = re.sub(r"(\$.*?(\\left(?![a-z])|\\right(?![a-z])).*?\$)", sub1, tt)
  360. # # re1 = latex_wash(tt)
  361. # print(tt)
  362. # item = "【域公式:eq \\f(【域公式:eq \\f(6,1)】,3)】geeghe】threthtrh"
  363. # new_eqs2raw = {} # 域代码_原始文本
  364. # for i in re.finditer("【域公式:(.*?)】", item):
  365. # print(i.group(0))
  366. # if re.search(r"\\sqrt|\\frac|\\bar", i.group(1)) is None: # 不能包含latex命令
  367. # if "【" in i.group(1): # 嵌套
  368. # cout = i.group(1).count("【")
  369. # try: # 根据嵌套的【找到最外层的】
  370. # raw_eq = i.group(0) + "】".join(item[i.end():].split("】")[:cout]) + "】"
  371. # eqs = i.group(1) + "".join(item[i.end():].split("】")[:cout])
  372. # eqs = "eq " + eqs.replace("【域公式:", "").replace("【", "").replace("eq ", "")
  373. # eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", eqs)
  374. # eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
  375. # new_eqs2raw[eqs] = raw_eq
  376. # except:
  377. # pass
  378. # else:
  379. # eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", i.group(1))
  380. # eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
  381. # new_eqs2raw[eqs] = i.group(0)
  382. # 导入comtypes模块