Explorar o código

细节优化20240711

cdZWj hai 4 meses
pai
achega
d2c2d5181d

+ 10 - 8
configs.py

@@ -6,6 +6,7 @@ import time
 import os
 import sys
 import datetime
+import logging.handlers
 
 
 class myLog(object):
@@ -31,15 +32,15 @@ class myLog(object):
         # self.log_name = self.log_path + "/" + log_cate + "." + self.log_time + '.log'
         # self.log_name = os.path.join(log_dir, 'parse_log.log')  # 日志地址
         self.log_name = os.path.join(parse_log_dir, '{}.log'.format(log_cate))  # 日志地址
-        if os.path.exists(self.log_name):  # 设置日志定长自动新建
-            logsize = os.path.getsize(self.log_name)
-            if logsize > 180000000:  # 180M
-                os.rename(self.log_name, os.path.join(parse_log_dir, '{}_{}.log'.format(log_cate,
-                                                      datetime.datetime.now().strftime('%m_%d'))))
+        # if os.path.exists(self.log_name):  # 设置日志定长自动新建
+        #     logsize = os.path.getsize(self.log_name)
+        #     if logsize > 180000000:  # 180M
+        #         os.rename(self.log_name, os.path.join(parse_log_dir, '{}_{}.log'.format(log_cate,
+        #                                               datetime.datetime.now().strftime('%m_%d'))))
 
-        # fh = logging.FileHandler(self.log_name, 'a')  # 追加模式  这个是python2的
         fh = logging.FileHandler(self.log_name, mode='a', encoding='utf8', delay=True)
-        # fh = logging.FileHandler(self.log_name, 'a', encoding='utf-8')  # 这个是python3的
+        fh = logging.handlers.RotatingFileHandler(self.log_name, maxBytes=150000000, backupCount=3,
+                                                  mode='a', encoding='utf-8', delay=True)
         fh.setLevel(logging.INFO)
 
         # 再创建一个handler,用于输出到控制台
@@ -116,12 +117,13 @@ class ProductionCfg:  # production
     repeat_ip = "http://10.19.1.18:8866/api/repeat/subject"
     # topic_segment_ip = "http://10.19.1.14:10622/math_phy_TopicSegment_predict"  #CPU
     topic_segment_ip = "http://10.19.1.10:10622/math_phy_TopicSegment_predict"  #GPU
+    # topic_segment_ip = "http://49.232.72.198:10622/math_phy_TopicSegment_predict"
     phy_topicType_ip = "http://10.19.1.6:10611/phy_topicType_predict"
     callback_url_taskcheck = "http://api.tk.zhixinhuixue.com/v1/interior-api/record"
 
 
 # sys.argv:从控制台窗台运行程序,程序后加参数,以空格隔开,sys.argv[0]即程序本身
-config_class = TestingCfg   # 没有参数时,默认按测试环境
+config_class = ProductionCfg   # with no command-line argument, default to the production environment
 if len(sys.argv) > 1:
     print(sys.argv, sys.argv[0])
     print(sys.argv[1])

+ 33 - 27
structure/danti_structure.py

@@ -38,7 +38,7 @@ def single_parse(one_item, item_type, wordid, source="zxhx", subject="数学"):
     one_item = re.sub(r'(</?t[drh]( .*?")?>|</?table>|</?tbody>)(<br\s*/?>|\n)+', r"\1", one_item, flags=re.S)
     one_item = re.sub(r"\\\(\s*{\s*\}\s*\\\)", "", one_item)
     one_item = re.sub(r"\\\(\s*\\\)", "", one_item)
-    one_item = re.sub(r"\\\(|\\\)", "$", one_item)
+    one_item = re.sub(r"\\\(\s*|\s*\\\)", "$", one_item)
     one_item = latex_wash(one_item, 1)
 
     res_list = re.split(r"(\n+【答案】|\n+【解析】)", one_item)
@@ -135,41 +135,47 @@ if __name__ == '__main__':
 <div class="topic-analysis-content" data-v-35454a0f=""><span class="analysis-prefix" data-v-35454a0f="">【解析】</span><span data-v-35454a0f="">15.解:(1)以封闭气体为研究对象,初态<br><span class="math-tex">\({j_{1}=b_{0}-ogh_{1}=63\quad cmHg,V_{1}=L}\)</span><br>末态<br><span class="math-tex">\({f_{2}=p_{0}+ogh_{2}=84cmg,V_{2}=L^{\prime },}\)</span><br>根据玻意耳定律<br><span class="math-tex">\({p_{1}V_{1}=p_{2}V_{2}}\)</span><br>解得<span class="math-tex">\({L^{\prime }=15cm}\)</span><br>(2)右管中水银柱增加的长度<br><span class="math-tex">\({x_{1}=L-L^{\prime }=5\cot1}\)</span><br>左管中水银柱增加的长度<br><span class="math-tex">\({r_{2}=h_{1}+h_{2}+(L-L^{\prime })=26c}\)</span><br>加入的水银柱长度<br><span class="math-tex">\({x=x_{1}+x_{2}=31cm}\)</span></span></div>
 </div>
     """
-    hml4 = """
-    <div class="stem-wraper" data-v-6e158526=""><span class="topic-number" data-v-6e158526="">5.</span><span class="stem" data-v-6e158526=""><span class="stem" data-v-6e158526="">解磷菌是土壤中的一类功能微生物,包括细菌、真菌和放线菌。筛选出土壤中的高效解磷菌并制成生物菌剂,可以提高难溶性磷的利用率。下表为“分离出土壤中具有解磷功能的微生物实验”的培养基配方,下列说法正确的是( )<br></span></span>
+    hml4 = r"""
+    <div class="stem-wraper" data-v-3d09d6a3=""><span class="topic-number" data-v-3d09d6a3="">19.</span><span class="stem" data-v-3d09d6a3=""><span class="stem" data-v-3d09d6a3="">下列离子方程式的书写及评价均合理的是</span></span>
 <table>
 <tbody>
 <tr>
-<td>物质</td>
-<td>葡萄糖</td>
-<td>(NH<sub>4</sub>)<sub>2</sub>SO<sub>4</sub></td>
-<td>NaCl</td>
-<td>KCl</td>
-<td>CaCO<sub>3</sub></td>
-<td>磷酸三钙</td>
-<td>琼脂</td>
+<td>选项</td>
+<td>离子方程式</td>
+<td>评价</td>
 </tr>
 <tr>
-<td>质量/g</td>
-<td>10</td>
-<td>0.5</td>
-<td>3</td>
-<td>0.3</td>
-<td>5</td>
-<td>20</td>
-<td>15</td>
+<td>A</td>
+<td>将<span class="math-tex">\( \text{1 mol }{\text{Cl}}_{\text{2}} \)</span>通入含<span class="math-tex">\( \text{1 mol }{\text{FeI}}_{\text{2}} \)</span>溶液中:<span class="math-tex">\({\text{2Fe}}^{\text{2+}}{\text{+2I}}^{\text{-}}{\text{+2Cl}}_{\text{2}}{\text{=2Fe}}^{\text{3+}}{\text{+4Cl}}^{\text{-}}{\text{+I}}_{\text{2}}\)</span></td>
+<td>正确;<span class="math-tex">\({\text{Cl}}_{\text{2}}\)</span>过量,可将<span class="math-tex">\({\text{Fe}}^{\text{2+}}\)</span>、<span class="math-tex">\({\text{I}}^{\text{-}}\)</span>均氧化</td>
+</tr>
+<tr>
+<td>B</td>
+<td><span class="math-tex">\( \text{1 mol}·{\text{L}}^{\text{-1}} \)</span>的<span class="math-tex">\({\text{NaA1O}}_{\text{2}}\)</span>溶液和<span class="math-tex">\( \text{2.5 mol}·{\text{L}}^{\text{-1}} \)</span>的<span class="math-tex">\(\text{HCl}\)</span>溶液等体积均匀混合:<span class="math-tex">\({\text{2AlO}}_{\text{2}}^{\text{-}}{\text{+5H}}^{\text{+}}{\text{=A1}}^{\text{3+}}{\text{+Al(OH)}}_{\text{3}}↓{\text{+H}}_{\text{2}}\text{O}\)</span></td>
+<td>正确;<span class="math-tex">\({\text{AlO}}_{\text{2}}^{\text{-}}\)</span>与<span class="math-tex">\({\text{Al(OH)}}_{\text{3}}\)</span>消耗的<span class="math-tex">\({\text{H}}^{\text{+}}\)</span>的物质的量之比为<span class="math-tex">\(\text{2:3}\)</span></td>
+</tr>
+<tr>
+<td>C</td>
+<td>过量<span class="math-tex">\({\text{SO}}_{\text{2}}\)</span>通入<span class="math-tex">\(\text{NaClO}\)</span>溶液中:<span class="math-tex">\({\text{SO}}_{\text{2}}{\text{+H}}_{\text{2}}{\text{O+ClO}}^{\text{-}}{\text{=HClO+HSO}}_{\text{3}}^{\text{-}}\)</span></td>
+<td>正确;说明酸性:<span class="math-tex">\({\text{H}}_{\text{2}}{\text{SO}}_{\text{3}}\)</span>强于<span class="math-tex">\(\text{HClO}\)</span></td>
+</tr>
+<tr>
+<td>D</td>
+<td><span class="math-tex">\({\text{Mg(HCO}}_{\text{3}}{\text{)}}_{\text{2}}\)</span>溶液与足量的<span class="math-tex">\(\text{NaOH}\)</span>溶液反应:<span class="math-tex">\({\text{Mg}}^{\text{2+}}{\text{+HCO}}_{\text{3}}^{\text{-}}{\text{+OH}}^{\text{-}}{\text{=MgCO}}_{\text{3}}↓{\text{+H}}_{\text{2}}\text{O}\)</span></td>
+<td>正确;酸式盐与碱反应生成正盐和水</td>
 </tr>
 </tbody>
 </table>
-<span class="analysis-prefix" data-v-6e158526=""><br>A:</span><span data-v-6e158526="">培养基中加入琼脂的目的是作为凝固剂和提供碳源</span></div>
-<ul class="stem-options" data-v-6e158526="">
-<li data-v-6e158526=""><span class="analysis-prefix" data-v-6e158526="">B:</span><span data-v-6e158526="">该培养基是以磷酸三钙为唯一磷源的选择培养基</span></li>
-<li data-v-6e158526=""><span class="analysis-prefix" data-v-6e158526="">C:</span><span data-v-6e158526="">用平板划线法纯化解磷菌可以计数其活菌的数量</span></li>
-<li data-v-6e158526=""><span class="analysis-prefix" data-v-6e158526="">D:</span><span data-v-6e158526="">可根据培养基中透明圈的大小确定菌株的解磷能力</span></li>
+</div>
+<ul class="stem-options" data-v-3d09d6a3="">
+<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">A:</span><span data-v-3d09d6a3=""><span class="math-tex">\(A\)</span></span></li>
+<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">B:</span><span data-v-3d09d6a3=""><span class="math-tex">\(B\)</span></span></li>
+<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">C:</span><span data-v-3d09d6a3=""><span class="math-tex">\(C\)</span></span></li>
+<li data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">D:</span><span data-v-3d09d6a3=""><span class="math-tex">\(D\)</span></span></li>
 </ul>
-<div class="topic-analysis" data-v-6e158526="">
-<div class="topic-analysis-content" data-v-6e158526=""><span class="analysis-prefix" data-v-6e158526="">【答案】</span><span data-v-6e158526="">B</span></div>
-<div class="topic-analysis-content" data-v-6e158526=""><span class="analysis-prefix" data-v-6e158526="">【解析】</span><span data-v-6e158526="">【分析】1、微生物常见的接种的方法:(1)平板划线法:将已经熔化的培养基倒入培养皿制成平板,接种、划线,在恒温箱里培养,在线的开始部分,微生物往往连在一起生长,随着线的延伸,菌数逐渐减少,最后可能形成单个菌落。(2)稀释涂布平板法:将待分离的菌液经过大量稀释后,均匀涂布在培养皿表面,经培养后可形成单个菌落。<br>2、选择培养基:根据某种微生物的特殊营养要求或其对某化学、物理因素的抗性而设计的培养基使混合菌样中的劣势菌变成优势菌,从而提高该菌的筛选率,如加入青霉素分离得到酵母菌和霉菌。<br>【详解】A、配制固体培养基时可加入琼脂作为凝固剂,琼脂一般不会被微生物所利用,A错误;<br>B、本实验的目的是获得具有解磷功能的微生物,故其选择培养基中应以磷酸三钙为唯一磷源,B正确;<br>C、用稀释涂布平板法纯化解磷菌可以计数其活菌的数量,C错误;<br>D、可根据透明圈直径(D)与菌落直径(d)的比值(D/d)代表微生物解磷能力大小,D错误。<br>故选B。<br></span></div>
+<div class="topic-analysis" data-v-3d09d6a3="">
+<div class="topic-analysis-content" data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">【答案】</span><span data-v-3d09d6a3="">B</span></div>
+<div class="topic-analysis-content" data-v-3d09d6a3=""><span class="analysis-prefix" data-v-3d09d6a3="">【解析】</span><span data-v-3d09d6a3="">A.将1molCl<sub>2</sub>通入到含1molFeI<sub>2</sub>溶液中,由于I<sup>-</sup>的还原性强于Fe<sup>2+</sup>,所以<span class="math-tex">\( \text{1 mol }{\text{Cl}}_{\text{2}} \)</span>只能将2molI<sup>-</sup>氧化,其正确的离子方程式为2I<sup>-</sup>+Cl<sub>2</sub>═2Cl<sup>-</sup>+I<sub>2</sub>,故A错误;<br>B.1mol/L的NaAlO<sub>2</sub>溶液和2.5mol/L的HCl溶液等体积互相均匀混合,设溶液体积为1L,偏铝酸钠和HCl的物质的量分别为1mol、2.5mol,1mol偏铝酸钠消耗1molHCl生成1mol氢氧化铝,剩余的1.5molHCl能够溶解0.5mol氢氧化铝,反应的离子方程式为<span class="math-tex">\({\text{2AlO}}_{\text{2}}^{\text{-}}{\text{+5H}}^{\text{+}}{\text{=A1}}^{\text{3+}}{\text{+Al(OH)}}_{\text{3}}↓{\text{+H}}_{\text{2}}\text{O}\)</span>,<span class="math-tex">\({\text{AlO}}_{\text{2}}^{\text{-}}\)</span>与<span class="math-tex">\({\text{Al(OH)}}_{\text{3}}\)</span>消耗的<span class="math-tex">\({\text{H}}^{\text{+}}\)</span>的物质的量之比为1:1.5=2:3,故B正确;<br>C.HClO有强氧化性,能够氧化SO<sub>2</sub>,正确的离子方程式为SO<sub>2</sub>+H<sub>2</sub>O+ClO<sup>-</sup>=Cl<sup>-</sup>+SO<sub>4</sub><sup>2-</sup>+2H<sup>+</sup>,故C错误;<br>D.Mg(HCO<sub>3</sub>)<sub>2</sub>溶液与足量的NaOH溶液反应,由于氢氧化镁比碳酸镁更难溶,所以反应生成氢氧化镁沉淀,其离子方程式为Mg<sup>2+</sup>+2<span class="math-tex">\({\text{HCO}}_{3}^{-}\)</span>+4OH<sup>-</sup>=Mg(OH)<sub>2</sub>↓+2H<sub>2</sub>O+2<span class="math-tex">\({\text{CO}}_{3}^{2-}\)</span>,故D错误;<br>答案为B。<br></span></div>
 </div>
     """
     aa = single_parse(hml4, "选择题", "456")

+ 20 - 2
structure/option.py

@@ -38,6 +38,7 @@ def option2block(option_con, item_no_type):
     if item_no_type == 2:
         con = re.sub(r"\n\s*\(([A-Hc])\)\s*[、、..]?(.+?)", r"\n【【\1、】】\2", option_con)
 
+    con = con.replace("</table>【【", "</table>\n【【")
     # print(11111,option_con)
     if item_no_type == 1:
         if len(re.findall(r'【【[A-H]\s*[..、、]】】', con)) <= 3:
@@ -430,8 +431,25 @@ if __name__ == '__main__':
      下列物质与危险化学品标志的对应关系不正确的是<br/><table><tr><td>A</td><td>B</td><td>C</td><td>D</td></tr><tr><td>汽油</td><td>天然气</td><td>浓硫酸</td><td>氢氧化钠</td></tr><tr><td><img src="files/image2.png" width="125px" height="116px" /></td><td><img src="files/image3.png" width="117px" height="117px" /></td><td><img src="files/image4.png" width="118px" height="119px" /></td><td><img src="files/image5.png" width="122px" height="118px" /></td></tr></table>
     """
 
-    print(table_option_struc(stem))
-
+    # print(table_option_struc(stem))
+    one_item = {
+        'errmsgs': [],
+  'key': 'C',
+  'parse': '【详解】根据题意可知,辐射出的光子能量$\\varepsilon = 3 . 5 2 \\times 1 0 ^ { - 1 9 } '
+           'J$,由光子的能量$\\varepsilon = h v$得<br/>$\\nu = \\frac { \\varepsilon } '
+           '{ h } = 5 . 3 1 \\times 1 0 ^ { 1 4 } H z$<br/>故选C。',
+  'stem': '近年来,江西省科学家发明硅衬底氮化镓基系列发光二极管,开创了国际上第三条$L E D$技术路线。某氮化镓基$L E '
+          'D$材料的简化能级如图所示,若能级差为$2.20\\text{eV}$(约$3 . 5 2 \\times 1 0 ^ { - 1 9 '
+          '} J$),普朗克常量$h = 6 . 6 3 \\times 1 0 ^ { - 3 4 } J \\cdot '
+          's$,则发光频率约为()<br/><img height="112px" src="/word/media/image5.png" '
+          'width="140px"/><br/>A.$6 . 3 8 \\times 1 0 ^ { 1 4 } H z$B.$5 . 6 7 '
+          '\\times 1 0 ^ { 1 4 } H z$C.$5 . 3 1 \\times 1 0 ^ { 1 4 } H z$D.$4 '
+          '. 6 7 \\times 1 0 ^ { 1 4 } H z$',
+  'topic_num': 1,
+  'type': '选择题',
+    }
+    one_item = option_structure(one_item, one_item["stem"], one_item["key"], 1)
+    print(one_item)
 
 
 

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 17 - 6
structure/structure_main.py


+ 27 - 6
utils/field_eq2latex.py

@@ -1,6 +1,7 @@
 #!/usr/bin/env/python
 # -*- coding:utf-8 -*-
-
+# import sys
+# sys.path.append(r"F:\zwj\Text_Structure\new_tiku_structure_v3_sci")
 
 """
 域公式转latex
@@ -214,10 +215,29 @@ def zifu_match_combine(split_eq):
                             new_split_eq.append(new_s)
                             new_split_eq.extend(split_eq[k + 1:])
                             return zifu_match_combine(new_split_eq)
+                    elif bef_left_kuohao == "\\o":
+                        info0 = re.search(r"\\o\(([=\―→]+),\\s\\up\d\((.*?)\),\\s\\do\d\((.*?)\)\)$",
+                                           "".join(split_eq[k - subk - 1 - 1: k + 1]))
+                        info1 = re.search(r"\\o\(([=\―→]+),\\s\\up\d\((.*?)\)\)$", "".join(split_eq[k - subk - 1 - 1: k + 1]))
+                        new_s = ""
+                        if info0:
+                            # \\text{}中若含上下标,则渲染不出来,用\mathrm
+                            # mid_sign = "\!="*len(info0.group(1).replace(" ", ""))
+                            mid_sign = "".join(["\!" + i if i=="=" else "\!\!" + i for i in info0.group(1).replace(" ", "")])
+                            new_s = "\overset{\mathrm{"+info0.group(2)+"}}{\\underset{\mathrm{"+info0.group(3)+ \
+                                        "}}{"+mid_sign + "}}"
+                        elif info1:
+                            mid_sign = "".join(["\!" + i if i=="=" else "\!\!" + i for i in info1.group(1).replace(" ", "")])
+                            new_s = "\overset{\mathrm{"+info1.group(2)+"}}{"+mid_sign+"}"
+                        if new_s:
+                            new_split_eq = split_eq[:k - subk - 1 - 1]
+                            new_split_eq.append(new_s)
+                            new_split_eq.extend(split_eq[k + 1:])
+                            return zifu_match_combine(new_split_eq)
                     elif bef_left_kuohao == '\\x\\to':
                         info1 = re.search(r"\\x\\to\((.*?)\)$", "".join(split_eq[k - subk - 1 - 1:k + 1]))
                         if info1:
-                            new_s = "\\bar{{{}}}".format(info1.group(1))
+                            new_s = "\\overline{{{}}}".format(info1.group(1))
                             new_split_eq = split_eq[:k - subk - 1 - 1]
                             if new_split_eq and new_split_eq[-1] == "(":
                                 new_s = "{" + new_s + "}"
@@ -322,8 +342,8 @@ def get_latex(item, is_reparse=0, wordid="123456", must_latex=0):
                     except:
                         pass
                 else:
-                    eqs = re.sub("<sub>(.+?)</sub>", r"\s(,\1)", i.group(1))
-                    eqs = re.sub("<sup>(.+?)</sup>", r"\s(\1,)", eqs)
+                    eqs = re.sub("<sub>(.+?)</sub>", r"\\s(,\1)", i.group(1))
+                    eqs = re.sub("<sup>(.+?)</sup>", r"\\s(\1,)", eqs)
                     new_eqs2raw[eqs] = i.group(0)
             else:
                 print("域公式中含latex表达式!!!")
@@ -374,8 +394,9 @@ if __name__ == '__main__':
     # f = "【解】解析 (1)因OB绳处于竖直方向,所以B球处于平衡状态,AB绳上的拉力为零,OB绳对小球的拉力F<sub>OB</sub>=mg. (3分)<br/>(2)A球在重力mg、水平拉力F和OA绳的拉力F<sub>OA</sub>三力作用下平衡,所以OA绳对小球的拉力F<sub>OA</sub>=【域公式:eq \\\\f(mg,cos 60°)】=2mg. (3分)<br/>(3)作用力F=mgtan 60°=【域公式:eq \\\\r(3)】mg. (3分)<br/>答案 (1)mg (2)2mg (3)【域公式:eq \\\\r(3)】mg"
     # f = "B.【域公式:eq \\r(<sup>3</sup>,\\f(1,4))】"
     # f1 = "由动能定理得-W<sub>克</sub><sub>f</sub>-mgh=0-【域公式:eq \\f(1,2)】mv【域公式:eq \\o\\al(<sub>B</sub><sup>2</sup>,)】"
-    # aa = get_latex(f1, 1)
-    # print(aa)
+    f1 = r"A.Fe【域公式:eq \\o(――→,\\s\\up7(Cl2),\\s\\do5(加热))】FeCl3 B.Fe【域公式:eq \\o(――→,\\s\\up7(盐酸),\\s\\do5(常温))】FeCl3"
+    aa = get_latex(f1, 1)
+    print(aa)
 
     # tt = r"${ } _ { n H C H O } \rightarrow f H _ { 2 } C - O _ { n }$"
     # def sub1(ss):

+ 15 - 115
utils/html_again_parse.py

@@ -56,7 +56,9 @@ def css_label_wash(content):
         return content
     else:
         content = re.sub(r'<font\s+style="color: red">', "", str(content))
-        content = re.sub(r'<span\s+style="color: red">', "", str(content))
+        # content = re.sub(r'<span\s+style="color: red">', "", str(content))
+        #20240704/红色标记先保留
+        content = re.sub(r'<span\s+style="color: red">(.+?)</span>', r"【red##\1】", str(content))
         content = re.sub(r'<span\s+style="color: blue">', "", str(content))
         content = re.sub(r'<font\s+style="color: blue">', "", str(content))
         content = content.replace("</font >", "").replace("</font>", "")
@@ -76,6 +78,9 @@ def css_label_wash(content):
                                   content, flags=re.S)
             for k, img in enumerate(all_imgs):
                 content = content.replace(img, "&{}&".format(k))
+                #表格里的公式的标签需要清洗20240704 
+                if "</table>" in img and "math-tex" in img:
+                    img = re.sub(r'<span class="math-tex">(.+?)</span>', r'\1', img)
                 subs2img["&{}&".format(k)] = img
         content = re.sub(r"<(su[bp])>(.*?)</(su[bp])>", r"【\1】\2【/\3】", content)
         content = content.replace("&lt;", "【#lt;】")
@@ -85,7 +90,6 @@ def css_label_wash(content):
 
         if html.children():
             for line in html.children().items():  # <p>.*?</p>里面的内容可能会被过滤掉
-
                 test = line.text()
                 # 保留下划线及着重符标签   <span style="text-decoration: underline;">
                 # 波浪线:<span style="text-decoration: underline wavy;">
@@ -99,8 +103,7 @@ def css_label_wash(content):
                     line = pq(line)
                     new_line = list(map(lambda x: str(x).replace("【1#", '<span style="text-decoration: underline')
                                         .replace("##】", "</span>").replace("【2#", '<span class="dots">')
-                                        .replace("【p【+】style=", "<p style=").replace("##3】", "</p>").replace("【+】",
-                                                                                                             " "),
+                                        .replace("【p【+】style=", "<p style=").replace("##3】", "</p>").replace("【+】", " "),
                                         line.text().split("\n")))
                     a.extend(new_line)
                 elif str(line).startswith("<p") and line.text().strip():
@@ -163,15 +166,14 @@ def css_label_wash(content):
         new_a = re.sub("(\n\s*)+", "\n", new_a)
         # print("newa:::", new_a)
         if subs2img:
-            new_a = re.sub("|".join(subs2img.keys()), lambda x: subs2img[x.group()], new_a)
-        new_a = "<p>" + new_a.replace("\n\n", "\n").replace("\n", "</p>\n<p>") + "</p>"
-
+            new_a = re.sub(r"|".join(subs2img.keys()), lambda x: subs2img[x.group()], new_a)
+        new_a = "<p>" + new_a.replace("\n\n", "\n").replace("\n", "</p>\n<p>") + "</p>"  #第2个replace:“\n”前加r
         # for sb, img in subs2img.items():  # 2021
         #     new_a = new_a.replace(sb, img)
-
         # if parm:
         #     new_a[0] = "\xa0" * 4 + new_a[0]
         new_a = re.sub(r"【(/?su[bp])】", r"<\1>", new_a).replace("【#lt;】", "&lt;")
+        new_a = re.sub(r"【red##(.*?)】", r'<span style="color: red">\1</span>', new_a)
         return new_a
 
 
@@ -188,118 +190,16 @@ if __name__ == '__main__':
     '''
     cons1 = '''
     9 . 中国古代的政治权力由“传贤”转变为“传子”,“家天下”制度开始形成于<table name=\"optionsTable\" style=\"width:100%;table-layout:fixed;\" cols=\"4\"><tr><td>A.夏朝</td><td>B.商朝</td><td>C.周朝</td><td>D.秦朝</td></tr></table>
-    '''
-    cons2 = '''
-    <p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">1.下列对这首诗的赏析,不正确的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">小寒食舟中作</span></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">杜甫</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">佳辰强饮食犹寒,隐几萧条戴鹖冠。春水船如天上坐,老年花似雾中看。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">娟娟戏蝶过闲幔,片片轻鸥下急湍。云白山青万余里,愁看直北是长安。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">[注]这首诗写于大历五年春诗人淹留潭州时,即诗人去世前半年多。鹖(hé)冠:传为楚隐者鹖冠子所戴的帽子。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.首联中“强饮”一词是痛快豪饮的意思,表明诗人晚年要纵酒人生。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.颔联写诗人在船上所见所感,春来水涨,江流浩瀚,自己老眼昏花。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.颈联运用叠词,具有韵律美,写景由近及远,由蝴蝶而鸥鸟,层次分明。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.尾联总收全诗,诗人北望长安,思朝廷,忧愁顿生,有沉郁苍茫之美。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">A</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】</span><span style="font-family: 宋体;">“强饮”理解有误。应是“勉强吃一点饭”的意思。故选A。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">2.下列对这首诗的赏析,不正确的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">送客归江州</span></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">韩翃</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">东归复得采真</span><sup><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt; vertical-align: super;"><span style="font-family: 宋体;">①</span></span></sup><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">游,江水迎君日夜流。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">客舍不离青雀舫,人家旧在白鸥洲</span><sup><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt; vertical-align: super;"><span style="font-family: 宋体;">②</span></span></sup><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">风吹山带遥知雨,露湿荷裳已报秋。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">闻道泉明</span><span style="font-family: 宋体;">③居止近,篮舆相访为淹留。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【注】</span><span style="font-family: 宋体;">①采真:道教语,指顺乎天性,放任自然。②白鸥洲:指白鸥翔集的沙洲。此处借指客之家乡。③泉明:指晋陶渊明,此称其为泉明,乃避唐高祖李渊之讳。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.这首诗写诗人送客人归江州隐居,但并无送别时的伤感,更多的是一种美好的祝福。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.“江水迎君”采用拟人手法,客人归心似箭、归程片刻不能迟的心态跃然纸上。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.“青雀舫”“白鸥洲”写出了诗人对客人旅舟华美,家乡景色宜人的赞美与羡慕。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.尾联写诗人听说陶渊明居所离客人很近,定会借探访陶渊明居所之机去拜访客人。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">D</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】</span><span style="font-family: 宋体;">D项,“定会借探访陶渊明居所之机去拜访客人”错误。尾联的意思是听说陶渊明居住的地方就在附近,你可以常常乘着竹轿,前往拜访。表达了诗人对客人隐逸情怀的赞美与羡慕。故选D。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">3.对下面这首词的赏析,不恰当的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">渔家傲</span></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">范仲淹</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">塞下秋来风景异,衡阳雁去无留意。四面边声连角起,千嶂里,长烟落日孤城闭。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">浊酒一杯家万里,燕然未勒归无计。羌管悠悠霜满地,人不寐,将军白发征夫泪。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.这首词写出了我国北方秋季的景物特点,从词中的“塞下”“霜”等词语可以看出。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.“衡阳雁去”是说“大雁向衡阳飞去”而不是“大雁从衡阳飞走了”。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.这首词既表达了将士的爱国之心,又流露出思念亲人和家乡的感情。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.这首词感情悲观而消极,表达了鲜明的反战、厌战情绪。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">D</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】这首《渔家傲》为范仲淹创作,以描写北方秋季景物为背景,表达了作者对家国、亲人的思念以及将士们的英勇豪情。</span><span style="font-family: 宋体;">A项正确,词中的“塞下”“霜”等词语揭示了北方的秋季特点;B项正确,作者借衡阳雁南飞的景象暗示将士们向往家乡的渴望;C项正确,通过浓烈的爱国情感以及思念亲人的情绪表现,展现了作者的家国情怀和将士们的壮志豪情。不过,D项表述错误,词中并未明显表达反战、厌战情绪,其主要表达了将士们为国家和民族拼搏的精神。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">4.对下面这首唐诗,赏析不恰当的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">早梅</span></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">万木冻欲折,孤根暖独回。前村深雪里,昨夜一枝开。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">风递幽香出,禽窥素艳来。明年如应律,先发望春台。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.首联把梅花与万木进行对比,万木的干枯摧折既有力地衬托了梅花的迎风斗雪,又好地照应了诗题中的“早”。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.颔联用华丽的语言为读者描绘出了一幅浓艳、高贵的雪中梅花图。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.颈联写梅花的风韵和姿色,尾联寄寓诗人深深的情思。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.这首咏梅诗,语言清丽,笔墨含蓄,有着强烈的艺术感染力。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">B</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】选项</span><span style="font-family: 宋体;">B不太恰当。颔联并没有用华丽的语言来描绘梅花,只是表达了梅花在寒雪中展现出的独立、高洁的风韵。这里并没有像选项B所说的“浓艳、高贵”。其余选项都能恰当地反映这首诗的特点和内容,因此答案选B。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">5.对下面这首宋诗理解与赏析,不恰当的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">村行</span></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">王禹偁</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">马穿山径菊初黄,信马悠悠野兴长。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">万壑有声含晚籁,数峰无语立斜阳。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">棠梨叶落胭脂色,荞麦花开白雪香。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">何事吟余忽惆怅?村桥原树似吾乡。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.首联照应题目,点明地点和时令,写出了诗人信马徐行、观赏山野景色的悠然兴致。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.第二联上下句构成对比,生动地表现出山中有时喧响有时静穆的景象。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.第三联以“胭脂”和“白雪”为喻,形象地描绘出山村绚丽多彩的秋景。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.最后两句设为问答,抒发了诗人由外界景物所触发的浓浓的思乡之情。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">B</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】本题考查了对宋诗《村行》的理解与赏析。首先,</span><span style="font-family: 宋体;">A选项指出首联照应题目,点明了诗人信马行走在山间小路,看到菊花初黄,意境开阔。B选项提到第二联表现了山中有时喧响有时静穆的景象,但该联实际上并没有对比色彩,而是展示出千山万壑中奔涌着生机勃勃的晚响,无言的数峰沐浴在斜阳中。C选项正确地概括了第三联的内容,诗人通过赞美胭脂色的棠梨叶和白雪般芬芳的荞麦花存在世上,描绘出色彩斑斓的美景。D选项陈述了诗末以问答形式流露出的诗人对故乡情感的深刻思索。因此,答案为B选项,不恰当地解读了第二联。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">6.下列对这首诗的赏析,不正确的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">酬元九侍御赠璧竹鞭长句</span></strong><strong><sup><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt; vertical-align: super;"><span style="font-family: 宋体;">①</span></span></sup></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">刘禹锡</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">碧玉孤根生在林,美人相赠比双金。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">初开郢客缄封后,想见巴山冰雪深。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">多节本怀端直性,露青犹有岁寒心。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">何时策马同归去,关树扶疏</span><sup><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt; vertical-align: super;"><span style="font-family: 宋体;">②</span></span></sup><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">敲镫吟。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">[注]①此诗写于“永贞革新”失败后,作者被贬为朗州(今湖南)司马之时。元九,即诗人元稹,当时被贬为江陵(今湖北荆州)府士曹参军。②关树:关中之树。扶疏:枝叶繁茂。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.首联运用比兴手法,以碧玉般竹鞭的名贵,暗示赠鞭者的高尚,赞扬之情跃然纸上</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.颔联写诗人看到朋友赠礼后内心非常欣喜,很想去观赏生长碧竹的巴山冰雪美景。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.颈联通过“节”字,将“竹节”与“节操”相关联,把咏鞭与赞人联系在了一起。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.尾联由竹鞭引发联想,表达了诗人愿与友人“策马同去”“敲镫吟诗”的美好愿望。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">B</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】</span><span style="font-family: 宋体;">“很想去观赏生长碧竹的巴山冰雪美景”赏析有误。领联表达的意思是,我一打开郢客的缄封之后,立刻想到冰冻巴山雪深深。目睹竹鞭而展开联想,写出了制鞭之竹在“巴山冰雪”中傲然挺立的景象。这是对元稹不畏权势、宁折不弯的形象写照。是以竹喻人,表达对友人的赞美。译文:绿如碧玉的孤竹生在深林,用它制的璧竹鞭名贵万分;贤稳之人将竹鞭赠送给我,这份厚礼胜过了万两黄金。我一打开郢客的绒封之后,立刻想到冰冻巴山雪深深。鞭上多节,节节怀着端直性,遍体露青犹有岁寒后凋心。我们何时才能策马同归去,在扶疏的关树下敲镫高吟?</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">7.下列对这首诗的赏析,不正确的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">见别离者因赠之</span></strong><strong><sup><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt; vertical-align: super;"><span style="font-family: 宋体;">①</span></span></sup></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">韩偓</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">征人草草尽戎装,征马萧萧立路傍。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">尊酒阑珊将远别,秋山迤逦更斜阳。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">白髭兄弟中年后,瘴海程途万里长。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">曾向天涯怀此恨,见君呜咽更凄凉。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">[注]①诗人生活在唐末战乱之际,当时自北而南,沿路所见,皆发于诗。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.标题点明本诗写作的原由,“别离”一词陡生无限伤感情绪,奠定全诗情感基调。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.首联紧扣“征人”与“征马”两个形象,真切地描绘了出征时的情景,画面感很强</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.颔联描写的是别后想象的虚景,诗人想象征人在离别亲人后沿着秋山远行的景象。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.尾联直抒胸臆,眼前的别离勾起了诗人对自身的感叹,抒发了心中的无奈和感慨。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">C</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】</span><span style="font-family: 宋体;">“颔联描写的是别后想象的虚景”说法错误,“尊酒阑珊将远别”是眼前实景。</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">8.下列对这首诗的赏析,不正确的一项是( &nbsp;&nbsp;)</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><strong><span style="mso-spacerun: 'yes'; font-family: 宋体; mso-ansi-font-weight: bold; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">大热五首(其一)</span></strong></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">戴复古</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">天地一大窑,阳炭烹六月。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">万物此陶镕,人何怨炎热。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">君看百谷秋,亦自暑中结。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">田水沸如汤,背汗湿如泼。</span></p>
-<p class="MsoNormal" style="text-align: center; vertical-align: middle; line-height: 150%;" align="center"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">农夫方夏耘,安坐吾敢食!</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">【注】陶镕:陶铸熔炼,比喻培育、造就。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">A.诗人把六月的天地比作一个大窑,太阳像炭火一样熔炼着其中的一切。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">B.暑天虽极炎热,诗人却认为不应抱怨,因为秋天的谷物均赖此而结实。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">C.田中的水被晒得似乎要沸腾,诗人的背上汗水流得就像刚刚用水泼过。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">D.这首诗描写暑热多用比喻和夸张修辞,语言平易浅近,风格质朴自然。</span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【答案】</span><span style="font-family: 宋体;">C</span></span></p>
-<p class="MsoNormal" style="vertical-align: middle; line-height: 150%;"><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;"><span style="font-family: 宋体;">【解析】</span><span style="font-family: 宋体;">“背汗湿如泼”描写的是</span></span><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">农夫在暑热中辛苦劳作的情景</span><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">,</span><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">而不是指</span><span style="mso-spacerun: 'yes'; font-family: 宋体; font-size: 10.5000pt; mso-font-kerning: 1.0000pt;">诗人自己。</span></p>
     '''
     # pprint(cons)
     # print(again_parse(cons))
     # print(again_parse(cons))
     # print(list(map(lambda x: str(x).replace("     ", " "), again_parse(cons))))
     # con1 = r'<p>解:A.研究跨栏动作时,刘翔的大小和形状不能忽略,不能看作质点,故A错误;<br/>B.选取不同的参考系,物体的运动状态是不相同的,故B错误;<br/>C.出租车收费是按路程收费的,故C错误;<br/>D.第<img src="http://192.168.1.145:10811/static/physical_formulas_imgs/16184553930794225.png" data-latex="${4 \rm{s} }$" width="13",height="11" />是指<img src="http://192.168.1.145:10811/static/physical_formulas_imgs/16184553931930702.png" data-latex="${1 \rm{s} }$" width="12",height="11" />的时间,是指从<img src="http://192.168.1.145:10811/static/physical_formulas_imgs/16184553930220437.png" data-latex="${3 \rm{s} }$" width="13",height="11" />末到<img src="http://192.168.1.145:10811/static/physical_formulas_imgs/16184553930794225.png" data-latex="${4 \rm{s} }$" width="13",height="11" />末这一段时间,故D正确;<br/>故选:D.</p>'
-
-    cons = css_label_wash(cons2)
+    path2 = r"F:\zwj\Text_Structure\accept_files\667d0bec1f8a0743e2aabc78_2.html"
+    html = open(path2, "r", encoding="utf-8").read()
+    cons = css_label_wash(html)
+    with open(r"F:\zwj\Text_Structure\accept_files\temp.txt", "w",encoding='utf-8') as f:
+        f.write(cons)
 
     print(cons)

+ 4 - 3
utils/ruku_opera.py

@@ -349,12 +349,13 @@ class Ruku():
             def sub2(s):
                 """将试题中的latex转为线上可访问图片地址"""
                 if s:
-                    all_ltx = re.findall(r'\$.*?\$', s)  # 查找试题结构中的公式
+                    all_ltx = re.findall(r'(?<!=")\s*\$.*?\$', s)  #20240709限制提取的latex公式范围
+                    # all_ltx = re.findall(r'\$.*?\$', s)  # 查找试题结构中的公式,可能有粘贴过来的公式,其属性含有data-latex
                     all_ltx.extend(re.findall('\\\\\(.*?\\\\\)', s))
                     all_ltx = list(set(all_ltx))
                     for ltx in all_ltx:
-                        new_ltx = ltx.replace("$", "").replace("\\(", "").replace("\\)", "")\
-                            .replace("&amp;", "&").replace("&lt;", "<")  #.replace(" ", "\\u200a")
+                        new_ltx = ltx.strip().replace("$", "").replace("\\(", "").replace("\\)", "")\
+                            .replace("&amp;", "&").replace("&lt;", "<").replace("<br/>", " ")  #.replace(" ", "\\u200a")
                         if new_ltx in self.ltx2url:  # 将latex换为其渲染图片的线上可访问地址
                             s = s.replace(ltx, self.ltx2url[new_ltx])
                         else:

+ 21 - 14
utils/washutil.py

@@ -368,7 +368,7 @@ class HtmlWash():
             # kk = re.search('(<img src=".*?image\d+\.(png|gif|jpg|jpeg))', src)
             # new_src = src.replace(kk.group(1), self.img_url[kk.group(1)]) if type(self.img_url) == dict and kk else src
             # 图片信息简化替换
-            print(src)
+            # print(src)
             new_src = re.sub(r'( data-latex)="\s*\\\[(.*?)\\\]\s*"', r'\1="$\2$"', src)
             new_src = re.sub(r'( data-latex="\$[^"]+?\$")',
                              lambda x: x.group(1).replace("<", " \lt ").replace("  ", " "), new_src)
@@ -382,7 +382,7 @@ class HtmlWash():
             # image_id = re.search(r'<img src=".*?/(new_)?image([\da-z]+)\.', src).group(2)
             image_info = re.search(r'<img src=".*?/([^/]+?)/(new_)?image([\da-z]+)\.', src)  # 2023.12.1
 
-            print(image_info.groups())
+            # print(image_info.groups())
             image_id = image_info.group(1) + image_info.group(3)
             if len(image_id) > 10:
                 image_id = image_id[-10:]
@@ -414,8 +414,8 @@ class HtmlWash():
         # <造成的css标签冲突处理  2021-10-13
         def sub2(ss):
             if re.search(r'^(img|/?h[123456]|/?su[bp]>|t\d+b>|br\s*/?>'
-                         r'|/?(p|span|font|article|ul|ol|div|table|t?body|html|head|t[drh])(\s*|\s+style=.*?")>'
-                         r'|/?[a-z]+ style=.*?">)', ss.group(1)) is None:
+                         r'|/?(p|span|font|article|ul|ol|div|table|t?body|html|head|t[drh])(\s*|\s+style=.*?"|\s+class=.*?")>'
+                         r'|/?[a-z]+ style=.*?">|meta [a-z]+=.*?"\s*/?>)', ss.group(1)) is None:
                 return "&lt;{}".format(ss.group(1))
             else:
                 return "<{}".format(ss.group(1))
@@ -758,21 +758,25 @@ def wash_after(res_dict, paperid,subject="数学"):
         # ----------------------------------------------------------------
     # 物理题型批量调接口:节约时间
     if "物理" in subject:
-        t1 = time.time()
-        epoches = int(len(all_content_str_list) / 10)
+        non_xuanze_idx = [id for id, tp in enumerate(topic_type_list) if tp not in ['单选题', '多选题', '选择题']]
+        topic_type_list = [tp for tp in topic_type_list if tp not in ['单选题', '多选题', '选择题']]
+        all_content_str_list = [con for id, con in enumerate(all_content_str_list) if id in non_xuanze_idx]
+        epoches = int(len(all_content_str_list) / 20)
         pred_topic_types = []
+        stime = time.time()
         if epoches > 0:
             last = 0
             for epoch in range(epoches):
                 input_data = {"content": all_content_str_list[last:(epoch+1)*10], "period": "高中",
                               "topic_type": topic_type_list[last:(epoch+1)*10]}
-                last = (epoch+1)*10
+                last = (epoch+1)*20
                 try:
                     r = requests.post(url=configs.phy_topicType_ip, json=input_data)
                     pred_topic_types.extend(r.json()["res"])
                 except Exception as e:
-                    print(e)
-                    pred_topic_types.extend([""]*10)
+                    # print(e)
+                    pred_topic_types.extend([""]*20)
+                    logger.info("----【paper_id:{}】题型预测报错:{}".format(paperid, str(e)))
             rest_con = all_content_str_list[last:]
             rest_topic_type = topic_type_list[last:]
         else:
@@ -780,20 +784,22 @@ def wash_after(res_dict, paperid,subject="数学"):
             rest_topic_type = topic_type_list
         if rest_con:
             input_data = {"content": rest_con, "period": "高中", "topic_type": rest_topic_type}
+            # print(rest_con)
             try:
                 r = requests.post(url=configs.phy_topicType_ip, json=input_data)
                 pred_topic_types.extend(r.json()["res"])
             except Exception as e:
-                print(e)
+                # print(e)
                 pred_topic_types.extend([""] * len(rest_con))
+                logger.info("----【paper_id:{}】题型预测报错:{}".format(paperid, str(e)))
+        logger.info("----【paper_id:{}---{}题】采用题型预测服务花费time:{}".format(paperid, len(rest_topic_type), time.time() - stime))
         # 将预测题型替换到res_dict中
-        if any([True for i in pred_topic_types if i]) and len(pred_topic_types) == len(res_dict):
+        if any([True for i in pred_topic_types if i]) and len(pred_topic_types) == len(non_xuanze_idx):
             for idx, pred_type in enumerate(pred_topic_types):
-                if pred_type and res_dict[idx]['type'] in ["填空题", "解答题"]:
+                if pred_type and res_dict[non_xuanze_idx[idx]]['type'] in ["填空题", "解答题"]:
                     if pred_type == "简答题":
                         pred_type = "解答题"
-                    res_dict[idx]['type'] = pred_type
-        logger.info("----【paper_id:{}】采用题型预测服务花费time:{}".format(paperid, time.time() - t1))
+                    res_dict[non_xuanze_idx[idx]]['type'] = pred_type
     # --------------------------------------------------------------
     # 换行符替换
     convert_huanhang(res_dict)
@@ -849,6 +855,7 @@ def convert_huanhang(items_list):
                                             items_list["stem"])
     elif isinstance(items_list, str):
         item_str = items_list.strip().replace("\n\n", "\n")
+        item_str = re.sub(r"( {2,})", lambda x: "&nbsp;"*(x.group(1).count(" ")), item_str) #20240710空格用&nbsp;
         item_str = re.sub(r'(</table>)(<br\s*/?>|\n)+', r"\1", item_str)
         return item_str.replace("\n", "<br/>")
     else:

+ 9 - 0
更改记录.txt

@@ -0,0 +1,9 @@
+1、再解析清洗中,遇到表格里面公式的情况,需要对公式的标签"math-tex"进行清洗--->修改html_again_parse.py
+2、入库保存记录中发现存在一些“latex替换为imgurl失败”的情况,
+   发现是:从items_list中获取$xxx$公式时,经过查重替换的公式也被提取出来,但替换后的latex与原本的latex不一定写法完全一样
+   修改:ruku_opera.py中sub2
+3、再解析清洗中,对于红色标记的公式先保留标记(修改html_again_parse.py),
+   但需确定svg生成的图片是否也显示红色?====>已验证:带红色标记的latex生成svg,svg2png后显示黑色
+   最后保存入库时,将红色标记去掉--->在ruku_opera.py中修改sub1
+4、试题中的多个连续空格在前端显示时会被折叠成1个,需要将多个空格改为多个&nbsp;====>修改washutil.py中的convert_huanhang
+2024.7.11 html清洗时漏掉了<meta charset="utf-8" />标签的情况:该标签开头的“<”不能替换为&lt;  ====》 修改washutil.py中html_cleal 

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio