5 місяців тому · 7483661600
--- a/structure/structure_main.py
+++ b/structure/structure_main.py
@@ -1,6 +1,7 @@
 
				 #!/usr/bin/env/python
			
 
				 # -*- coding:utf-8 -*-
			
 
				 
			
 
				+
			
 
				 from pprint import pprint
			
 
				 # from utils.exam_type import get_exam_type
			
 
				 from structure.final_structure import one_item_structure
			
@@ -13,6 +14,7 @@ from func_timeout import func_set_timeout
 
				 
			
 
				 from utils.xuanzuoti2slave import toslave_bef, toslave_aft
			
 
				 
			
 
				+
			
 
				 paper_types = ["第三种试卷格式：题目与答案分开",
			
 
				                "第二种试卷格式: 不同时含有或都不含有{答案}和{解析}关键字",
			
 
				                "第一种试卷格式：教师用卷，含答案和解析关键字"]
			
@@ -216,11 +218,12 @@ if __name__ == '__main__':
 
				     #    6239991e6ca622396925f66b     624cf82d12cd45a7836f3430  626b4b1f81b582c0470d01b0
			
 
				     # 627b64b0814132f0d7b12589    627b622981b582c0470d020e
			
 
				     # 6294326cf84c0e279ac6484e.html   62903acaf84c0e279ac647fb
			
 
				-    path2 = r"C:\Users\Python\Desktop\62d8eaaa6c6aff2279346c1e.html"
			
 
				+    path2 = r"C:\Users\Administrator\Desktop\666a67fec3c4da9e7009b531.html"
			
 
				+    path2 = r"C:\Users\Administrator\Desktop\66459c62c3c4da9e7009ae9d.html"
			
 
				     # path2 = r"F:\zwj\Text_Structure\accept_files\62aae86a765759d85567a475.html"
			
 
				-    # html = open(path2, "r", encoding="utf-8").read()
			
 
				+    html = open(path2, "r", encoding="utf-8").read()
			
 
				     # html = json.loads(html)  621845626ca622396925f55c
			
 
				-    html = """
			
 
				+    html2 = """
			
 
				 1. I’m anxious___________ your injury.Are you feeling any better now?
			
 
				 2. After he was back on his feet, he was anxious___________ (return) to school as soon as possible.
			
 
				 3. Helen was ___________ to death when she saw the ___________scene.She hid herself in the corner, shaking with___________(fright).
			
@@ -234,7 +237,7 @@ if __name__ == '__main__':
 
				 11. He has worked for nearly 20 years, so he is senior ___________ most of his workmates.
			
 
				 12. Although he is three years junior ___________ me, he has more work experience.
			
 
				     """
			
 
				-    res1 = StructureExporter(html, "", "语文", 1).export()
			
 
				+    res1 = StructureExporter(html, "202406131725", "语文", 1).export()
			
 
				     # new_fpath = os.path.join(r"G:\zwj\WL\Text_Structure\fail_files", "res_政治.json")
			
 
				     # re_f = open(new_fpath, 'w', encoding='utf-8')
			
 
				     # json.dump(res1[0]["items"], re_f, ensure_ascii=False)
			
--- a/utils/html_again_parse.py
+++ b/utils/html_again_parse.py
@@ -87,8 +87,9 @@ def css_label_wash(content):
 
				         a = []
			
 
				 
			
 
				         if html.children():
			
 
				+            # temph = [str(i) for i in html.children().items()]
			
 
				             for line in html.children().items():  # <p>.*?</p>里面的内容可能会被过滤掉
			
 
				-                test = line.text()
			
 
				+                # test = str(line)  # line.text()
			
 
				                 # 保留下划线及着重符标签   <span style="text-decoration: underline;">
			
 
				                 # 波浪线：<span style="text-decoration: underline wavy;">
			
 
				                 # pq会将多个空格换成一个
			
@@ -160,6 +161,8 @@ def css_label_wash(content):
 
				                 elif str(line).startswith("<ol"):
			
 
				                     for i, ss in enumerate(line.children().items()):
			
 
				                         a.append(str(i + 1) + "." + ss.text())
			
 
				+                elif str(line).startswith("<br>") or str(line).startswith("<br/>"):
			
 
				+                    a.append(str(line))
			
 
				                 else:
			
 
				                     # print('test:',line.text())  # 自动去掉了图片
			
 
				                     if line.text().strip():