|
@@ -1,7 +1,7 @@
|
|
|
#!/usr/bin/env python
|
|
|
# -*- coding:utf-8 -*-
|
|
|
-# import sys
|
|
|
-# sys.path.append(r"F:\zwj\Text_Structure\new_tiku_structure_v3_sci")
|
|
|
+import sys
|
|
|
+sys.path.append(r"F:\zwj\Text_Structure\new_tiku_structure_v3_sci")
|
|
|
|
|
|
from pprint import pprint
|
|
|
from typing import Any
|
|
@@ -343,7 +343,7 @@ if __name__ == '__main__':
|
|
|
|
|
|
# path2 = r"C:\Users\Python\Desktop\bug\5-9\663c90361ec1003b58557474.html"
|
|
|
path2 = r"F:\zwj\Text_Structure\accept_files\667cb9c0c3c4da9e7009b8c4.html"
|
|
|
- path2 = r"F:\zwj\Text_Structure\accept_files\668f4d57c3c4da9e7009bcd8.html"
|
|
|
+ path2 = r"F:\zwj\Text_Structure\accept_files\66e3ec74c3c4da9e7009cfb5.html"
|
|
|
# path2 = r"C:\Users\Python\Desktop\bug\6419746d11a1cdad550f5502.html"
|
|
|
# path2 = r"F:\zwj\Text_Structure\new_tiku_structure_v3_sci\data\620bbf7aa7d375f4518b98e1.html"
|
|
|
# path2 = r"F:\zwj\new_word_text_extract_v2\data\地理\2\2020-2021学年广东省揭阳市揭西县五校九年级(下)第二次联考地理试卷-普通用卷.html"
|
|
@@ -361,9 +361,9 @@ if __name__ == '__main__':
|
|
|
# print(html)
|
|
|
# html = "\n1、已知集合M满足{1,2}≤M≤{1,2,5,6,7},则\n符合条件的集合M有__个."
|
|
|
# html = html.replace('<img src="files', '<img src="/word/media')
|
|
|
- res1 = WordParseStructure(html, "668f4d57c3c4da9e7009bcd8",
|
|
|
+ res1 = WordParseStructure(html, "66e3ec74c3c4da9e7009cfb5",
|
|
|
is_reparse=1, must_latex=1,
|
|
|
- source="qtk", subject="数学")()
|
|
|
+ source="qtk", subject="高中化学")()
|
|
|
# new_fpath = os.path.join(r"F:\zwj\Text_Structure\fail_files", "res1.html")
|
|
|
# re_f = open(new_fpath, 'a+', encoding='utf-8')
|
|
|
# for i in res1[0]["items"]:
|