Parcourir la source

支持中职解析,调准入库参数

莺声燕语 il y a 1 an
Parent
commit
e2b1e633ac
3 fichiers modifiés avec 6 ajouts et 4 suppressions
  1. server.py (+1 -1)
  2. utils/item_type_line.py (+1 -1)
  3. utils/ruku_opera.py (+4 -2)

+ 1 - 1
server.py

@@ -124,7 +124,7 @@ def danti_structure():
 @app.route('/ruku', methods=["GET", "POST"])
 def ruku():
     wordid = request.json.get("paper_id", "")
-    subject = request.json.get("subject", "")
+    subject = request.json.get("subject", "")  # 实际传入subject_id(int型)
     items_list = request.json.get("structured_items", "")  # 结构化试题
     ocr_html_data = request.json.get("html_data", "")  # 文本原始内容
     svg_data = request.json.get("svgs", {"svg_html_data": "", "svg_path": ""})  # mathjax的相关文本

+ 1 - 1
utils/item_type_line.py

@@ -47,7 +47,7 @@ def get_item_head_info(items_str):
 
     pattern1 = re.compile(r"[,,.第](\d+)题[^\d]{,5}(\d[\d.]{,2}\d{,2})分")
     pattern2 = re.compile(r"[,,.第](\d+)题?([,,、]\d+题?[,,、])*?[,,、~~](\d+)题?为选[做考]题")
-    pattern3 = re.compile(r"[共计有包含括]+?(\d+)个?小题.*?共(\d[\d.]{,2}\d{,2})分|[共计有]+?(\d+)个?小题")
+    pattern3 = re.compile(r"[共计有包含括]+?(\d+)个?[大]题.*?共(\d[\d.]{,2}\d{,2})分|[共计有]+?(\d+)个?[大]题")
     pattern4 = re.compile(r"共[计有]*?(\d[\d.]{,2}\d{,2})分")
     each_item_score2 = {}  # 第xx题xx分
     select_type_id = []  # 选做题id   第xx题为选做题

+ 4 - 2
utils/ruku_opera.py

@@ -14,6 +14,7 @@ from utils.label_data_Hphy import Label
 from bisect import bisect_left
 import re, time, os, json, datetime
 from copy import deepcopy
+import traceback
 
 logger = configs.myLog(__name__, log_cate="ruku_log").getlog()
 
@@ -502,6 +503,7 @@ class Ruku():
             return res_zyk
         except Exception as e:
             print("------【paper_id:{}】最后的保存失败:{}-------".format(self.wordid, str(e)))
+            print(traceback.print_exc())
             logger.info(json.dumps({"log_level": "warn",
                                     "paper_id": self.wordid,
                                     "status": "入库失败",
@@ -609,7 +611,7 @@ class Ruku():
         # -----------难度和知识点自动标注------------------------
         t11 = time.time()
         diffs_xbk = [""] * len(items_res_to_zyk)
-        if self.subject == "高中数学":
+        if self.subject == 3:  # "高中数学"
             from multiprocessing.dummy import Pool as ThreadPool
             pool = ThreadPool(2)  # 比# pool = multiprocessing.Pool(3)速度快
             diff_info = list(pool.map(get_item_diff, items_res_to_zyk))
@@ -622,7 +624,7 @@ class Ruku():
         t22 = time.time()
         # print(one_items)
         chachong_item_dict = {"topic_id": id + 1,
-                              "subject_id": subject_id[self.subject],
+                              "subject_id": self.subject,
                               "topic_type_id": one_items["checkType"]["id"],
                               "content": one_items["stem"]}
         if "options" in one_items: