server.py 10 KB


  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. # from gevent import monkey
  4. # monkey.patch_all() # 打上猴子补丁
  5. import pickle
  6. from flask import Flask, render_template, send_from_directory
  7. from flask import request, redirect, Response
  8. from flask_cors import *
  9. import re
  10. import traceback
  11. from flask.views import MethodView
  12. from multiprocessing import Process, Queue
  13. import configs
  14. from structure.danti_structure import single_parse
  15. from structure.structure_main import WordParseStructure
  16. import os, datetime, hashlib
  17. import time, json, random
  18. import pprint
  19. from utils.ruku_opera import Ruku
  20. from pprint import pprint
# Module-wide logger built by the project's configs.myLog helper, using the
# "reparse_ruku_log" log category.
logger = configs.myLog(__name__, log_cate="reparse_ruku_log").getlog()
# The Flask application object that every route below is registered on.
app = Flask(__name__)
# NOTE(review): debug mode is switched on unconditionally here — confirm this
# is intended outside of local development.
app.debug = True
# Enable CORS handling for the whole app, with credential support requested.
CORS(app, supports_credentials=True)
  25. @app.route('/word_structure', methods=["GET", "POST"])
  26. def word_structure():
  27. """
  28. word 批量结构化
  29. :return:
  30. """
  31. mydata = request.json.get("sci_html_data", "")
  32. is_reparse = request.json.get("is_reparse", "0")
  33. word_id = request.json.get("paper_id", 0)
  34. must_latex = request.form.get("must_latex", 1)
  35. print("【再解析】==request.POST.dict==>is_reparse:{}, word_id:{}".format(is_reparse, word_id))
  36. # print(mydata)
  37. loginfo = {"log_level": "info",
  38. "request_ip": request.remote_addr,
  39. "receive_data": {"paper_id": word_id,
  40. "is_reparse": is_reparse},
  41. "task_name": "批量文本结构化解析"}
  42. # 接收的文件记录一下,按wordid命名
  43. # time_str = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S')
  44. # new_fpath = os.path.join(configs.FAIL_FOLDER, str(time_str)+".html")
  45. if not word_id:
  46. word_id = get_wordid(mydata)
  47. loginfo["receive_data"]["is_auto_id"] = 1
  48. loginfo["receive_data"]["paper_id"] = word_id
  49. logger.info(json.dumps(loginfo, ensure_ascii=False))
  50. if word_id:
  51. getfile_savepath = os.path.dirname(os.getcwd()) + '\\accept_files\\' + str(word_id) + ".html"
  52. if os.path.exists(getfile_savepath):
  53. print("同一份wordid文件发送多次:{}".format(word_id))
  54. re_f = open(getfile_savepath, 'w', encoding='utf-8')
  55. re_f.write(mydata)
  56. re_f.close()
  57. result = {"errcode": 0, "errmsgs": "", "data": {}}
  58. st1 = time.time()
  59. try:
  60. if int(is_reparse) and word_id: # 再解析
  61. res, paper_type = WordParseStructure(mydata, str(word_id), int(is_reparse), must_latex).structure()
  62. # print(res)
  63. if "errcode" not in res:
  64. result["data"] = res
  65. else:
  66. result = res
  67. print("【再解析】==解析结束==> word_id:{}".format(word_id))
  68. elif not int(is_reparse) and mydata: # 不是再解析
  69. res, paper_type = WordParseStructure(mydata, "").structure()
  70. # print(res)
  71. if "errcode" not in res:
  72. result["data"] = res
  73. else:
  74. result = res
  75. print("【再解析】==解析结束==> word_id:{}".format(word_id))
  76. else:
  77. result["errmsgs"] = "无data或paper_id"
  78. result["errcode"] = 1
  79. if result["errcode"]:
  80. logger.info(json.dumps({"log_level": "info",
  81. "paper_id": word_id,
  82. "status": "解析失败",
  83. "errmsg": result["errmsgs"],
  84. "task_time": time.time() - st1,
  85. "task_name": "批量文本结构化解析"}, ensure_ascii=False))
  86. else:
  87. logger.info(json.dumps({"log_level": "info",
  88. "paper_id": word_id,
  89. "status": "解析成功",
  90. "task_time": time.time() - st1,
  91. "task_name": "批量文本结构化解析"}, ensure_ascii=False))
  92. except Exception as e:
  93. # 先保存文件
  94. # now_time = datetime.datetime.now()
  95. # time_str = datetime.datetime.strftime(now_time, '%Y_%m_%d_%H_%M_%S')
  96. # aft_modify = (str(random.random())).encode("utf-8")
  97. # aft_name = hashlib.md5(aft_modify).hexdigest() + '__' + time_str + '.json'
  98. print("【再解析】==解析失败==> word_id:{}, 报错信息:{}".format(word_id, traceback.format_exc()))
  99. new_fpath = configs.FAIL_FOLDER + '/' + str(word_id) + '.json'
  100. re_f = open(new_fpath, 'w', encoding='utf-8')
  101. json.dump(mydata, re_f, ensure_ascii=False)
  102. result["errmsgs"] = "解析失败"
  103. result["errcode"] = 1
  104. logger.info(json.dumps({"log_level": "warn",
  105. "paper_id": word_id,
  106. "status": "解析失败",
  107. "errmsg": str(e),
  108. "task_time": time.time() - st1,
  109. "task_name": "批量文本结构化解析"}, ensure_ascii=False))
  110. return json.dumps(result, ensure_ascii=False)
  111. @app.route('/danti_structure', methods=["GET", "POST"])
  112. def danti_structure():
  113. """
  114. 单题再解析、结构化
  115. :return:
  116. """
  117. word_id = request.json.get("paper_id", 0)
  118. one_item = request.json.get("single_item_data", "")
  119. item_type = request.json.get("item_type", "")
  120. print("【单题解析】==request.POST.dict==>word_id:{}, item_type:{}".format(word_id, item_type))
  121. # logger.info("【单题解析】==request.POST.single_item_data==>\n{}\n".format(one_item))
  122. print(word_id, item_type)
  123. loginfo = {"log_level": "info",
  124. "request_ip": request.remote_addr,
  125. "receive_data": {"paper_id": word_id,
  126. "item_type": item_type},
  127. "task_name": "单题解析"}
  128. if not word_id:
  129. word_id = get_wordid(one_item)
  130. loginfo["receive_data"]["is_auto_id"] = 1
  131. loginfo["receive_data"]["paper_id"] = word_id
  132. logger.info(json.dumps(loginfo, ensure_ascii=False))
  133. res = {"errcode": 0, "errmsgs":"", "data": {}}
  134. if item_type:
  135. one_res = single_parse(one_item, item_type, word_id)
  136. # pprint(one_res)
  137. if type(one_res) == str:
  138. res["errcode"] = 1
  139. res["errmsgs"] = one_res
  140. else:
  141. res["data"] = one_res
  142. else:
  143. res["errcode"] = 1
  144. res["errmsgs"] = "没有选定题型"
  145. return json.dumps(res, ensure_ascii=False)
  146. # class rukuAPI(MethodView):
  147. # def post(self):
  148. # self.wordid = request.json.get("paper_id", "")
  149. # subject = request.json.get("subject", "")
  150. # items_list = request.json.get("structured_items", "") # 结构化试题
  151. # ocr_html_data = request.json.get("html_data", "") # 文本原始内容
  152. # svg_data = request.json.get("svgs", {"svg_html_data": "", "svg_path": ""}) # mathjax的相关文本
  153. # callback_info = request.json.get("user", {"callback_url": "", "source": ""})
  154. # def upload_img(self):
  155. # return
  156. @app.route('/ruku', methods=["POST"])
  157. def ruku():
  158. wordid = request.json.get("paper_id", "")
  159. subject = request.json.get("subject", "")
  160. items_list = request.json.get("structured_items", "") # 结构化试题
  161. ocr_html_data = request.json.get("html_data", "") # 文本原始内容
  162. svg_data = request.json.get("svgs", {"svg_html_data": "", "svg_path": ""}) # mathjax的相关文本
  163. callback_info = request.json.get("user", {"callback_url": "", "source": ""})
  164. print("【入库】==request.POST.dict==>word_id:{}, callback_info:{},subject:{}"
  165. .format(wordid, callback_info, subject))
  166. loginfo = {"log_level": "info",
  167. "request_ip": request.remote_addr,
  168. "receive_data": {"paper_id": wordid,
  169. "subject": subject,
  170. "callback_info": callback_info},
  171. "task_name": "保存入库"}
  172. # print(svg_data["svg_html_data"])
  173. # pickle.dump(items_list, open("./struct_items622.pickle", 'wb'))
  174. # pickle.dump(svg_data, open("./svg_data622.pickle", 'wb'))
  175. # logger.info("【入库】word_id:{}==公式数据==>svg_data:{}\n".format(wordid, str(svg_data).encode("utf8", "ignore")))
  176. if not wordid:
  177. wordid = get_wordid(str(items_list))
  178. loginfo["receive_data"]["is_auto_id"] = 1
  179. loginfo["receive_data"]["paper_id"] = wordid
  180. logger.info(json.dumps(loginfo, ensure_ascii=False))
  181. if wordid and items_list:
  182. st1 = time.time()
  183. # try:
  184. res = Ruku(items_list, ocr_html_data, svg_data, str(wordid), callback_info, subject).save()
  185. logger.info("【入库】==结束==> word_id:{}".format(wordid))
  186. # pprint(res)
  187. logger.info(json.dumps({"log_level": "info",
  188. "paper_id": wordid,
  189. "status": "任务结束",
  190. "task_time": time.time() - st1,
  191. "task_name": "保存入库"}, ensure_ascii=False))
  192. return json.dumps(res, ensure_ascii=False)
  193. # except Exception as e:
  194. # # print
  195. # logger.info("【入库】==失败==> word_id:{}\n{}".format(wordid, e))
  196. # return json.dumps({"errcode":1, "errmsgs": "入库失败!", "data":{}}, ensure_ascii=False)
  197. else:
  198. return "需要paperid"
  199. @app.route('/ser_static/<path:file_path>', methods=["GET"])
  200. def ser_static(file_path): # endpoint的位置是函数接口名,不能用static,与flask内部变量重名
  201. """
  202. :param file_path: 图片的本地绝对路径
  203. :return:
  204. """
  205. return send_from_directory(configs.IMG_FOLDER, file_path)
  206. def get_wordid(mydata):
  207. """
  208. 针对传参中没有wordid的情况,提取或生成wordid
  209. :return:
  210. """
  211. wordid_info1 = re.search("/(zyk/uploadfiles/wording|ser_static)/(\d+)/", mydata)
  212. wordid_info2 = re.search("/imgpaper/lqy_upload/(\d+)/", mydata)
  213. if wordid_info1:
  214. word_id = wordid_info1.group(2)
  215. elif wordid_info2:
  216. word_id = wordid_info2.group(1)
  217. else:
  218. name_list = random.sample(range(100000, 999999), 1)
  219. word_id = str(int(time.time())) + str(name_list[0])
  220. # md = hashlib.md5()
  221. # md.update(word_id_temp.encode("utf-8"))
  222. # word_id = str(md.hexdigest())
  223. return word_id
# Script entry point: start Flask's built-in server on the host/port taken
# from configs when this file is run directly.
if __name__ == "__main__":
    # NOTE(review): debug=True is passed unconditionally — confirm this is
    # intended outside of local development.
    app.run(host=configs.server_ip, port=configs.server_port, threaded=True, debug=True)  # threaded=True
    # app.run(processes=4) # multi-process and multi-thread are mutually exclusive; only one can be enabled
    # 5fc64a0a4994183dda7e74b9
    # from gevent import pywsgi
    # # app.debug = True
    # server = pywsgi.WSGIServer((configs.server_ip, configs.server_port), app)
    # server.serve_forever()
  231. # server.serve_forever()