server.py 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. import pickle
  4. from flask import Flask, render_template, send_from_directory
  5. from flask import request, redirect, Response
  6. from flask_cors import *
  7. from multiprocessing import Process, Queue
  8. import configs
  9. from structure.danti_structure import single_parse
  10. from structure.structure_main import StructureExporter
  11. import os, datetime, hashlib
  12. import time, json, random
  13. import pprint
  14. from utils.ruku_opera import Ruku
  15. from pprint import pprint
  16. logger = configs.myLog(__name__, log_cate="reparse_ruku_log").getlog()
  17. app = Flask(__name__)
  18. app.debug = True
  19. CORS(app, supports_credentials=True)
  20. @app.route('/word_structure', methods=["GET", "POST"])
  21. def word_structure():
  22. """
  23. word 批量结构化
  24. :return:
  25. """
  26. mydata = request.json.get("art_html_data", "")
  27. is_reparse = request.json.get("is_reparse", "0")
  28. word_id = request.json.get("paper_id", 0)
  29. subject = request.json.get("subject", "")
  30. logger.info("【再解析】==request.POST.dict==>is_reparse:{}, word_id:{},subject:{}".format(is_reparse, word_id, subject))
  31. # logger.info("==request.POST.dict==>{}".format(request.json.dict))
  32. # print(request.json.to_dict())
  33. # print(mydata)
  34. if not word_id:
  35. name_list = random.sample(range(100000, 999999), 1)
  36. word_id = str(int(time.time())) + str(name_list[0])
  37. # 接收的文件记录一下,按wordid命名
  38. # time_str = datetime.datetime.strftime(datetime.datetime.now(), '%Y_%m_%d_%H_%M_%S')
  39. # new_fpath = os.path.join(configs.FAIL_FOLDER, str(time_str)+".html")
  40. if word_id:
  41. getfile_savepath = os.path.dirname(os.getcwd()) + '\\accept_files\\' + str(word_id) + ".html"
  42. print(999999999999, getfile_savepath)
  43. if os.path.exists(getfile_savepath):
  44. logger.info("同一份wordid文件发送多次:{}".format(word_id))
  45. re_f = open(getfile_savepath, 'w', encoding='utf-8')
  46. re_f.write(mydata)
  47. re_f.close()
  48. result = {"errcode": 0, "errmsgs": "", "data": {"items": []}}
  49. try:
  50. if int(is_reparse) and word_id: # 再解析
  51. res, paper_type = StructureExporter(mydata, str(word_id), subject, int(is_reparse)).export()
  52. print(res)
  53. if "errcode" not in res:
  54. result["data"] = res
  55. else:
  56. result = res
  57. logger.info("【再解析】==解析结束==> word_id:{}".format(word_id))
  58. elif not int(is_reparse) and mydata: # 不是再解析
  59. res, paper_type = StructureExporter(mydata, "", subject).export()
  60. print(res)
  61. if "errcode" not in res:
  62. result["data"] = res
  63. else:
  64. result = res
  65. logger.info("【再解析】==解析结束==> word_id:{}".format(word_id))
  66. else:
  67. result["errmsgs"] = "无data或paper_id"
  68. result["errcode"] = 1
  69. except:
  70. # 先保存文件
  71. # now_time = datetime.datetime.now()
  72. # time_str = datetime.datetime.strftime(now_time, '%Y_%m_%d_%H_%M_%S')
  73. # aft_modify = (str(random.random())).encode("utf-8")
  74. # aft_name = hashlib.md5(aft_modify).hexdigest() + '__' + time_str + '.json'
  75. print("解析失败")
  76. logger.info("【再解析】==解析失败==> word_id:{}".format(word_id))
  77. new_fpath = configs.FAIL_FOLDER + '/' + str(word_id) + '.json'
  78. re_f = open(new_fpath, 'w', encoding='utf-8')
  79. json.dump(mydata, re_f, ensure_ascii=False)
  80. result["errmsgs"] = "解析失败"
  81. result["errcode"] = 1
  82. return json.dumps(result, ensure_ascii=False)
  83. @app.route('/danti_structure', methods=["POST"])
  84. def danti_structure():
  85. """
  86. 单题再解析、结构化
  87. :return:
  88. """
  89. word_id = request.json.get("paper_id", 0)
  90. one_item = request.json.get("single_item_data", "")
  91. item_type = request.json.get("item_type", "")
  92. subject = request.json.get("subject", "")
  93. logger.info("【单题解析】==request.POST.dict==>word_id:{}, item_type:{},subject:{}".format(word_id, item_type, subject))
  94. # logger.info("【单题解析】==request.POST.single_item_data==>\n{}\n".format(one_item))
  95. print(word_id, item_type)
  96. if not word_id:
  97. name_list = random.sample(range(100000, 999999), 1)
  98. word_id = str(int(time.time())) + str(name_list[0])
  99. res = {"errcode": 0, "errmsgs": "", "data": {}}
  100. if item_type:
  101. one_res = single_parse(one_item, item_type, word_id, subject)
  102. if type(one_res) == str:
  103. res["errcode"] = 1
  104. res["errmsgs"] = one_res
  105. else:
  106. res["data"] = one_res
  107. else:
  108. res["errcode"] = 1
  109. res["errmsgs"] = "没有选定题型"
  110. return json.dumps(res, ensure_ascii=False)
  111. @app.route('/ruku', methods=["GET", "POST"])
  112. def ruku():
  113. wordid = request.json.get("paper_id", "")
  114. subject = request.json.get("subject", "") # 实际传入subject_id(int型)
  115. items_list = request.json.get("structured_items", "") # 结构化试题
  116. ocr_html_data = request.json.get("html_data", "") # 文本原始内容
  117. svg_data = request.json.get("svgs", {"svg_html_data": "", "svg_path": ""}) # mathjax的相关文本
  118. # callback_url = request.json.get("callback_url", "")
  119. callback_info = request.json.get("user", {"callback_url": "", "source": ""})
  120. logger.info("【入库】==request.POST.dict==>word_id:{}, callback_url:{},subject:{}"
  121. .format(wordid, callback_info, subject))
  122. # svg_html_data = svg_data["svg_html_data"]
  123. # svg_path = svg_data["svg_path"]
  124. # print(wordid)
  125. # print('--------items_list-------------')
  126. # print(items_list)
  127. # print('--------ocr_html_data-------------')
  128. # print(ocr_html_data)
  129. # print('--------svg_data-------------')
  130. # print(svg_data)
  131. pickle.dump(items_list, open("./struct_items.pickle", 'wb'))
  132. pickle.dump(svg_data, open("./svg_data.pickle", 'wb'))
  133. if wordid and items_list:
  134. # try:
  135. res = Ruku(items_list, ocr_html_data, svg_data, str(wordid), callback_info, subject).save()
  136. logger.info("【入库】==结束==> word_id:{}".format(wordid))
  137. pprint(res)
  138. return json.dumps(res, ensure_ascii=False)
  139. else:
  140. return "需要paperid"
  141. @app.route('/ser_static/<path:file_path>', methods=["GET"])
  142. def ser_static(file_path): # endpoint的位置是函数接口名,不能用static,与flask内部变量重名
  143. """
  144. :param file_path: 图片的本地绝对路径
  145. :return:
  146. """
  147. return send_from_directory(configs.IMG_FOLDER, file_path)
  148. if __name__ == "__main__":
  149. app.run(host=configs.server_ip, port=configs.server_port, threaded=True, debug=True)
  150. # app.run(processes=4) # 多进程或多线程只能选择一个,不能同时开启
  151. # 5fc64a0a4994183dda7e74b9