123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
#!/usr/bin/env python
- # -*- coding:utf-8 -*-
- import pickle
- from flask import Flask, render_template, send_from_directory
- from flask import request, redirect, Response
- from flask_cors import *
- from multiprocessing import Process, Queue
- import configs
- from structure.danti_structure import single_parse
- from structure.structure_main import StructureExporter
- import os, datetime, hashlib
- import time, json, random
- import pprint
- from utils.ruku_opera import Ruku
- from pprint import pprint
# Module-level logger obtained from the project's ``configs.myLog`` helper,
# using the "reparse_ruku_log" log category.
logger = configs.myLog(__name__, log_cate="reparse_ruku_log").getlog()

# Flask application object shared by every route in this module.
app = Flask(__name__)
app.debug = True

# Enable cross-origin requests, including credentialed ones.
CORS(app, supports_credentials=True)
@app.route('/word_structure', methods=["GET", "POST"])
def word_structure():
    """Batch-structure a whole Word document received as HTML.

    Keys read from the JSON request body:

    * ``art_html_data`` -- HTML text of the document (default ``""``).
    * ``is_reparse``    -- integer or numeric string; non-zero marks a
      re-parse request (default ``"0"``).
    * ``paper_id``      -- id of the paper; when missing or falsy an id is
      generated from the current timestamp plus a random number.
    * ``subject``       -- passed through to ``StructureExporter``.

    The received HTML is first written to
    ``<parent of cwd>/accept_files/<paper_id>.html``. The document is then
    handed to ``StructureExporter(...).export()``. If anything raises during
    parsing, the raw input is dumped to
    ``configs.FAIL_FOLDER/<paper_id>.json`` and an error result is returned.

    :return: JSON text. On success ``{"errcode": 0, "errmsgs": "", "data":
        <exporter result>}``; if the exporter result itself contains an
        ``"errcode"`` key it is returned as-is; otherwise ``errcode`` is 1
        with a message in ``errmsgs``.
    """
    import traceback

    # The route also accepts GET, where there may be no JSON body at all;
    # fall back to an empty payload instead of failing on ``None.get``.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    mydata = payload.get("art_html_data", "")
    is_reparse = payload.get("is_reparse", "0")
    word_id = payload.get("paper_id", 0)
    subject = payload.get("subject", "")
    logger.info("【再解析】==request.POST.dict==>is_reparse:{}, word_id:{},subject:{}".format(is_reparse, word_id, subject))

    if not word_id:
        # No paper id supplied: build one from the Unix timestamp plus a
        # random six-digit number so the request can still be archived.
        word_id = str(int(time.time())) + str(random.randrange(100000, 999999))

    # Keep a copy of every received document, named after the paper id.
    # NOTE(review): word_id comes straight from the request and is used in a
    # file path here and in the failure dump below -- confirm callers are
    # trusted or sanitise it.
    archive_path = os.path.join(
        os.path.dirname(os.getcwd()), "accept_files", str(word_id) + ".html"
    )
    if os.path.exists(archive_path):
        logger.info("同一份wordid文件发送多次:{}".format(word_id))
    with open(archive_path, 'w', encoding='utf-8') as archive_file:
        archive_file.write(mydata)

    result = {"errcode": 0, "errmsgs": "", "data": {"items": []}}
    try:
        reparse_flag = int(is_reparse)
        # word_id is always truthy at this point (one was generated above if
        # the request had none), so a re-parse only depends on the flag.
        # NOTE(review): this means a re-parse without paper_id runs with the
        # generated id -- confirm that is intended.
        if reparse_flag:
            exporter = StructureExporter(mydata, str(word_id), subject, reparse_flag)
        elif mydata:
            exporter = StructureExporter(mydata, "", subject)
        else:
            exporter = None

        if exporter is None:
            result["errmsgs"] = "无data或paper_id"
            result["errcode"] = 1
        else:
            # The second element of the export result is not used here.
            res, _ = exporter.export()
            if "errcode" not in res:
                result["data"] = res
            else:
                # The exporter produced its own error envelope; pass it on.
                result = res
            logger.info("【再解析】==解析结束==> word_id:{}".format(word_id))
    except Exception:
        logger.info("【再解析】==解析失败==> word_id:{}".format(word_id))
        # Record why parsing failed; the original handler dropped this.
        logger.info(traceback.format_exc())

        # Save the offending input for later inspection. A problem while
        # saving must not hide the error response from the client.
        fail_path = configs.FAIL_FOLDER + '/' + str(word_id) + '.json'
        try:
            with open(fail_path, 'w', encoding='utf-8') as fail_file:
                json.dump(mydata, fail_file, ensure_ascii=False)
        except (OSError, TypeError, ValueError):
            logger.info("【再解析】==failed to save failure dump==> word_id:{}\n{}".format(
                word_id, traceback.format_exc()))

        result["errmsgs"] = "解析失败"
        result["errcode"] = 1
    return json.dumps(result, ensure_ascii=False)
@app.route('/danti_structure', methods=["POST"])
def danti_structure():
    """Re-parse and structure a single question.

    Keys read from the JSON request body:

    * ``paper_id``         -- id of the paper (default ``0``).
    * ``single_item_data`` -- content of the question (default ``""``).
    * ``item_type``        -- question type; required.
    * ``subject``          -- passed through to ``single_parse``.

    :return: JSON text ``{"errcode", "errmsgs", "data"}``. ``errcode`` is 1
        when no ``item_type`` was given or when ``single_parse`` returns a
        string (which is used as the error message); otherwise ``data``
        holds the value returned by ``single_parse``.
    """
    # Tolerate a missing, malformed or non-object body: treat it as empty so
    # the caller gets the regular JSON error envelope below.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    word_id = payload.get("paper_id", 0)
    one_item = payload.get("single_item_data", "")
    item_type = payload.get("item_type", "")
    subject = payload.get("subject", "")
    logger.info("【单题解析】==request.POST.dict==>word_id:{}, item_type:{},subject:{}".format(word_id, item_type, subject))

    res = {"errcode": 0, "errmsgs": "", "data": {}}
    if not item_type:
        res["errcode"] = 1
        res["errmsgs"] = "没有选定题型"
        return json.dumps(res, ensure_ascii=False)

    one_res = single_parse(one_item, item_type, word_id, subject)
    if isinstance(one_res, str):
        # A string result is treated as an error message.
        res["errcode"] = 1
        res["errmsgs"] = one_res
    else:
        res["data"] = one_res
    return json.dumps(res, ensure_ascii=False)
@app.route('/ruku', methods=["GET", "POST"])
def ruku():
    """Save structured questions through ``Ruku(...).save()``.

    Keys read from the JSON request body:

    * ``paper_id``         -- id of the paper; required.
    * ``subject``          -- per the original note, actually a subject id
      (int) -- TODO confirm against callers.
    * ``structured_items`` -- the structured questions; required.
    * ``html_data``        -- original text content.
    * ``svgs``             -- MathJax-related data; defaults to
      ``{"svg_html_data": "", "svg_path": ""}``.
    * ``user``             -- callback information; defaults to
      ``{"callback_url": "", "source": ""}``.

    ``structured_items`` and ``svgs`` are also pickled to
    ``./struct_items.pickle`` and ``./svg_data.pickle`` on every request.

    :return: JSON text of the value returned by ``Ruku(...).save()``, or the
        plain string ``"需要paperid"`` when ``paper_id`` or
        ``structured_items`` is missing.
    """
    # The route also accepts GET, where there may be no JSON body; treat a
    # missing or non-object body as empty instead of failing on ``.get``.
    payload = request.get_json(silent=True)
    if not isinstance(payload, dict):
        payload = {}

    wordid = payload.get("paper_id", "")
    subject = payload.get("subject", "")
    items_list = payload.get("structured_items", "")
    ocr_html_data = payload.get("html_data", "")
    svg_data = payload.get("svgs", {"svg_html_data": "", "svg_path": ""})
    callback_info = payload.get("user", {"callback_url": "", "source": ""})
    logger.info("【入库】==request.POST.dict==>word_id:{}, callback_url:{},subject:{}"
                .format(wordid, callback_info, subject))

    # Snapshot the latest inputs in the working directory. Context managers
    # make sure the handles are flushed and closed on every request.
    # NOTE(review): concurrent requests overwrite the same two files.
    with open("./struct_items.pickle", 'wb') as items_dump:
        pickle.dump(items_list, items_dump)
    with open("./svg_data.pickle", 'wb') as svg_dump:
        pickle.dump(svg_data, svg_dump)

    if not (wordid and items_list):
        return "需要paperid"

    res = Ruku(items_list, ocr_html_data, svg_data, str(wordid), callback_info, subject).save()
    logger.info("【入库】==结束==> word_id:{}".format(wordid))
    return json.dumps(res, ensure_ascii=False)
@app.route('/ser_static/<path:file_path>', methods=["GET"])
def ser_static(file_path):
    """Serve a file from ``configs.IMG_FOLDER``.

    The view is deliberately not named ``static``: the endpoint name is the
    function name, and ``static`` would clash with a name Flask uses
    internally.

    :param file_path: path captured from the URL after ``/ser_static/``. The
        original note describes it as the image's local absolute path --
        NOTE(review): confirm; it is passed to ``send_from_directory``
        together with ``configs.IMG_FOLDER``.
    :return: the response produced by ``send_from_directory``.
    """
    image_root = configs.IMG_FOLDER
    return send_from_directory(image_root, file_path)
if __name__ == "__main__":
    # Start the server on the configured address with threading and debug
    # mode enabled.
    run_options = {
        "host": configs.server_ip,
        "port": configs.server_port,
        "threaded": True,
        "debug": True,
    }
    app.run(**run_options)
    # app.run(processes=4)  # multi-process and multi-thread are mutually
    # exclusive; only one of them can be enabled at a time
- # 5fc64a0a4994183dda7e74b9
|