#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Flask HTTP service exposing paper structuring, single-item parsing and storage.

Routes:
    /word_structure   -- structure (or re-structure) a whole document.
    /danti_structure  -- parse a single item.
    /ruku             -- hand structured items to ``Ruku(...).save()``.
    /ser_static/<path> -- serve files from ``configs.IMG_FOLDER``.
"""
import datetime
import hashlib
import json
import os
import pickle
import pprint
import random
import time
import traceback
from multiprocessing import Process, Queue
# NOTE: must come after ``import pprint`` so that the name ``pprint`` is bound
# to the function (as in the original import order), not to the module.
from pprint import pprint

from flask import Flask, render_template, send_from_directory
from flask import request, redirect, Response
from flask_cors import *

import configs
from structure.danti_structure import single_parse
from structure.structure_main import StructureExporter
from utils.ruku_opera import Ruku

logger = configs.myLog(__name__, log_cate="reparse_ruku_log").getlog()

app = Flask(__name__)
app.debug = True
CORS(app, supports_credentials=True)


def _new_word_id():
    """Return a fallback id: current Unix seconds followed by a 6-digit random number."""
    return str(int(time.time())) + str(random.randrange(100000, 999999))


@app.route('/word_structure', methods=["GET", "POST"])
def word_structure():
    """Structure a whole document in bulk.

    Reads ``art_html_data``, ``is_reparse``, ``paper_id`` and ``subject`` from
    the JSON request body, stores the received HTML on disk under the paper
    id, then runs ``StructureExporter``.

    :return: JSON string. On success ``{"errcode": 0, "errmsgs": "", "data": ...}``;
        if the exporter result itself contains ``"errcode"`` it is returned
        as-is; on any exception ``errcode`` is 1 and the input is dumped to
        ``configs.FAIL_FOLDER``.
    """
    payload = request.json
    mydata = payload.get("art_html_data", "")
    is_reparse = payload.get("is_reparse", "0")
    word_id = payload.get("paper_id", 0)
    subject = payload.get("subject", "")
    logger.info("【再解析】==request.POST.dict==>is_reparse:{}, word_id:{},subject:{}".format(is_reparse, word_id, subject))

    if not word_id:
        word_id = _new_word_id()

    # Keep a copy of every received document, named after its word id.
    # word_id is always truthy from here on (either supplied or generated).
    # NOTE(review): word_id comes from the client and is used in a file name;
    # consider validating it to rule out path traversal.
    accept_dir = os.path.join(os.path.dirname(os.getcwd()), 'accept_files')
    os.makedirs(accept_dir, exist_ok=True)
    getfile_savepath = os.path.join(accept_dir, str(word_id) + ".html")
    print(999999999999, getfile_savepath)
    if os.path.exists(getfile_savepath):
        logger.info("同一份wordid文件发送多次:{}".format(word_id))
    with open(getfile_savepath, 'w', encoding='utf-8') as accepted_file:
        accepted_file.write(mydata)

    result = {"errcode": 0, "errmsgs": "", "data": {"items": []}}
    try:
        reparse_flag = int(is_reparse)
        if reparse_flag and word_id:
            # Re-parse: the exporter receives the paper id and the reparse flag.
            exporter = StructureExporter(mydata, str(word_id), subject, reparse_flag)
        elif not reparse_flag and mydata:
            # First-time parse: the exporter is called without a paper id.
            exporter = StructureExporter(mydata, "", subject)
        else:
            exporter = None

        if exporter is None:
            result["errmsgs"] = "无data或paper_id"
            result["errcode"] = 1
        else:
            res, _paper_type = exporter.export()
            print(res)
            if "errcode" not in res:
                result["data"] = res
            else:
                # The exporter produced its own error envelope; pass it through.
                result = res
            logger.info("【再解析】==解析结束==> word_id:{}".format(word_id))
    except Exception:
        print("解析失败")
        logger.info("【再解析】==解析失败==> word_id:{}".format(word_id))
        # Record the traceback so the failure can be diagnosed later.
        logger.info(traceback.format_exc())
        # Save the offending input for offline inspection.
        new_fpath = os.path.join(configs.FAIL_FOLDER, str(word_id) + '.json')
        with open(new_fpath, 'w', encoding='utf-8') as failed_file:
            json.dump(mydata, failed_file, ensure_ascii=False)
        result["errmsgs"] = "解析失败"
        result["errcode"] = 1
    return json.dumps(result, ensure_ascii=False)


@app.route('/danti_structure', methods=["POST"])
def danti_structure():
    """Re-parse and structure a single item.

    Reads ``paper_id``, ``single_item_data``, ``item_type`` and ``subject``
    from the JSON request body and delegates to ``single_parse``.

    :return: JSON string ``{"errcode", "errmsgs", "data"}``. ``errcode`` is 1
        when no item type was given or when ``single_parse`` returns a string
        (which is then used as the error message).
    """
    payload = request.json
    word_id = payload.get("paper_id", 0)
    one_item = payload.get("single_item_data", "")
    item_type = payload.get("item_type", "")
    subject = payload.get("subject", "")
    logger.info("【单题解析】==request.POST.dict==>word_id:{}, item_type:{},subject:{}".format(word_id, item_type, subject))
    print(word_id, item_type)

    if not word_id:
        word_id = _new_word_id()

    res = {"errcode": 0, "errmsgs": "", "data": {}}
    if not item_type:
        res["errcode"] = 1
        res["errmsgs"] = "没有选定题型"
        return json.dumps(res, ensure_ascii=False)

    one_res = single_parse(one_item, item_type, word_id, subject)
    if isinstance(one_res, str):
        # A string result is treated as an error message.
        res["errcode"] = 1
        res["errmsgs"] = one_res
    else:
        res["data"] = one_res
    return json.dumps(res, ensure_ascii=False)


@app.route('/ruku', methods=["GET", "POST"])
def ruku():
    """Store structured items via ``Ruku(...).save()``.

    Reads ``paper_id``, ``subject``, ``structured_items``, ``html_data``,
    ``svgs`` and ``user`` from the JSON request body. The items and svg data
    are also pickled to the working directory before saving.

    :return: JSON string of the ``save()`` result, or the plain string
        ``"需要paperid"`` when the paper id or the items are missing.
    """
    payload = request.json
    wordid = payload.get("paper_id", "")
    subject = payload.get("subject", "")  # actually receives subject_id (an int)
    items_list = payload.get("structured_items", "")  # structured items
    ocr_html_data = payload.get("html_data", "")  # original text content
    svg_data = payload.get("svgs", {"svg_html_data": "", "svg_path": ""})  # mathjax-related text
    callback_info = payload.get("user", {"callback_url": "", "source": ""})
    logger.info("【入库】==request.POST.dict==>word_id:{}, callback_url:{},subject:{}"
                .format(wordid, callback_info, subject))

    # Snapshot the latest request payload; files are overwritten on every call.
    with open("./struct_items.pickle", 'wb') as items_file:
        pickle.dump(items_list, items_file)
    with open("./svg_data.pickle", 'wb') as svg_file:
        pickle.dump(svg_data, svg_file)

    if not (wordid and items_list):
        return "需要paperid"

    res = Ruku(items_list, ocr_html_data, svg_data, str(wordid), callback_info, subject).save()
    logger.info("【入库】==结束==> word_id:{}".format(wordid))
    pprint(res)
    return json.dumps(res, ensure_ascii=False)


# The endpoint name is the function name; it must not be ``static`` because
# that collides with Flask's built-in static endpoint.
@app.route('/ser_static/<path:file_path>', methods=["GET"])
def ser_static(file_path):
    """Serve a file from ``configs.IMG_FOLDER``.

    :param file_path: path of the requested file, taken from the URL and
        resolved by ``send_from_directory`` against ``configs.IMG_FOLDER``.
    :return: the file response produced by ``send_from_directory``.
    """
    return send_from_directory(configs.IMG_FOLDER, file_path)


if __name__ == "__main__":
    app.run(host=configs.server_ip, port=configs.server_port, threaded=True, debug=True)
    # NOTE: multi-process and multi-thread modes are mutually exclusive;
    # e.g. app.run(processes=4) cannot be combined with threaded=True.

# 5fc64a0a4994183dda7e74b9