cdZWj
/
new_tiku_structure_v3_art


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import pickle

from flask import Flask, render_template, send_from_directory
from flask import request, redirect, Response
from flask_cors import *
from multiprocessing import Process, Queue

import configs
from structure.danti_structure import single_parse
from structure.structure_main import StructureExporter
import os, datetime, hashlib
import time, json, random
import pprint

from utils.ruku_opera import Ruku
from pprint import pprint

# Module-wide logger obtained from the project's logging helper; records are
# filed under the "reparse_ruku_log" category.
logger = configs.myLog(
    __name__,
    log_cate="reparse_ruku_log",
).getlog()

# The Flask application every route below is registered on.
app = Flask(__name__)
app.debug = True
# Enable cross-origin requests, including credentialed ones, for all routes.
CORS(app, supports_credentials=True)


@app.route('/word_structure', methods=["GET", "POST"])
def word_structure():
    """
    Batch-structure a whole Word paper that is supplied as HTML.

    Keys read from the JSON request body:
        art_html_data: HTML text of the paper (default "").
        is_reparse:    flag converted with ``int()``; non-zero means re-parse
                       (default "0").
        paper_id:      id of the paper; when missing or falsy an id is built
                       from the current timestamp plus a random six-digit number.
        subject:       passed through to ``StructureExporter`` (default "").

    The received HTML is archived as ``<parent of cwd>/accept_files/<paper_id>.html``
    before parsing.  If parsing raises, the HTML is dumped as JSON to
    ``configs.FAIL_FOLDER/<paper_id>.json`` and an error result is returned.

    :return: JSON string.  Normally ``{"errcode", "errmsgs", "data"}``; when the
             exporter's own result already contains ``errcode`` that result is
             returned unchanged.
    """
    # request.json may be None (e.g. a body of ``null``); fall back to an empty
    # mapping so the defaults below apply instead of raising AttributeError.
    payload = request.json or {}
    mydata = payload.get("art_html_data", "")
    is_reparse = payload.get("is_reparse", "0")
    word_id = payload.get("paper_id", 0)
    subject = payload.get("subject", "")
    logger.info("【再解析】==request.POST.dict==>is_reparse:{}, word_id:{},subject:{}".format(is_reparse, word_id, subject))

    if not word_id:
        name_list = random.sample(range(100000, 999999), 1)
        word_id = str(int(time.time())) + str(name_list[0])

    # paper_id comes from the client, so keep only its final path component
    # when it is used as a file name (prevents writing outside the target dirs).
    safe_name = os.path.basename(str(word_id))

    # Archive what was received, named after the paper id.  word_id is always
    # set at this point, so no further check is needed.  os.path.join keeps the
    # path valid on every platform instead of hard-coding "\\".
    getfile_savepath = os.path.join(os.path.dirname(os.getcwd()), "accept_files", safe_name + ".html")
    print(999999999999, getfile_savepath)
    if os.path.exists(getfile_savepath):
        logger.info("同一份wordid文件发送多次：{}".format(word_id))
    with open(getfile_savepath, 'w', encoding='utf-8') as accepted_file:
        accepted_file.write(mydata)

    result = {"errcode": 0, "errmsgs": "", "data": {"items": []}}

    try:
        reparse_flag = int(is_reparse)
        if reparse_flag:  # re-parse: the exporter receives the paper id and the flag
            exporter = StructureExporter(mydata, str(word_id), subject, reparse_flag)
        elif mydata:  # first-time parse: no paper id is handed to the exporter
            exporter = StructureExporter(mydata, "", subject)
        else:
            exporter = None

        if exporter is None:
            result["errmsgs"] = "无data或paper_id"
            result["errcode"] = 1
        else:
            res, _paper_type = exporter.export()
            print(res)
            if "errcode" not in res:
                result["data"] = res
            else:
                # The exporter produced its own error payload; return it as is.
                result = res
            logger.info("【再解析】==解析结束==> word_id:{}".format(word_id))
    except Exception as exc:
        # Parsing is best-effort: record the cause, keep the input for later
        # inspection and report a generic failure to the caller.
        print("解析失败")
        logger.info("【再解析】==解析失败==> word_id:{}, error:{!r}".format(word_id, exc))
        new_fpath = os.path.join(configs.FAIL_FOLDER, safe_name + '.json')
        with open(new_fpath, 'w', encoding='utf-8') as failed_file:
            json.dump(mydata, failed_file, ensure_ascii=False)
        result["errmsgs"] = "解析失败"
        result["errcode"] = 1
    return json.dumps(result, ensure_ascii=False)


@app.route('/danti_structure', methods=["POST"])
def danti_structure():
    """
    Re-parse and structure a single question.

    Keys read from the JSON request body:
        paper_id:         id of the paper the question belongs to (default 0).
        single_item_data: content of the question to parse (default "").
        item_type:        question type; required.
        subject:          passed through to ``single_parse`` (default "").

    :return: JSON string ``{"errcode", "errmsgs", "data"}``.  ``errcode`` is 1
             when no item type was given or when ``single_parse`` returns a
             string (which is then used as the error message); otherwise
             ``data`` holds the parse result.
    """
    # request.json may be None (e.g. a body of ``null``); fall back to an empty
    # mapping so the defaults below apply instead of raising AttributeError.
    payload = request.json or {}
    word_id = payload.get("paper_id", 0)
    one_item = payload.get("single_item_data", "")
    item_type = payload.get("item_type", "")
    subject = payload.get("subject", "")
    logger.info("【单题解析】==request.POST.dict==>word_id:{}, item_type:{},subject:{}".format(word_id, item_type, subject))
    print(word_id, item_type)

    res = {"errcode": 0, "errmsgs": "", "data": {}}
    if not item_type:
        res["errcode"] = 1
        res["errmsgs"] = "没有选定题型"
        return json.dumps(res, ensure_ascii=False)

    one_res = single_parse(one_item, item_type, word_id, subject)
    if isinstance(one_res, str):
        # A string result is treated as an error message.
        res["errcode"] = 1
        res["errmsgs"] = one_res
    else:
        res["data"] = one_res

    return json.dumps(res, ensure_ascii=False)


@app.route('/ruku', methods=["GET", "POST"])
def ruku():
    """
    Store structured questions through ``Ruku(...).save()``.

    Keys read from the JSON request body:
        paper_id:         id of the paper; required.
        subject:          subject (the original note says a subject_id int is
                          actually sent).
        structured_items: the structured questions; required.
        html_data:        original text content.
        svgs:             MathJax-related data, default
                          ``{"svg_html_data": "", "svg_path": ""}``.
        user:             callback info, default
                          ``{"callback_url": "", "source": ""}``.

    As a side effect the received items and svg data are pickled to
    ``./struct_items.pickle`` and ``./svg_data.pickle`` on every call.

    :return: JSON string of the ``save()`` result, or the plain text
             "需要paperid" when the paper id or the items are missing.
    """
    # request.json may be None (e.g. a body of ``null``); fall back to an empty
    # mapping so the defaults below apply instead of raising AttributeError.
    payload = request.json or {}
    wordid = payload.get("paper_id", "")
    subject = payload.get("subject", "")  # actually receives subject_id (int)
    items_list = payload.get("structured_items", "")  # structured questions
    ocr_html_data = payload.get("html_data", "")  # original text content
    svg_data = payload.get("svgs", {"svg_html_data": "", "svg_path": ""})  # MathJax-related text
    callback_info = payload.get("user", {"callback_url": "", "source": ""})
    logger.info("【入库】==request.POST.dict==>word_id:{}, callback_url:{},subject:{}"
                .format(wordid, callback_info, subject))

    # Snapshot the latest request for offline inspection; the context managers
    # make sure both files are flushed and closed.
    with open("./struct_items.pickle", 'wb') as items_dump:
        pickle.dump(items_list, items_dump)
    with open("./svg_data.pickle", 'wb') as svg_dump:
        pickle.dump(svg_data, svg_dump)

    if not (wordid and items_list):
        return "需要paperid"

    res = Ruku(items_list, ocr_html_data, svg_data, str(wordid), callback_info, subject).save()
    logger.info("【入库】==结束==> word_id:{}".format(wordid))
    pprint(res)
    return json.dumps(res, ensure_ascii=False)


@app.route('/ser_static/<path:file_path>', methods=["GET"])
def ser_static(file_path):  # the endpoint takes the view function's name; it must not be "static", which clashes with a Flask-internal name
    """
    Serve a file from the image folder.

    :param file_path: path of the requested file, taken from the URL and
                      looked up under ``configs.IMG_FOLDER``
    :return: the response produced by ``send_from_directory``
    """
    image_root = configs.IMG_FOLDER
    return send_from_directory(image_root, file_path)


if __name__ == "__main__":
    # Start Flask's built-in server on the configured address in threaded mode.
    # NOTE(review): debug=True enables the interactive debugger; confirm this
    # entry point is never used for a publicly reachable deployment.
    app.run(
        host=configs.server_ip,
        port=configs.server_port,
        threaded=True,
        debug=True,
    )
    # app.run(processes=4)  # multi-process and multi-thread are mutually exclusive; enable only one
    # 5fc64a0a4994183dda7e74b9