"""HTTP entry points for the math question duplicate-detection service.

Routes:
    /repeat        duplicate check against the cloud question bank
    /chc           accept a school-bank check and forward it for processing
    /chc/process   run a school-bank check and POST the result to a callback
    /cloud/update  update the HNSW index of the cloud question bank
"""
# gevent monkey-patching is applied before the remaining imports, as in the
# original file, so it takes effect before those modules are loaded.
from gevent import monkey
monkey.patch_all()

from flask import Flask, request, jsonify
import requests
from gevent.pywsgi import WSGIServer

import config
from hnsw_logic import Hnsw_Logic
from school_dup_logic import School_Dup_Logic

app = Flask(__name__)

# Logger shared by every route and by the logic objects below.
math_dup_logger = config.LogConfig(config.math_dup_path, "math_dup").get_log()
# HNSW retrieval logic.
hnsw_logic = Hnsw_Logic(math_dup_logger)
# Decision logic for the school question bank duplicate check.
school_dup_logic = School_Dup_Logic(math_dup_logger)


def _reject_non_post():
    """Return an explicit 405 response for requests that are not POST.

    The routes are registered for GET as well as POST, but only POST is
    implemented. Without this, a GET would make the view return ``None`` and
    Flask would answer with a 500 error.
    """
    return jsonify("请使用POST请求"), 405


# Cloud question bank duplicate check.
@app.route('/repeat', methods=['GET', 'POST'])
def repeat_retrieve():
    """Check the posted topics against index 0 of the HNSW logic.

    The JSON body is either an object with a ``topics`` list and an optional
    ``callback_url``, or a bare list of topics. Each topic must carry a
    ``topic_id``. Topics whose result is empty are left out of the response.

    Returns:
        A JSON object mapping ``topic_id`` to its result, or a JSON string
        describing why the payload was rejected.
    """
    if request.method != 'POST':
        return _reject_non_post()

    topics_dict = request.get_json()
    # Log the incoming payload.
    math_dup_logger.info(config.log_msg.format(id="云题库查重", type="repeat接收", message=topics_dict))

    if isinstance(topics_dict, dict):
        # .get() so a missing key yields a validation message or "no
        # callback" instead of an unhandled KeyError.
        retrieve_list = topics_dict.get("topics")
        callback_url = topics_dict.get("callback_url")
    elif isinstance(topics_dict, list):
        retrieve_list = topics_dict
        callback_url = None
    else:
        return jsonify("请输入正确格式数据")
    if not retrieve_list:
        return jsonify("请输入查重数据")

    # Per the original note, logic_process covers cleaning/tokenisation and
    # sentence-vector computation before retrieval.
    res_list = hnsw_logic.logic_process(retrieve_list, hnsw_index=0)
    res_dict = {
        retrieve_list[i]["topic_id"]: ele
        for i, ele in enumerate(res_list)
        if len(ele) > 0
    }

    if callback_url is not None:
        requests.post(callback_url, json=res_dict, timeout=10)
    # Log the outgoing result.
    math_dup_logger.info(config.log_msg.format(id="云题库查重", type="repeat返回", message=res_dict))
    return jsonify(res_dict)


# School question bank duplicate check: intake.
@app.route('/chc', methods=['GET', 'POST'])
def chc_retrieve():
    """Accept a school-bank check request and forward it to the transfer service.

    The payload is posted unchanged to ``http://localhost:8858/chc/transfer``
    (described in the original comment as the asynchronous hand-off), and an
    acknowledgement is returned.

    Returns:
        ``{"errcode": 0, "errmsg": "OK"}`` once forwarded, or a JSON string
        when the payload is empty.
    """
    if request.method != 'POST':
        return _reject_non_post()

    topics_dict = request.get_json()
    if not topics_dict:
        return jsonify("请输入查重数据")
    # Log the incoming payload.
    math_dup_logger.info(config.log_msg.format(id="校本题库查重", type="chc接收", message=topics_dict))
    # Hand the request over to the transfer endpoint.
    requests.post(r"http://localhost:8858/chc/transfer", json=topics_dict, timeout=10)
    return jsonify({"errcode": 0, "errmsg": "OK"})


# School question bank duplicate check: processing.
@app.route('/chc/process', methods=['GET', 'POST'])
def chc_process():
    """Run the school-bank check and POST the result to the callback URL.

    The JSON body must be an object with ``topics`` and ``callback_url``.
    The result of ``school_dup_logic`` is posted to ``callback_url``; the
    HTTP response itself is an empty JSON string.
    """
    if request.method != 'POST':
        return _reject_non_post()

    topics_dict = request.get_json()
    # Same empty-payload guard as /chc, so an empty body is not subscripted.
    if not topics_dict:
        return jsonify("请输入查重数据")

    res_dict = school_dup_logic(hnsw_logic.logic_process, topics_dict["topics"])
    callback_url = topics_dict["callback_url"]
    # Deliver the result to the caller-supplied callback.
    requests.post(callback_url, json=res_dict, timeout=10)
    # Last path segment of the callback URL; the original comment calls this
    # the id of the paper being checked.
    post_doc_id = callback_url.split('/')[-1].split('\\')[-1]
    # Log the outgoing result.
    math_dup_logger.info(config.log_msg.format(
        id="校本题库查重", type="chc返回-{}查重完毕".format(post_doc_id), message=res_dict))
    return jsonify("")


# Cloud question bank data update.
@app.route('/cloud/update', methods=['GET', 'POST'])
def cloud_update():
    """Update index 0 of the HNSW logic with the posted ``id`` value.

    The JSON body must be an object containing an ``id`` key, which is passed
    to ``hnsw_logic.update``.

    Returns:
        A JSON string confirming the update, or asking for update data when
        the payload is empty, not an object, or has no ``id``.
    """
    if request.method != 'POST':
        return _reject_non_post()

    update_dict = request.get_json()
    # Log the incoming payload.
    math_dup_logger.info(config.log_msg.format(id="云题库数据更新", type="cloud/update接收", message=update_dict))
    # Reject payloads that cannot supply update_dict["id"].
    if not isinstance(update_dict, dict) or "id" not in update_dict:
        return jsonify("请输入更新数据")
    # Update the HNSW model.
    hnsw_logic.update(update_dict["id"], hnsw_index=0)
    return jsonify("数据更新完毕")


if __name__ == '__main__':
    # Serve through gevent's WSGI server on all interfaces, port 8855.
    server = WSGIServer(('0.0.0.0', 8855), app)
    server.serve_forever()