# math_dup_app.py (4.2 KB)
  1. from gevent import monkey; monkey.patch_all()
  2. from flask import Flask, request, jsonify
  3. import requests
  4. from gevent.pywsgi import WSGIServer
  5. import config
  6. from hnsw_logic import Hnsw_Logic
  7. from school_dup_logic import School_Dup_Logic
  # Flask application object that the route decorators in this module attach to.
  app = Flask(__name__)

  # Logger for this service, obtained from the project's LogConfig helper.
  math_dup_logger = config.LogConfig(config.math_dup_path, "math_dup").get_log()

  # HNSW retrieval logic used by the duplicate-check and update endpoints.
  hnsw_logic = Hnsw_Logic(math_dup_logger)

  # Decision logic for the school question-bank duplicate check.
  school_dup_logic = School_Dup_Logic(math_dup_logger)
  # Cloud question-bank duplicate check.
  @app.route('/repeat', methods=['GET', 'POST'])
  def repeat_retrieve():
      """Run a duplicate check against the cloud question bank.

      The POST body is JSON in one of two shapes:

      * an object with a ``topics`` list and an optional ``callback_url``;
      * a bare list of topics (no callback).

      Every topic must carry a ``topic_id`` key.

      Returns:
          A JSON object mapping each ``topic_id`` to that topic's result from
          ``hnsw_logic.logic_process``; topics whose result is empty are
          omitted. When a callback URL was supplied, the same object is also
          POSTed to it. Malformed or empty input yields a JSON string message
          instead, and a non-POST request yields a 405.
      """
      if request.method != 'POST':
          # The route also accepts GET; without this guard the view would fall
          # through and return None, which Flask reports as a 500.
          return jsonify("请使用POST请求"), 405

      payload = request.get_json()
      # Log the incoming request.
      math_dup_logger.info(config.log_msg.format(id="云题库查重",
                                                 type="repeat接收",
                                                 message=payload))

      if isinstance(payload, dict):
          # .get() so that a missing key is handled as bad input below rather
          # than escaping as a KeyError.
          retrieve_list = payload.get("topics")
          callback_url = payload.get("callback_url")
      elif isinstance(payload, list):
          retrieve_list = payload
          callback_url = None
      else:
          return jsonify("请输入正确格式数据")

      if not retrieve_list:
          return jsonify("请输入查重数据")

      # Delegate cleaning, tokenising and vector search to the HNSW logic.
      res_list = hnsw_logic.logic_process(retrieve_list, hnsw_index=0)
      # len() rather than truthiness: the element type returned by
      # logic_process is not visible here and may not define a boolean value.
      res_dict = {
          topic["topic_id"]: hits
          for topic, hits in zip(retrieve_list, res_list)
          if len(hits) > 0
      }

      if callback_url is not None:
          try:
              requests.post(callback_url, json=res_dict, timeout=10)
          except requests.RequestException as exc:
              # The callback is a secondary delivery channel: the result is
              # still returned in this response, so record the failure instead
              # of discarding the computed result with a 500.
              math_dup_logger.info(config.log_msg.format(id="云题库查重",
                                                         type="repeat回调失败",
                                                         message=exc))

      # Log the outgoing result.
      math_dup_logger.info(config.log_msg.format(id="云题库查重",
                                                 type="repeat返回",
                                                 message=res_dict))
      return jsonify(res_dict)
  # School question-bank duplicate check: intake endpoint.
  @app.route('/chc', methods=['GET', 'POST'])
  def chc_retrieve():
      """Accept a school question-bank duplicate-check request and hand it off.

      The JSON body is forwarded unchanged to the transfer service at
      ``http://localhost:8858/chc/transfer`` (presumably so the check itself
      runs asynchronously -- confirm against that service).

      Returns:
          ``{"errcode": 0, "errmsg": "OK"}`` once the transfer service has
          accepted the payload; a JSON string message when the body is empty;
          a 405 for non-POST requests.

      Raises:
          requests.RequestException: if the transfer service cannot be
              reached or answers with an HTTP error status.
      """
      if request.method != 'POST':
          # The route also accepts GET; without this guard the view would fall
          # through and return None, which Flask reports as a 500.
          return jsonify("请使用POST请求"), 405

      topics_dict = request.get_json()
      if not topics_dict:
          return jsonify("请输入查重数据")

      # Log the incoming request.
      math_dup_logger.info(config.log_msg.format(id="校本题库查重",
                                                 type="chc接收",
                                                 message=topics_dict))

      try:
          transfer_resp = requests.post(r"http://localhost:8858/chc/transfer",
                                        json=topics_dict, timeout=10)
          # Do not report "OK" to the client when the hand-off was rejected.
          transfer_resp.raise_for_status()
      except requests.RequestException as exc:
          # Leave a trace in the service log, then let the error propagate.
          math_dup_logger.info(config.log_msg.format(id="校本题库查重",
                                                     type="chc转移失败",
                                                     message=exc))
          raise

      return jsonify({"errcode": 0, "errmsg": "OK"})
  # School question-bank duplicate check: processing endpoint.
  @app.route('/chc/process', methods=['GET', 'POST'])
  def chc_process():
      """Run the school question-bank duplicate check and deliver the result.

      The POST body must be a JSON object with ``topics`` and a string
      ``callback_url``. The result of ``school_dup_logic`` is POSTed to the
      callback URL. The last segment of that URL (after the final ``/`` or
      backslash) is used in the log entry as the id of the checked paper.

      Returns:
          An empty JSON string on success; a JSON string message with status
          400 when the body is malformed; a 405 for non-POST requests.

      Raises:
          requests.RequestException: if posting the result to the callback
              URL fails (logged before being re-raised).
      """
      if request.method != 'POST':
          # The route also accepts GET; without this guard the view would fall
          # through and return None, which Flask reports as a 500.
          return jsonify("请使用POST请求"), 405

      topics_dict = request.get_json()
      # Reject bodies that would otherwise fail with TypeError/KeyError below.
      if (not isinstance(topics_dict, dict)
              or "topics" not in topics_dict
              or not isinstance(topics_dict.get("callback_url"), str)):
          return jsonify("请输入正确格式数据"), 400

      callback_url = topics_dict["callback_url"]
      # Derived up front so the id is available to the failure log as well.
      post_doc_id = callback_url.split('/')[-1].split('\\')[-1]

      res_dict = school_dup_logic(hnsw_logic.logic_process, topics_dict["topics"])

      try:
          # The callback is the only delivery channel for this result.
          requests.post(callback_url, json=res_dict, timeout=10)
      except requests.RequestException as exc:
          # Record the failure, then propagate so the caller still sees an error.
          math_dup_logger.info(config.log_msg.format(
              id="校本题库查重",
              type="chc回调失败-{}".format(post_doc_id),
              message=exc))
          raise

      # Log the delivered result.
      math_dup_logger.info(config.log_msg.format(
          id="校本题库查重",
          type="chc返回-{}查重完毕".format(post_doc_id),
          message=res_dict))
      return jsonify("")
  # Cloud question-bank data update.
  @app.route('/cloud/update', methods=['GET', 'POST'])
  def cloud_update():
      """Update the HNSW model from a cloud question-bank change notice.

      The POST body must be a JSON object with an ``id`` entry, which is
      passed to ``hnsw_logic.update`` with ``hnsw_index=0``.

      Returns:
          A JSON string confirming the update; a JSON string message when the
          body is empty (status 200) or lacks ``id`` (status 400); a 405 for
          non-POST requests.
      """
      if request.method != 'POST':
          # The route also accepts GET; without this guard the view would fall
          # through and return None, which Flask reports as a 500.
          return jsonify("请使用POST请求"), 405

      update_dict = request.get_json()
      # Log the incoming request.
      math_dup_logger.info(config.log_msg.format(id="云题库数据更新",
                                                 type="cloud/update接收",
                                                 message=update_dict))

      if not update_dict:
          return jsonify("请输入更新数据")
      # A non-object body or a missing "id" would otherwise raise
      # TypeError/KeyError on the lookup below.
      if not isinstance(update_dict, dict) or "id" not in update_dict:
          return jsonify("请输入正确格式数据"), 400

      # Refresh the HNSW model.
      hnsw_logic.update(update_dict["id"], hnsw_index=0)
      return jsonify("数据更新完毕")
  if __name__ == '__main__':
      # Serve the app through gevent's WSGIServer on all interfaces, port 8855,
      # rather than through Flask's built-in app.run() server.
      server = WSGIServer(('0.0.0.0', 8855), app)
      server.serve_forever()