server_phy.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
#!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. # import logging, os
  4. from flask import Flask, render_template
  5. from flask import request, redirect, Response, send_from_directory
  6. from flask_cors import *
  7. from multiprocessing import Process, Queue
  8. from server_tools import *
  9. from six.moves import urllib
  10. import pandas as pd
  11. from ps_configs import myLog
  12. import ps_configs as config
  13. logger = myLog(__name__).getlog()
  14. app = Flask(__name__)
  15. app.debug = True
  16. CORS(app, supports_credentials=True)
  17. UPLOAD_FOLDER = config.UPLOAD_FOLDER # 上传路径
  18. '''
  19. errcode=0成功
  20. errcode=1失败
  21. errmsg是具体的失败消息
  22. '''
  23. # --------------------------------------------------------------
  24. @app.route('/')
  25. def index():
  26. return redirect("/all_subject_parse")
  27. # 文件上传的HTML模板,这里没有额外去写html模板了,直接写在这里,方便点吧
  28. @app.route('/all_subject_parse')
  29. def upload():
  30. return '''
  31. <html>
  32. <head>
  33. </head>
  34. <body>
  35. <h1>欢迎使用全学科试题word解析</h1>
  36. <form action="/all_subject_parse" method="post" enctype="multipart/form-data">
  37. <input type="file" name="mydata" />
  38. <input type="submit" value="Upload" />
  39. </form>
  40. </body>
  41. </html>
  42. '''
  43. def read(cache):
  44. while True:
  45. filename, filename_root, callback_url, flag, sid = cache.get(True) # 缓存
  46. print("\n+++++++++++++++还有{}缓存解析+++++++++++++++\n".format(cache.qsize() - 1))
  47. print("\n------callback_url===> {} -------\n".format(callback_url))
  48. logger.info('开始解析文件信息:Get filename={}, filename_root={}, callback_url={}, is_cloud ={},sid ={} from queue\n'
  49. .format(filename, filename_root, callback_url, flag, sid))
  50. # 获取上传id号
  51. upload_id = '0000' # 无id号
  52. if re.search(r"wid[/=](\d+)/?", str(callback_url)):
  53. upload_id = re.search(r"wid[/=](\d+)/?", str(callback_url)).group(1)
  54. # 单线程解析
  55. stime1 = time.time()
  56. try:
  57. res, paper_type = parse_word(filename, filename_root, flag, sid, upload_id)
  58. etime1 = time.time()
  59. logger.info("----【upload_id:{}】,整个解析所占时间:{}".format(upload_id, etime1 - stime1))
  60. logger.info("----【upload_id:{}】,该份试卷的格式类型:{}".format(upload_id, paper_type))
  61. except:
  62. # print(traceback.print_exc())
  63. print("--------have callback_url,but time out of parse_word-------")
  64. res = {"time_out": 120,
  65. "id": 0,
  66. "errcode": 1,
  67. "errmsgs": "解析超时,若文件太大,请将文件拆开、分两次上传!"}
  68. # "errmsg": "word读取失败。请尝试:\n 1.请将word中图片改为嵌入式,然后上传。\n 2. 请将word题目拆分为多个word上传。\n 3.请将word中内容复制到新word上传。"
  69. print("------------parse is fail---------------")
  70. try:
  71. headers = {'Content-Type': 'application/json', }
  72. # print(res)
  73. r = requests.post(callback_url,
  74. # json=res, # 可以,但是会进行转义
  75. data=json.dumps(res).encode("utf-8"),
  76. headers=headers)
  77. # timeout=10
  78. logger.info("------【upload_id:{}】,post 回调地址状态===> {} -------\n".format(upload_id, r.status_code))
  79. print("\n------【upload_id:{}】,post 返回结果===> {} -------\n".format(upload_id, r.text))
  80. except TimeoutError:
  81. # print("回调超时")
  82. logger.info("------【upload_id:{}】回调超时-------".format(upload_id))
  83. except Exception as e:
  84. # print(e, "------回调出错")
  85. logger.info("------【upload_id:{}】回调出错:{}-------".format(upload_id, e))
  86. if "items" in res and res["items"]:
  87. if res["items"]:
  88. save_fname = save_post_file(res, filename, upload_id) # 保存返回的解析结果
  89. logger.info("----【upload_id:{}】{}解析完保存的数据文件名:\n{}\n".format(upload_id, os.path.basename(filename), save_fname))
  90. else:
  91. logger.info("----【upload_id:{}】{}解析报错:\n{}\n".format(upload_id, os.path.basename(filename), res["errmsgs"]))
  92. # noinspection PyTypeChecker
  93. @app.route('/all_subject_parse', methods=['POST'])
  94. def do_upload():
  95. """
  96. 该接口函数专门用来接收word和参数,并保存到本地,将保存后的数据放入缓存中;
  97. 参数关键字为:mydata,callback_url,is_cloud,sid
  98. # 文件上传,overwrite=True为覆盖原有的文件,是bottle的用法f.raw_filename
  99. # 如果不加这参数,当服务器已存在同名文件时,将返回“IOError: File exists.”错误
  100. :return:
  101. """
  102. img_file_count = 0
  103. if os.listdir(UPLOAD_FOLDER):
  104. img_file_count = max([int(i) for i in os.listdir(UPLOAD_FOLDER)]) + 1
  105. logger.info("==接收文件:request.POST.dict==>{}".format(request.form.to_dict())) # php传过来的文件
  106. print("==request.POST.dict==>{}".format(request.form.to_dict()))
  107. callback_url = request.form.get('callback_url', "")
  108. flag = request.form.get('is_cloud', "0") # word中的图片是否上传ucloud
  109. sid = request.form.get('sid', "0") # 学管端、教师端参数
  110. # word文件接收
  111. # 旧接口:整个word文件传过来,主要应用在云题库端上传
  112. try:
  113. upfile = request.files.get('mydata')
  114. print("\n------接受文件名==> {}-------\n".format(os.path.basename(upfile.filename)))
  115. if os.path.splitext(upfile.filename)[1] not in [".doc", ".docx"]:
  116. return "only accept .doc .docx files"
  117. file_root1 = os.path.join(UPLOAD_FOLDER, str(img_file_count)) # 每份word保存的文件夹路径
  118. if not os.path.isdir(file_root1):
  119. os.makedirs(file_root1)
  120. new_filename = str(img_file_count) + os.path.splitext(upfile.filename)[1]
  121. filename = os.path.join(file_root1, new_filename) # 文件的绝对路径, 不会含有汉字
  122. upfile.save(filename) # 保存到当地
  123. except:
  124. # 新接口:mydata传过来的是文件地址(json格式),需另外下载
  125. # j_data = json.loads(request.json.get('mydata'))
  126. up_para = request.form.get('mydata')
  127. print("up_para:", up_para)
  128. upfile_url = json.loads(up_para)[0]["file_url"] # word文件的绝对路径
  129. print("文件地址:", upfile_url)
  130. print("\n------接受文件名==> {}-------\n".format(os.path.basename(upfile_url)))
  131. if os.path.splitext(upfile_url)[1] not in [".doc", ".docx"]:
  132. return "only accept .doc .docx files"
  133. file_root1 = os.path.join(UPLOAD_FOLDER, str(img_file_count)) # 每份word保存的文件夹路径
  134. if not os.path.isdir(file_root1):
  135. os.makedirs(file_root1)
  136. # 根据url地址下载word文件
  137. new_filename = str(img_file_count) + os.path.splitext(upfile_url)[1]
  138. filename = os.path.join(file_root1, new_filename) # 文件的绝对路径, 不会含有汉字
  139. try:
  140. urllib.request.urlretrieve(upfile_url, filename) # 下载到指定位置
  141. except:
  142. return "word文件下载失败,请重新上传"
  143. print('------word Successfully downloaded-----')
  144. # ----------------------------------------------------------------------
  145. # 开一个进程对filename 进行解析
  146. if callback_url:
  147. cache_file.put([filename, file_root1, callback_url, flag, sid])
  148. print("-----当前缓存还有{}-----".format(int(cache_file.qsize()) - 1))
  149. return json.dumps({"errcode": 0, "errmsg": "OK", "docsbefore": int(cache_file.qsize()) - 1},
  150. ensure_ascii=False).encode("utf-8")
  151. else: # 没有回调地址时,简易网页上传
  152. stime1 = time.time()
  153. try:
  154. res, paper_type = parse_word(filename, None)
  155. etime1 = time.time()
  156. print("------------简易接口上传,解析成功---------------")
  157. logger.info("------------简易接口上传,解析成功---------------")
  158. logger.info("----整个解析所占时间:{}".format(etime1 - stime1))
  159. logger.info("----该份试卷的格式类型:{}".format(paper_type))
  160. except:
  161. print("------------简易接口上传,解析失败---------------")
  162. res = {"time_out": 3,
  163. "errcode": 1,
  164. "errmsg": "word读取失败。请尝试:\n 1.请确定试卷版面格式是否为常见的试卷排版格式,删除不规范格式;\n"
  165. "2.请将word中内容复制到新word上传;\n 3. 请将word题目拆分为多个word上传。\n 4.请检查图片格式,将word中图片改为嵌入式后再上传。"}
  166. if "items" in res and res["items"]:
  167. save_fname = save_post_file(res, filename, '0011')
  168. print("----{}解析完保存的数据文件名:\n{}".format(os.path.basename(filename), save_fname))
  169. new_res = []
  170. for r in res["items"]:
  171. dd = [r["item_id"], r["type"], r["stem"],
  172. r["key"], r["analysis"], r["text_errmsgs"]]
  173. if "options" in r:
  174. r["options"] = [chr(65+k)+'、'+ opt for k, opt in enumerate(r["options"])]
  175. dd[2] += "<br/>===================<br/>【选项】<br/>" \
  176. + ";<br/>".join(r["options"]).replace("<br/>;", ";")
  177. new_res.append(dd)
  178. resdf = pd.DataFrame(new_res)
  179. resdf.columns = ["题号", "题型", "题干", "答案", "解析","错误信息"]
  180. pd.set_option('display.max_columns', None)
  181. pd.set_option('display.max_rows', None)
  182. pd.set_option('expand_frame_repr', True) # 允许换行
  183. pd.set_option('display.width', -1)
  184. pd.set_option('display.max_colwidth', -1) # 自动最大列宽
  185. # 如何将图片显示出来:图片属性格式要符合前端显示标准
  186. return render_template('res.html',
  187. items=[resdf.to_html(header=True,
  188. index=False,
  189. escape=False,
  190. justify="center")],
  191. res_json=json.dumps(res, ensure_ascii=False)
  192. )
  193. else:
  194. # resp = Response_headers(json.dumps(res, ensure_ascii=False).encode("utf-8")) # , indent=4
  195. save_fname = save_post_file(res, filename, '0011')
  196. print("\n----{}解析完保存的数据文件名:\n{}".format(os.path.basename(filename), save_fname))
  197. resp = json.dumps(res, ensure_ascii=False)
  198. return resp
  199. @app.route('/ser_static/<path:file_path>', methods=["GET"])
  200. def ser_static(file_path): # endpoint的位置是函数接口名,不能用static,与flask内部变量重名
  201. """
  202. 将本地图片供php调用,上传Ucloud时该函数不使用
  203. :param file_path: 图片的本地绝对路径
  204. :return:
  205. """
  206. # filepath = request.args.get('q') # 针对/**/=?类型
  207. file = os.path.join(UPLOAD_FOLDER, file_path)
  208. if "image" in file:
  209. resize_img(file, file)
  210. # resp = Response(base64.b64encode(open(file, 'rb').read()).decode()) # 仍无法显示图片
  211. # resp = Response(open(file, 'rb'))
  212. # return resp
  213. return send_from_directory(UPLOAD_FOLDER, file_path)
  214. if __name__ == "__main__":
  215. # 解析文件的队列
  216. cache_file = Queue()
  217. # start_word2html_app(kill_mathtype=True)
  218. # 读取解析进程
  219. pr_parse = Process(target=read, args=(cache_file,))
  220. pr_parse.start()
  221. print("parse pid:", pr_parse.pid)
  222. # sleep(2)
  223. # 图片供应进程,根据ip和端口号找到本地储存图片并启动进程
  224. # pr_img = Process(app.run, args=(), # 有问题
  225. # kwargs={"host": '192.168.1.140', "port": '18083'})
  226. # pr_img.start()
  227. # print("image pid:", pr_img.pid)
  228. app.run(host=config.server_ip, port=config.server_port, threaded=True, debug=True)