server_new.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
#!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. import logging
  4. from flask import Flask, render_template
  5. from flask import request, redirect, Response
  6. from flask_cors import *
  7. from multiprocessing import Process, Queue
  8. from server_tools import *
  9. import pandas as pd
  10. import base64
  11. from six.moves import urllib
  12. logger = logging.getLogger(__name__)
  13. logger.setLevel(level=logging.INFO)
  14. log_file = os.path.join(r'./logs', 'parse_log.txt') # 日志地址
  15. handler = logging.FileHandler(log_file, mode='a', encoding='utf-8', delay=True)
  16. handler.setLevel(logging.INFO)
  17. formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  18. handler.setFormatter(formatter)
  19. # console = logging.StreamHandler()
  20. # console.setLevel(logging.INFO)
  21. # console.setFormatter(formatter)
  22. logger.addHandler(handler)
  23. # logger.addHandler(console)
# Flask application object; debug mode is switched on here and CORS is
# applied to the app with supports_credentials=True.
app = Flask(__name__)
app.debug = True
CORS(app, supports_credentials=True)
'''
errcode=0成功
errcode=1失败
errmsg是具体的失败消息
'''
# English reading of the note above (response convention of this service):
#   errcode=0 means success, errcode=1 means failure,
#   errmsg carries the specific failure message.
# Upload directory
UPLOAD_FOLDER = r'D:\zwj\word_uploads'  # path where the wordbin service generates files
# Create the upload directory on import when it does not exist yet.
if not os.path.isdir(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
  36. # --------------------------------------------------------------
  37. @app.route('/')
  38. def index():
  39. return redirect("/word_parse")
  40. # 文件上传的HTML模板,这里没有额外去写html模板了,直接写在这里,方便点吧
  41. @app.route('/word_parse')
  42. def upload():
  43. return '''
  44. <html>
  45. <head>
  46. </head>
  47. <body>
  48. <h1>欢迎使用理科试题word解析</h1>
  49. <form action="/word_parse" method="post" enctype="multipart/form-data">
  50. <input type="file" name="mydata" />
  51. <input type="submit" value="Upload" />
  52. </form>
  53. </body>
  54. </html>
  55. '''
  56. def read(cache):
  57. while True:
  58. filename, filename_root, callback_url, flag, sid = cache.get(True) # 缓存
  59. print("\n+++++++++++++++还有{}缓存解析+++++++++++++++\n".format(cache.qsize() - 1))
  60. print("\n------callback_url===> {} -------\n".format(callback_url))
  61. logger.info('Get filename={}, filename_root={}, callback_url={}, is_cloud ={},sid ={} from queue\n'
  62. .format(filename, filename_root, callback_url, flag, sid))
  63. # 单线程解析
  64. stime1 = time.time()
  65. try:
  66. res, wordbin_time, paper_type, img_upload_time = parse_word(filename, filename_root, flag, sid)
  67. etime1 = time.time()
  68. logger.info("\n----wordbin服务所占时间:{}".format(wordbin_time))
  69. logger.info("\n----整个解析所占时间:{}".format(etime1 - stime1))
  70. logger.info("\n----该份试卷的格式类型:{}".format(paper_type))
  71. if img_upload_time:
  72. logger.info("\n----图片上传时间:{}".format(img_upload_time))
  73. except:
  74. # print(traceback.print_exc())
  75. print("--------have callback_url,but time out of parse_word-------")
  76. res = {"time_out": 90,
  77. "id": 0,
  78. "errcode": 1,
  79. "errmsgs": "解析超时,请重试"}
  80. # "errmsg": "word读取失败。请尝试:\n 1.请将word中图片改为嵌入式,然后上传。\n 2. 请将word题目拆分为多个word上传。\n 3.请将word中内容复制到新word上传。"
  81. print("------------parse is fail---------------")
  82. try:
  83. headers = {'Content-Type': 'application/json', }
  84. # print(json.dumps(res, ensure_ascii=False).encode("utf-8"))
  85. r = requests.post(callback_url,
  86. # json=res, # 可以,但是会进行转义
  87. data=json.dumps(res, ensure_ascii=False), # 文件开头有编码显示
  88. headers=headers)
  89. # timeout=10
  90. print("\n------post 回调地址===> {} -------\n".format(r.status_code))
  91. print("\n------post 返回结果===> {} -------\n".format(r.text))
  92. except TimeoutError:
  93. print("回调超时")
  94. except Exception as e:
  95. print(e)
  96. print("回调出错")
  97. if "items" in res and res["items"]:
  98. if re.search(r"wid/(\d+)/?", str(callback_url)):
  99. upload_id = re.search(r"wid/(\d+)/?", str(callback_url)).group(1)
  100. save_fname = save_post_file(res, filename, upload_id) # 保存返回的解析结果
  101. else:
  102. save_fname = save_post_file(res, filename, '0000')
  103. logger.info("\n----{}解析完保存的数据文件名:\n{}".format(os.path.basename(filename), save_fname))
  104. # noinspection PyTypeChecker
  105. @app.route('/word_parse', methods=["POST"])
  106. def do_upload():
  107. """
  108. 该接口函数专门用来接收word和参数,并保存到本地,将保存后的数据放入缓存中;
  109. 参数关键字为:mydata,callback_url,flag,sid
  110. # 文件上传,overwrite=True为覆盖原有的文件,是bottle的用法f.raw_filename
  111. # 如果不加这参数,当服务器已存在同名文件时,将返回“IOError: File exists.”错误
  112. :return:
  113. """
  114. img_file_count = 0
  115. if os.listdir(UPLOAD_FOLDER):
  116. img_file_count = max([int(i) for i in os.listdir(UPLOAD_FOLDER)]) + 1
  117. logger.info("==request.POST.dict==>{}\n".format(request.form.to_dict())) # php传过来的文件
  118. callback_url = request.form.get('callback_url', "")
  119. # flag = request.form.get('is_cloud', [0])[0]
  120. # sid = request.form.get('sid', [0])[0]
  121. flag = request.form.get('is_cloud', "0") # word中的图片是否上传云端
  122. sid = request.form.get('sid', "0") # 学管端、教师端参数
  123. # word文件接收
  124. # 旧接口::整个word文件传过来,主要应用在云题库端上传
  125. try:
  126. upfile = request.files.get('mydata') # 题库上传
  127. print("\n------接受文件名==> {}-------\n".format(os.path.basename(upfile.filename)))
  128. if os.path.splitext(upfile.filename)[1] not in [".doc", ".docx"]:
  129. return "only accept .doc .docx files"
  130. file_root1 = os.path.join(UPLOAD_FOLDER, str(img_file_count)) # 保存文件所在位置的上一层根目录
  131. if not os.path.isdir(file_root1):
  132. os.makedirs(file_root1)
  133. upfile.filename = str(img_file_count) + os.path.splitext(upfile.filename)[1]
  134. filename = os.path.join(file_root1, upfile.filename) # 文件的绝对路径, 不会含有汉字
  135. upfile.save(filename) # 保存到当地
  136. # -----------------------------------------------------------
  137. except:
  138. # 新接口mydata传过来的是json
  139. # j_data = json.loads(request.json.get('mydata'))
  140. l_data = request.form.get('mydata')
  141. print("l_data:", l_data)
  142. upfile_url = json.loads(l_data)[0]["file_url"]
  143. print("文件地址:", upfile_url)
  144. print("\n------接受文件名==> {}-------\n".format(os.path.basename(upfile_url)))
  145. if os.path.splitext(upfile_url)[1] not in [".doc", ".docx"]:
  146. return "only accept .doc .docx files"
  147. file_root1 = os.path.join(UPLOAD_FOLDER, str(img_file_count)) # 保存文件所在位置的上一层根目录
  148. if not os.path.isdir(file_root1):
  149. os.makedirs(file_root1)
  150. # 根据url地址下载word文件
  151. # r_file = requests.get(upfile_url)
  152. # if r_file.status_code != 200:
  153. # print('下载异常')
  154. # with open(filename, "wb") as f: # 将文件下载到本地
  155. # f.write(r_file.content)
  156. filename = str(img_file_count) + os.path.splitext(upfile_url)[1]
  157. filename = os.path.join(file_root1, filename) # 文件的绝对路径, 不会含有汉字
  158. try:
  159. urllib.request.urlretrieve(upfile_url, filename)
  160. except:
  161. return "word文件下载失败,请重新上传"
  162. print('------word Successfully downloaded-----')
  163. # 开一个进程对filename 进行解析
  164. if callback_url:
  165. cache_file.put([filename, file_root1, callback_url, flag, sid])
  166. print("-----当前缓存还有{}-----".format(int(cache_file.qsize()) - 1))
  167. return json.dumps({"errcode": 0, "errmsg": "OK", "docsbefore": int(cache_file.qsize()) - 1},
  168. ensure_ascii=False)
  169. else: # 没有回调地址时,简易网页上传
  170. stime1 = time.time()
  171. try:
  172. res, wordbin_time, paper_type, img_upload_time = parse_word(filename, None)
  173. etime1 = time.time()
  174. print("------------简易接口上传,解析成功---------------")
  175. logger.info("\n------------简易接口上传,解析成功---------------")
  176. logger.info("\n开始解析文件:{}".format(filename))
  177. logger.info("\n----wordbin服务所占时间:{}".format(wordbin_time))
  178. logger.info("\n----整个解析所占时间:{}".format(etime1 - stime1))
  179. logger.info("\n----该份试卷的格式类型:{}".format(paper_type))
  180. except:
  181. print("------------简易接口上传,解析失败---------------")
  182. res = {"time_out": 3,
  183. "errcode": 1,
  184. "errmsg": "word读取失败。请尝试:\n 1.请确定试卷版面格式是否为常见的试卷排版格式,删除不规范格式;\n"
  185. "2.请将word中内容复制到新word上传;\n 3. 请将word题目拆分为多个word上传。"
  186. "\n 4.请检查图片格式,将word中图片改为嵌入式后再上传。"}
  187. save_fname = save_post_file(res, filename, '0011')
  188. logger.info("\n----{}解析完保存的数据文件名【简易上传】:\n{}\n".format(os.path.basename(filename), save_fname))
  189. if "items" in res and res["items"]:
  190. # save_fname = save_post_file(res, filename, '0011')
  191. # logger.info("\n----{}解析完保存的数据文件名【简易上传】:\n{}\n".format(os.path.basename(filename), save_fname))
  192. new_res = []
  193. for r in res["items"]:
  194. dd = [r["item_id"], r["type"], r["stem"],
  195. r["key"], r["analysis"],r["text_errmsgs"]]
  196. if "options" in r:
  197. r["options"] = [chr(65+k)+'、'+ opt for k, opt in enumerate(r["options"])]
  198. dd[2] += "<br/>===================<br/>【选项】<br/>" + ";<br/>".join(r["options"]).replace("<br/>;", ";")
  199. new_res.append(dd)
  200. resdf = pd.DataFrame(new_res)
  201. resdf.columns = ["题号", "题型", "题干", "答案", "解析"]
  202. pd.set_option('display.max_columns', None)
  203. pd.set_option('display.max_rows', None)
  204. pd.set_option('expand_frame_repr', True) # 允许换行
  205. pd.set_option('display.width', -1)
  206. pd.set_option('display.max_colwidth', -1) # 自动最大列宽
  207. # 如何将图片显示出来:图片属性格式要符合前端显示标准
  208. return render_template('res.html',
  209. items=[resdf.to_html(header=True,
  210. index=False,
  211. escape=False, # 影响字符串格式
  212. justify="center")],
  213. res_json=json.dumps(res, ensure_ascii=False)
  214. )
  215. else:
  216. # resp = Response_headers(json.dumps(res, ensure_ascii=False).encode("utf-8")) # , indent=4
  217. resp = json.dumps(res, ensure_ascii=False)
  218. return resp
  219. # @app.route('/static/<a>/<b>/<filepath>', methods=["GET"])
  220. # def server_static(a, b, filepath):
  221. @app.route('/ser_static/<path:file_path>', methods=["GET"])
  222. def ser_static(file_path): # endpoint的位置是函数接口名
  223. """
  224. 将本地图片供页面缓存机制调用,上传cloud时该函数不使用
  225. :param file_path: 图片的本地绝对路径
  226. :return:
  227. """
  228. # filepath = request.args.get('q') # 针对/**/=?类型
  229. file = os.path.join(UPLOAD_FOLDER, file_path)
  230. if "image" in file:
  231. resize_img(file, file)
  232. # resp = Response(base64.b64encode(open(file, 'rb').read()).decode()) # 仍无法显示图片
  233. resp = Response(open(file, 'rb'))
  234. return resp
  235. # 单独在网页上输入图片地址时,用下面的模板渲染图片显示出来
  236. # img_stream = return_img_stream(file)
  237. # return render_template('index.html',
  238. # img_stream=img_stream)
  239. # @app.template_filter("src_render")
  240. # def src_render(src_cont):
  241. # src_cont = src_cont.replace("&lt;", "<").replace("&gt;", ">")
  242. # return src_cont
  243. if __name__ == "__main__":
  244. # 解析文件的队列
  245. cache_file = Queue()
  246. # start_word2html_app(kill_mathtype=True)
  247. # 读取解析进程
  248. pr_parse = Process(target=read, args=(cache_file,))
  249. pr_parse.start()
  250. print("parse pid:", pr_parse.pid)
  251. # sleep(2)
  252. # 图片供应进程
  253. # pr_img = Process(target=(), args=(),
  254. # kwargs={"host": "192.168.1.140", "port": "18083", "server": "tornado"})
  255. # pr_img.start()
  256. # print("image pid:", pr_img.pid)
  257. app.run(host="0.0.0.0", port=11088, threaded=True, debug=True)