# -*- coding:utf-8 -*- import random, hashlib import requests import configs import shutil from utils.diffi_label import get_item_diff from utils.img2latex import get_ocrlatex_by_url from utils.qcloud_bucket import upload_img_to_qcloud, img_inbucket_count, client, filestream_upload from concurrent.futures import ProcessPoolExecutor from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor from utils.image_convert import svg2png from utils.label_data_Hphy import Label from bisect import bisect_left import re, time, os, json, datetime from copy import deepcopy logger = configs.myLog(__name__, log_cate="ruku_log").getlog() subject_id = { "高中数学": 3, "高中物理": 12, "高中化学": 13, "高中生物": 14, "高中政治": 15, "高中历史": 16, "高中地理": 17, "初中数学": 41, "初中英语": 42, "初中物理": 43, "初中化学": 44, "初中生物": 45, "初中地理": 46, } class Ruku(): def __init__(self, items_list, htmlt, svg_data, wordid, callback_info, subject=""): self.items_list = items_list self.htmlt = htmlt self.svg_data = svg_data self.wordid = wordid self.callback_url = callback_info["callback_url"] self.source = callback_info["source"] self.subject = subject self.callback_code = 0 self.callback_err = "" def bucket_img_del(self): """ wordbin中图片上传腾讯云的原因:1.存在线上服务器的图片要定期删除,而有的题可能还没审核完;2.上传腾讯云比较便宜 删除腾讯云中的图片 根据【解析结果】文本中删除的图片信息,通过阙值判断是否从腾讯云中删除图片 也有限制:解析结果不能为空,结果中出现的图片在原试卷应该也要有! 
:return: """ items_str = str(self.items_list) raw_imgs = [] # img_source = "" http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/imgpaper/lqy_upload/612f60307ddb8b2765e50512/img_23.png img_source = "" items_str = re.sub(r'( 5: # 多余图片超过5张才开始删除,包含mathjax生成的图片 bucket_imgs = [i['Key'] for i in bucket_imgs] # dict:[{'Key': put_key}, {}] to_del_imgs = [bimg for bimg in bucket_imgs if bimg not in raw_imgs] if to_del_imgs: objects = { "Quiet": "true", "Object": [{'Key': item} for item in to_del_imgs] } client.delete_objects(configs.public_bucket, objects) # 批量删除 logger.info("----【paper_id:{}】删除桶数据时间:{}".format(self.wordid, time.time() - time2)) def mathjx2png(self): """ 【基于mathjax渲染输出是svg格式】 将mathjax渲染的公式转化为图片格式 mathjax渲染的svg图片提取->保存->格式转化 :return: """ file_path = configs.IMG_FOLDER + "/" + str(self.wordid) svgp_ltx = {} # svg图片本地路径 映射 latex put_key_mjmath = [] # 桶中key local_mjmath = [] # 本地图片存储位置 # self.ltx2url = {} # latex 映射 线上可访问url ltx2svgcss = {} # latex 映射 svg-css self.ltx2url = { # "{\\text{H}}_{\\text{2}}\\text{(g)+}\\frac{\\text{1}}{\\text{2}}{\\text{O}}_{\\text{2}}{\\text{(g)=H}}_{\\text{2}}\\text{O(g)}\\hspace{0.17em}\\hspace{0.17em}\\hspace{0.17em}\\text{\\u200a}{\\text{ΔH}}_{\\text{1}}": # "", # "\\frac{\\text{1}}{\\text{2}}{\\text{N}}_{\\text{2}}{\\text{(g)+O}}_{\\text{2}}{\\text{(g)=NO}}_{\\text{2}}\\text{(g)}\\hspace{0.17em}\\hspace{0.17em}\\hspace{0.17em}\\text{\\u200a}{\\text{ΔH}}_{\\text{2}}": # "", # "\\frac{\\text{1}}{\\text{2}}{\\text{N}}_{\\text{2}}\\text{(g)+}\\frac{\\text{3}}{\\text{2}}{\\text{H}}_{\\text{2}}\\text{(g)}\\underset{}{\\overset{}{⇌}}{\\text{NH}}_{\\text{3}}\\text{(g)}\\hspace{0.17em}\\hspace{0.17em}\\hspace{0.17em}\\text{\\u200a}{\\text{ΔH}}_{\\text{3}}": # "", } if self.callback_url and "MathJax" in str(self.svg_data["svg_html_data"]): # 再解析中存在mathjax公式渲染的标签 # 需要统计mathjax转png失败时的具体定位 # 题号所在位置 topicinfo = [[nm.end(),nm.group(1)] for nm in re.finditer('class="topic-number">(\d+)\.', str(self.svg_data["svg_html_data"]))] topicidx = 
[i[0] for i in topicinfo] topicno = [i[1] for i in topicinfo] time3 = time.time() all_mathjax = [] for one_svghtml in self.svg_data["svg_html_data"]: # some_mathjax = [[a.start(), a.group(1)] for a in # re.finditer('(()*)', one_svghtml)] some_mathjax = [[topic_n + 1, a.group(1)] for a in re.finditer('(()*)', one_svghtml)] all_mathjax.extend(some_mathjax) # all_mathjax3 = re.findall('(()*)', # str(self.svg_data["svg_html_data"])) all_linkdata = re.findall('()', self.svg_data["svg_path"]) link_dict = {a[1]: a[0] for a in all_linkdata if a} # all_svg, all_latex = [], [] for n, jax in enumerate(all_mathjax): svgs = re.findall("", jax[1]) latex = re.findall('()*', jax[1]) if latex and svgs: if latex and not latex[0][0].replace("{", "").replace("}", "").strip(): # id_idx = bisect_left(topicidx, jax[0]) # 二分法查找 logger.info("----【paper_id:{}】第{}题存在mathjax公式为空:latex:{}\nsvg:{}".format( self.wordid, jax[0], latex, svgs)) ltx2svgcss["{}".format(latex[0][0])] = svgs[0] # latex去重 else: # id_idx = bisect_left(topicidx, jax[0]) logger.info("----【paper_id:{}】第{}题存在mathjax公式格式有问题:latex:{}\nsvg:{}".format( self.wordid, jax[0], latex, svgs)) if latex: return "第{}题的公式latex:{}不规范,请编辑正确!".format(jax[0], latex[0][0]) else: return "第{}题的存在不规范的公式,,请重新编辑!".format(jax[0]) # all_latex.append("{}".format(latex[0][0])) # all_svg.append(svgs[0]) # else: # all_svg.append("") # all_latex.append("") if ltx2svgcss: if not os.path.exists(file_path): os.makedirs(file_path) svg_mjmath_path = os.path.join(file_path, "svg_mjmath") if not os.path.exists(svg_mjmath_path): os.makedirs(svg_mjmath_path) else: # 需要清空 shutil.rmtree(svg_mjmath_path) os.makedirs(svg_mjmath_path) n = 0 name_list = random.sample(range(100000, 999999), len(ltx2svgcss)) # 随机数 for ltx, svg in ltx2svgcss.items(): linkkeys = list(set(re.findall(' xlink:href="#(.*?)"', svg))) linkvalues = [link_dict[ld] for ld in linkkeys] svg_p = file_path + "/svg_mjmath/MJMATH-{}.svg".format(str(int(time.time())) + str(name_list[n])) fs = open(svg_p, 
'w', encoding='utf8') svg_1, svg_2 = svg.split("\n' + "\n".join(linkvalues) + "\n") fs.write(">svg转png if svgp_ltx: try: with ThreadPoolExecutor(max_workers=6) as t: all_png_info = [t.submit(svg2png, arg) for arg in svgp_ltx.keys()] except Exception as e: logger.info(json.dumps({"log_level": "warn", "paper_id": self.wordid, "status": "svg2png失败", "errmsg": str(e)}, ensure_ascii=False)) all_png_info = [i.result() for i in all_png_info] for shape, img_path in all_png_info: local_mjmath.append(img_path) imgname_online = "/zyk/uploadfiles/wording/" + str(self.wordid) + "/{}".format(os.path.basename(img_path)) put_key_mjmath.append(imgname_online) mj_ltx = svgp_ltx[img_path.replace(".png", ".svg")] # .replace("\\\\", "\\") mjmath_online = ''.format( shape[0] / 2, shape[1] / 2, "${}$".format(mj_ltx)) self.ltx2url[mj_ltx] = mjmath_online logger.info(json.dumps({"log_level": "info", "paper_id": self.wordid, "svg2png_time": time.time() - time4, "ltx2url": self.ltx2url}, ensure_ascii=False).encode("utf8")) # print("self.ltx2url:", self.ltx2url) return put_key_mjmath, local_mjmath def upload_img(self): """ items_list:结构化纯文本 htmlt:ocr或word解析后的html文本 svg_data:{"svg_html_data": "", "svg_path": ""} 含svg数据的结构化整体html文本,svg中的索引数据 wordid:试卷存储id callback_url: 回调地址 入库操作包含: 1、再解析后确认入库时,将image上传,htmlt中图片; 2、学管端可以是组合(文本+图片)的解析结果。学管可以选择和修改文本,但选择的文本解析时都会替换原先的img标签内容, 最后入库时,传回学管端的都是带img标签的结果 3、传回学管端和共享题库的题目中的$$公式要转换为图片; $$左右还有公式字符串的话,在结构化过程中一起并入!!! 
4、调取查重关联功能,暂时不调自动标注; 5、传入校本题库(发送标注),将结构化后的每道题设置个与大数据资源库的关联标签,如"zyk_id":id+题号 5种图片:1>>ser_static/.*?/word/media(解析服务中最开始保存在线上服务器本地) 2>>/zyk/uploadfiles/wording/(解析服务中上传到腾讯云) 3、4>>ser_static/.*?/(new_image[^"]*?|eq_img_\d.png):批量再解析和单题再解析中, 用户在编辑页面新粘贴进来的base64图片,以及批量再解析中域公式转图片,临时存在结构化服务器本地 5、利用mathjax渲染latex的svg格式转为png,再上传到腾讯云 上传之前,将腾讯云桶里的new_image全部替换,raw_image判断下再替换 items_list:list 所有题目 :return: res_zyk = {"data":{"html": xxx, "items": xxx}, "errcode": 0, "errmsgs": ""} res_xbk = {"items": xxx, "errcode": 0, "errmsg":"ok"} """ # if not self.items_list: # return {"errcode":1, "errmsgs": "无结果,不能入库", "data":{}} # # # if any([True if "stem_img" not in i and "stem" not in i else False # # for i in self.items_list]): # 存在新增的空试题时,只有3个字段type,img_status,check_type # # return {"errcode":1, "errmsgs": "存在空试题,请检查!", "data":{}} # contain_id = [] # for i in self.items_list: # if "stem_img" not in i and "stem" not in i: # return {"errcode": 1, "errmsgs": "存在空试题,请检查!", "data": {}} # if contain_id: # return {"errcode": 1, "errmsgs": "存在报错试题【{}题】还没改正过来,请检查!".format("、".join(contain_id)), "data": {}} # elif "errmsgs" in i and i["errmsgs"]: # return {"errcode": 1, "errmsgs": "存在报错试题还没改正过来,请检查!", "data": {}} res_xbk = {"items": [], "errcode": 0, "errmsg":"ok", "callback_type":2} res_zyk = {"errcode": 0, "errmsgs": "", "data": {"html": self.htmlt, "items": self.items_list}} # 1>>判断删除腾讯云桶内图片 # self.bucket_img_del() # --------------------------------------------------------------------- # 1.2>>将zyk/uploadfiles/wording/ 路径的缺latex的公式图片,再调mathpix接口拿到latex填充 # 2>>mathjax渲染的svg图片提取->保存->格式转化 svg_convert_res = self.mathjx2png() if type(svg_convert_res) == str: res_xbk = {"items": [], "errcode": 1, "errmsg": svg_convert_res, "callback_type": 2} res_zyk = {"errcode": 1, "errmsgs": svg_convert_res, "data":{}} if self.callback_url: self.callback_user(res_xbk, self.callback_url) self.callback_php(res_xbk) return res_zyk put_key_mjmath, local_mjmath = svg_convert_res # 
-------------------------------------------------------------------- # 3>> 统计 ocr和结构化试题 中的新图片,以防结构化入库的试题少了而导致图片上传不足,左边页面无法显示 put_key_list = [] # 桶中key localnewpic_list = [] # 本地图片存储位置 imgs_url_list = [] # 远程服务器上图片存储位置 put_key_imgsurl = [] new_imgs = re.findall(r'"\s*/>', r" data-latex=\1 />", s) return s time6 = time.time() # 4>> 结构化题目中图片地址替换,需要区分下学管端还是云题库!!!!!一定会保存一份在资源库 items_res_to_zyk = self.items_list.copy() for one_items in items_res_to_zyk: for k in ["stem", "key", "parse", "options"]: # "analysis", if k in one_items: if k == "options": one_items[k] = list(map(sub1, one_items[k])) else: one_items[k] = sub1(one_items[k]) # -----------难度和知识点自动标注------------------------ diffs_xbk, items_res_to_zyk = self.get_diff(items_res_to_zyk.copy()) # 难度 for nn, one_items in enumerate(deepcopy(items_res_to_zyk)): new_one_item = {} if self.subject == "高中数学": new_one_item["difficulty"] = diffs_xbk[nn] # if one_items["checkType"]["name"] == "填空题": # new_one_item["blank_num"] = one_items["blank_num"] keys_items = ["stem", "key", "parse", "options"] # if one_items['img_status'] == 1 and ("stem_img" in one_items and one_items["stem_img"]): # logger.info("----【paper_id:{}】mathjax2svg所取的字段是带img的".format(self.wordid)) # keys_items = ["stem_img", "key_img", "parse_img", "options_img"] if self.callback_url: # 查重 # if self.source != "xue_guan": # repeat_r, repeat_time = self.repeat_check(nn, one_items, repeat_time) # new_one_item["repeat_res"] = repeat_r new_one_item["topic_type_id"] = one_items["checkType"]["id"] if "options_rank" in one_items and one_items["options_rank"]: new_one_item["options_rank"] = one_items["options_rank"] pprint(self.ltx2url) for k in keys_items: if k in one_items: if self.ltx2url: if k == "options": one_items[k] = list(map(sub2, one_items[k])) else: one_items[k] = sub2(one_items[k]) new_one_item[k] = one_items[k] # else: # 有的题本来就没有options字段 # logger.info("----【paper_id:{}】第{}道题{}字段有问题".format(self.wordid, one_items["topic_num"], k)) # if self.subject == 
"高中物理": # new_one_item["label_auto"], kps_label_time = self.get_phy_kps_auto(one_items, kps_label_time) items_res_to_xbk.append(new_one_item) # 5>> ocr-htmlt中图片地址替换成云上地址 self.htmlt = re.sub(r'(> new_image上传腾讯云 try: logger.info('----【paper_id:{}】再解析开始上传图片到cloud,并替换成线上地址----'.format(self.wordid)) stime_u = time.time() # TODO 一个进程解析,一个进程上传 executor1 = ThreadPoolExecutor(5) executor1.map(upload_img_to_qcloud, zip(put_key_list, localnewpic_list)) if imgs_url_list: executor1.map(filestream_upload, zip(put_key_imgsurl, imgs_url_list)) executor1.shutdown(wait=True) # 进程池内部的进程都执行完毕,才会关闭,然后执行后续代码 img_upload_time = time.time() - stime_u logger.info(json.dumps({"log_level": "info", "paper_id": self.wordid, "img_upload_time": img_upload_time}, ensure_ascii=False)) res_zyk = {"data":{"html":self.htmlt, "items":items_res_to_zyk}, "errcode":0, "errmsgs": ""} res_xbk = {"items": items_res_to_xbk, "errcode": 0, "errmsg":"ok", "callback_type":2} except Exception as e: res_xbk = {"items": [], "errcode": 1, "errmsg": "公式或图片上传腾讯云失败", "callback_type":2} res_zyk = {"data":{"html": self.htmlt, "items": self.items_list}, "errcode": 1, "errmsgs": "公式或图片上传腾讯云失败"} logger.info(json.dumps({"log_level": "warn", "paper_id": self.wordid, "status": "公式或图片上传腾讯云失败", "errmsg": str(e)}, ensure_ascii=False)) else: # -----------难度、知识点自动标注------------------------ diffs_xbk, self.items_list = self.get_diff(self.items_list) # 难度 if self.callback_url: for nn, one_items in enumerate(self.items_list): new_one_item = {} if self.subject == "高中数学": new_one_item["difficulty"] = diffs_xbk[nn] # 查重 if self.source != "xue_guan": repeat_r, repeat_time = self.repeat_check(nn, one_items, repeat_time) new_one_item["repeat_res"] = repeat_r new_one_item["topic_type_id"] = one_items["checkType"]["id"] if "options_rank" in one_items and one_items["options_rank"]: new_one_item["options_rank"] = one_items["options_rank"] keys_items = ["stem", "key", "parse", "options"] for k in keys_items: if k in one_items: new_one_item[k] = 
one_items[k] # 知识点自动标注 # if self.subject == "高中物理": # new_one_item["label_auto"], kps_label_time = self.get_phy_kps_auto(one_items, kps_label_time) items_res_to_xbk.append(new_one_item) res_xbk = {"items": items_res_to_xbk, "errcode": 0, "errmsg":"ok", "callback_type":2} logger.info(json.dumps({"log_level": "info", "paper_id": self.wordid, "repeat_time": repeat_time}, ensure_ascii=False)) # 先传一份到校本题库,再返回结果 if self.callback_url: # 上传腾讯云失败,将结果保存本地一份 self.save_post_file(res_xbk) self.callback_user(res_xbk, self.callback_url) self.callback_php(res_xbk) # return res_zyk return res_xbk def save(self): """ 入口函数 :return: """ res_xbk = {"items": [], "errcode": 1, "errmsg": "保存失败", "callback_type": 2} try: res_zyk = self.upload_img() return res_zyk except Exception as e: print("------【paper_id:{}】最后的保存失败:{}-------".format(self.wordid, e)) logger.info(json.dumps({"log_level": "warn", "paper_id": self.wordid, "status": "入库失败", "errmsg": str(e), "task_name": "保存入库"}, ensure_ascii=False)) if self.callback_url: self.callback_code = 1 self.callback_err = "保存失败" self.callback_user(res_xbk, self.callback_url, err="1") self.callback_php(res_xbk) # else: # logger.info("------【paper_id:{}】保存失败-------".format(self.wordid)) # self.callback_code = 1 # self.callback_err = "保存失败" # self.callback_user(res_xbk, self.callback_url) # self.callback_php(res_xbk) return {"errcode": 1, "errmsgs": "入库失败!", "data": {}} def upload_img_with_stream(self): """ 图片以文件流形式上传腾讯云 :return: """ if not self.items_list: return {"errcode":1, "errmsgs": "无结果,不能入库", "data":{}} if any([True for i in self.items_list if len(i)==1]): # 存在新增的空试题时,只有一个type字段 return {"errcode":1, "errmsgs": "存在空试题,请检查!", "data":{}} res_xbk = {"items_res": self.items_list, "errcode": 0} res_zyk = {"errcode":0, "errmsgs": "", "data":{"html": self.htmlt, "items": self.items_list}} # 1>>判断删除腾讯云桶内图片 # self.bucket_img_del() # 1.2>>将zyk/uploadfiles/wording/ 路径的缺latex的公式图片,再调mathpix接口拿到latex填充 # 2>>mathjax渲染的svg图片提取->保存->格式转化 put_key_mjmath, 
local_mjmath = self.mathjx2png() # -------------------------------------------------------------------- # 3>> 统计 ocr和结构化试题 中的新图片,以防结构化入库的试题少了而导致图片上传不足,左边页面无法显示 put_key_list = [] # 桶中key # localnewpic_list = [] # 本地图片存储位置 imgs_url = [] # 存在线上服务器本地的线上可访问地址 new_imgs = re.findall(r'> 结构化题目中图片地址替换,需要区分下学管端还是云题库!!!!!一定会保存一份在资源库 items_res_to_zyk = self.items_list.copy() for one_items in items_res_to_zyk: for k in ["stem", "key", "parse", "analysis", "options"]: if k in one_items: if k == "options": one_items[k] = list(map(sub1, one_items[k])) else: one_items[k] = sub1(one_items[k]) items_res_to_xbk = items_res_to_zyk.copy() if self.callback_url and self.ltx2url: # items_res_to_xbk = items_res_to_zyk.copy() for one_items in items_res_to_xbk: keys_items = ["stem", "key", "parse", "analysis", "options"] # if one_items['img_status'] == 1 and ("stem_img" in one_items and one_items["stem_img"].strip()): # keys_items = ["stem_img", "key_img", "parse_img", "options_img"] for k in keys_items: if k in one_items: if k == "options": one_items[k] = list(map(sub2, one_items[k])) else: one_items[k] = sub2(one_items[k]) # ----------------------------------------------------------------------- # 5>> ocr-htmlt中图片地址替换成云上地址 self.htmlt = re.sub(r'(> new_image上传腾讯云 try: logger.info('----【paper_id:{}】再解析开始上传图片到cloud,并替换成线上地址----'.format(self.wordid)) stime_u = time.time() # TODO 一个进程解析,一个进程上传 executor1 = ProcessPoolExecutor(5) if self.callback_url: executor1.map(upload_img_to_qcloud, zip(put_key_mjmath, local_mjmath)) executor1.map(filestream_upload, zip(put_key_list, imgs_url)) executor1.shutdown(wait=True) # 进程池内部的进程都执行完毕,才会关闭,然后执行后续代码 img_upload_time = time.time() - stime_u logger.info("----【paper_id:{}】再解析图片上传时间img_upload_time:{}".format(self.wordid, img_upload_time)) res_zyk = {"data":{"html":self.htmlt, "items":items_res_to_zyk}, "errcode":0, "errmsgs": ""} # res_xbk = {"items_res": items_res_to_xbk, "errcode": 0, "errmsgs": ""} # res_zyk = {"html": self.htmlt, "items": items_res_to_xbk, 
"errcode": 0} except: # res_xbk = {"items_res": self.items_list, "errcode": 1, "errmsgs": "公式或图片上传腾讯云失败"} logger.info("----【paper_id:{}】公式或图片上传腾讯云失败".format(self.wordid)) res_zyk = {"data":{"html": self.htmlt, "items": self.items_list}, "errcode": 1, "errmsgs": "公式或图片上传腾讯云失败"} # 先传一份到校本题库,再返回结果 # if callback_url: # 上传腾讯云失败,将结果保存本地一份 # self.save_post_file(res_xbk) # try: # r = requests.post(callback_url, # # json=res, # 可以,但是会进行转义 # data=json.dumps(res_xbk).encode("utf-8"), # 文件开头有编码显示 # # headers=headers, # ) # logger.info("------【paper_id:{}】,post 回调地址状态===> {} -------\n".format(self.wordid, r.status_code)) # except TimeoutError: # # print("回调超时") # logger.info("------【paper_id:{}】回调超时-------".format(self.wordid)) # except Exception as e: # # print(e, "------回调出错") # logger.info("------【paper_id:{}】回调出错:{}-------".format(self.wordid, e)) return res_zyk def save_post_file(self, parse_res): """保存回调的数据解析结果""" now_time = datetime.datetime.now() time_str = datetime.datetime.strftime(now_time, '%Y_%m_%d_%H_%M_%S') # aft_modify = (str("word_name") + '__' + str(random.random())).encode("utf-8") # hh = hashlib.md5(aft_modify).hexdigest() aft_name = self.wordid + '__' + time_str + '.json' res_folder = configs.RES_FOLDER # 保存的文件夹 new_fpath = os.path.join(res_folder, aft_name) re_f = open(new_fpath, 'w', encoding='utf-8') json.dump(parse_res, re_f,ensure_ascii=False) def get_phy_kps_auto(self, one_items, kps_label_time): """ 调取物理自动标注的端口获取考点 :return: """ def phy_kps(input_data): label_auto = {} if input_data: r = requests.post(configs.kps_phy_ip, json=input_data) # json={"input_data": item} if r.status_code == 200: res = r.json()['result'] # eval(r.text) if res: label_auto.update(res) return label_auto t1= time.time() temp_items = {"server_state": 11, "fine_state": 0, "add_parse_state": 1} one_info = {"topic_id": one_items["topic_num"], "content": one_items["stem"], "option": one_items["options"] if "options" in one_items else [], "parse": one_items["parse"], "kpl_codes": [], 
"ori_sences": [], "ori_kpl_codes": [], "topic_type_id": one_items["checkType"]["id"]} temp_items["info"] = one_info # 散题 # temp_items["parse"] = str(one_items["key"]) + "" + one_items["parse"] # if "analysis" in one_items: # temp_items["parse"] += "" + one_items["analysis"] auto_kps = phy_kps(temp_items) print("自动标注考点:", auto_kps) kps_label_time += time.time() - t1 return auto_kps, kps_label_time def img2latex(self): # 3>> 统计结构化试题data-latex为空的图片,再ocr-latex处理并替换 time5 = time.time() items_str = str(self.items_list) all_imgs_no_latex = re.findall(r'()'.format(m), sub3, items_str) self.items_list = eval(items_str) logger.info("----【paper_id:{}】结构化试题中空latex掉mathpix转化时间:{}".format(self.wordid, time.time() - time5)) except: logger.info("----【paper_id:{}】latex-ocr转化过程失败".format(self.wordid)) # ------------------------------------------------------------------- def get_diff(self, items_res_to_zyk): # -----------难度和知识点自动标注------------------------ t11 = time.time() diffs_xbk = [""] * len(items_res_to_zyk) if self.subject == "高中数学": from multiprocessing.dummy import Pool as ThreadPool pool = ThreadPool(2) # 比# pool = multiprocessing.Pool(3)速度快 diff_info = list(pool.map(get_item_diff, items_res_to_zyk)) items_res_to_zyk = [f[0] for f in diff_info] diffs_xbk = [f[1] for f in diff_info] logger.info("----【paper_id:{}】结构化试题中难度标注时间:{}".format(self.wordid, time.time() - t11)) return diffs_xbk, items_res_to_zyk def repeat_check(self, id, one_items, repeat_time): t22 = time.time() chachong_item_dict = {"topic_id": id + 1, "subject_id": subject_id[self.subject], "topic_type_id": one_items["checkType"]["id"], "content": one_items["stem"].strip(), } if "options" in one_items: chachong_item_dict["options"] = one_items["options"] repeat_r = requests.post(url=configs.repeat_ip, json=[chachong_item_dict]).json() print("查重结果:", repeat_r) # [str(id + 1)] if repeat_r: repeat_r = repeat_r[str(id + 1)] else: repeat_r = [] repeat_time += time.time() - t22 return repeat_r, repeat_time def 
def callback_php(self, res_xbk):
    """Post the final status of the save to ``configs.callback_url_taskcheck``.

    The payload carries the accumulated error code/message, the paper id and
    the ``checkType`` id of every item in ``self.items_list``.  Request
    failures and non-200 responses are printed and logged, never raised.

    :param res_xbk: result dict sent to the other endpoints; its "errmsg"
        is appended to ``self.callback_err`` unless it is "ok" or "保存失败".
    :return: None
    """
    def _type_id(item):
        # This method is also called from save()'s exception handler, where
        # the failure may have been caused by an item without "checkType".
        # A plain item["checkType"]["id"] would raise again here and the
        # failure callback would never be sent, so report None instead.
        try:
            return item["checkType"]["id"]
        except (KeyError, TypeError):
            return None

    if res_xbk["errmsg"] not in ["ok", "保存失败"]:  # e.g. uploading to the cloud bucket failed
        self.callback_err += res_xbk["errmsg"]
    if self.callback_err:
        self.callback_code = 1

    type_check = {"errcode": self.callback_code,
                  "errmsg": self.callback_err,
                  "parseId": self.wordid,
                  "callback_type": 2,
                  "data": [_type_id(k) for k in self.items_list],
                  }
    # NOTE(review): the request has no timeout, so an unresponsive endpoint
    # blocks the caller; pick a value with the endpoint owners before adding one.
    try:
        r2 = requests.post(configs.callback_url_taskcheck,
                           data=json.dumps(type_check, ensure_ascii=False).encode("utf-8"))
        print("------【paper_id:{}】taskcheck post数据:{} 回调地址状态:{}===>回调结果: {}-------".format(
            self.wordid, type_check, r2.status_code, r2.text))
        logger.info(json.dumps({"log_level": "info", "paper_id": self.wordid, "callback_data": type_check,
                                "callback_res": {"status": "回调成功" if r2.status_code == 200 else "回调失败",
                                                 "status_code": r2.status_code, "text": r2.text,
                                                 "callback_url": configs.callback_url_taskcheck},
                                "task_name": "入库回调"}, ensure_ascii=False))
    except Exception as e:
        # Best effort: a failed status callback must not abort the caller.
        print("------【paper_id:{}】taskcheck回调出错:{}-------".format(self.wordid, e))
        logger.info(json.dumps({"log_level": "warn", "paper_id": self.wordid, "callback_data": type_check,
                                "callback_res": {"status": "回调失败",
                                                 "callback_url": configs.callback_url_taskcheck},
                                "errmsg": str(e), "task_name": "入库回调"}, ensure_ascii=False))
    else:
        if r2.status_code != 200:
            print("------【paper_id:{}】taskcheck回调出错:{}".format(self.wordid, r2.status_code))
            logger.info(json.dumps({"log_level": "warn", "paper_id": self.wordid, "callback_data": type_check,
                                    "callback_res": {"status": "回调失败", "status_code": r2.status_code,
                                                     "text": r2.text,
                                                     "callback_url": configs.callback_url_taskcheck},
                                    "task_name": "入库回调"}, ensure_ascii=False))
'year': 2020, 'difficulty': '中', # 'specials': [], 'upload_time': '2021-07-16T13:51:49.561000Z', # 'key_img': "", 'options_img': [], # 'options_rank': None, 'text_status': None, 'img_status': 1, # 'source': {'type': 's', # 'related_exampaper': [{'paper_id': '5fc0d256407550d0b7d9a43c', # 'file_name': '十堰市一中 2019 级高一下4月月考 数学试题包含答案 ', # 'item_id': None}]}}] # ocr_html = r'数学命题人:王旭辉一、选择题(本大题共12小题,每小题5分,请将正确答案填涂在答题卡相应的位置。)1.cos80°sin40°+sin50°cos10°的值为()A.${\quad \frac{1}{2}}$B.${\quad \frac{\sqrt {2}}{2}}${c.\quad \frac{\sqrt {3}}{2}}${b.\quad -\frac{\sqrt {3}}{2}}$2.已知在等比数列${\lbrace a_{n}\rbrace }$中,${a_{1}=1,\quad a_{5}=9,}${a_{3}=(\quad }$B.±5.±3D.33.已知a>b>0,则下列不等式成立的是()A.${\quad a>b>\frac{a+b}{2}>\sqrt {ab}}$B.${\quad a>\frac{a+b}{2}>\sqrt {ab}>b}$${c.\quad a>\frac{a+b}{2}>b>\sqrt {ab}}$D.${\quad a>\sqrt {ab}>\frac{a+b}{2}>b}$4.给出下列命题:①棱柱的侧棱都相等,侧面都是全等的平行四边形;②用一个平面去截棱锥,棱锥底面与截面之间的部分是棱台;③半圆绕着它的直径所在的直线旋转一周所形成的曲面叫做球面;④棱台的侧棱延长后交于一点,侧面是等腰梯形.其中正确命题的序号是()A.①②④B.①②③C.②③).③5.已知向量a${i=(1,2),\vec {b}=(2,-2),\quad \vec {c}=(\lambda ,-1)}$,若${\vec {c}//(2\vec {a}+\vec {b})}$则λ=()A.-2B.-1${c.\quad -\frac{1}{2}}$D.${\quad \frac{1}{2}}$6.已知△ABC中,a=1,${b=\sqrt {3},}$A=30°,则B等于()A.30°B.30°或150°C.60°D.60°或120°7.在△ABC中,a、b、c分别为角A、B、C的对边,若b=2,c=1,C=30°,则a=()()A.${\quad \sqrt {3}}$B.3C.${\quad \sqrt {5}}$D.18.若(a+b+c)(b+c-a)=3bc,且sinA=2sinBcosC,那么△ABC是()A.直角三角形B.等边三角形C.等腰三角形D.等腰直角三角形9.在等差数列${\lbrace a_{n}\rbrace }$中,${S_{15}>0,\quad S_{16}<0,}$则使${a_{n}>0}$成立的n的最大值为()A.6B.7C.8D.S10.已知等比数列${\lbrace a_{n}\rbrace }$的前n项和为${S_{n},}${S_{5}=2,\quad S_{10}=6,}${a_{16}+a_{17}+a_{18}+a_{19}+a_{20}=}$()A.54B.48C.32D.1611.设点D为△ABC中BC边上的中点,O为AD边上靠近点A的三等分点,则()A.${\quad \overrightarrow {BO}=-\frac{1}{6}\overrightarrow {AB}+\frac{1}{2}\overrightarrow {AC}}$B.${\quad \overrightarrow {BO}=\frac{1}{6}\overrightarrow {AB}-\frac{1}{2}\overrightarrow {AC}}$${\therefore \overrightarrow {BO}=\frac{5}{6}\overrightarrow {AB}-\frac{1}{6}\overrightarrow {AC}}$D.${\quad \overrightarrow 
{BO}=-\frac{5}{6}\overrightarrow {AB}+\frac{1}{6}\overrightarrow {AC}}$12.△ABC中,角A、B、C的对边分别为a、b、c,且2a+b=2ccosB,若△ABC的面积为${S=\sqrt {3}c,}$则ab的最小值为()A.12B.24C.28D.48二、填空题(本大题共4小题,每小题5分.请将正确答案填写在答题卡相应位置。)13.在数列${\lbrace a_{n}\rbrace }$中,若${a_{1}=1,a_{n}-a_{n-1}=n(n\geq 2),}$则该数列的通项${a_{n}=}$14.已知圆锥的侧面展开图是一个半径为6cm,圆心角为${\frac{2\pi}{3}}$的扇形,则此圆锥的体积为15.已知平面向量${\vec {a}}${\vec {b}}$的夹角为${\frac{\pi}{3},\quad \vec {a}=(\sqrt {3},-1),\quad |\vec {b}|=1,}$则|${2\vec {a}-\vec {b}|=}$16.已知α、β为锐角,sinα=2,tan(β-α)=-,则tanβ=______三.解答题(本大题共6小题,共70分.请将正确答案写在答题卡相应位置。解答应写出文字说明、证明过程或演算步骤.)17.(10分)(1)解不等式(x-1)(x-a)≥0(2)已知${f(x)=\frac{x^{2}+6x+9}{x+1},}$其中x>-1,求f(x)的最小值.18.(12分)已知函数j${{(x)=2\sin x\cos (x+\frac{\pi}{3})+\frac{\sqrt {3}}{2}}}$(1)求函数f(x)的最小正周期;(2)若f(x)+m≤0对${x\∈\lbrack 0,\frac{\pi}{2}\rbrack }$亘成立,求实数m的取值范围.19.(12分)已知等比数列${\lbrace a_{n}\rbrace }$的前n项和为${S_{n},}$且满足${S_{3}=7,\quad S_{6}=63,}$(1)求数列${\lbrace a_{n}\rbrace }$的通项公式;(2)若${b_{n}=a_{n}+\log _{2}a_{n},}$求数列${\lbrace b_{n}\rbrace }$的前n项和${T_{n}}$0.(12分)已知数列${\lbrace a_{n}\rbrace }$满足${a_{1}=\frac{3}{2},}${l_{n}=\frac{a_{n-1}}{2}+\frac{1}{2^{n-1}}(n\geq 2,n\∈N^{*})}$(1)求证:数列${\lbrace 2^{n}a_{n}\rbrace }$是等差数列,并求出数列${\lbrace a_{n}\rbrace }$的通项公式;(2)求数列${\lbrace a_{n}\rbrace }$的前n项和${S_{n}.}$21.(12分)如图,在△ABC中,${C=\frac{\pi}{4},\overrightarrow {CA}\cdot \overrightarrow {CB}=48,}$点D在BC边上,且${AD=5\sqrt {2},\cos \angle ADB=\frac{3}{5}}$(I)求AC,CD的长;(II)求cos∠BAD的值.22.(12分)数列{a,}中,a,=1,当n≥2时,其前n项和S,满足S,2=a,(S,--)${(u_{n})^{\prime }//t_{1},\quad u_{1}-}${4b_{n}//A_{n}-u_{n}\cdot (\beta _{n}-\bar {2})}$(1)求${S_{n}}$的表达式;(2)求数列${\lbrace a_{n}\rbrace }$的通项公式;(3)设${b_{n}=\frac{S_{n}}{2n+1}.}$求数列${\lbrace b_{n}\rbrace }$的前n项和${T_{n}.}$数学答案一、选择题二、填空题${13.\frac{n^{2}+n}{2}\quad 14.\frac{16\sqrt {2}}{3}\pi\quad 15.\sqrt {13}\quad 16.\frac{73}{9}}$三、解答题17.(1)当a>1时,原不等式解集是{x|x≥a,或x≤1};当a=1时,原不等式解集是R:当a<1时,原不等式解集是(x|x|1或X]a}_5分(2)∵x>-1,则x+1>0,由基本不等式得${f(x)=\frac{x^{2}+6x+9}{x+1}=}$+1x+1${(x+1)+\frac{4}{x+i}+4}$${=2\sqrt {(x+1)\cdot 
\frac{4}{x+1}}+4=8}$(当且仅当${x+1=\frac{4}{x+1}}$时,即当x=1时取得等号)因此,函数${f(x)=\frac{x^{2}+6x+9}{x+1}(x>-1}$)的最小值为810分18.解:(1)因为${f(x)=2\sin x\cos (x+}$${=2\sin x(\cos x\cos \frac{\pi}{3}-\sin x\sin x\sin }$${=2\sin x(\frac{1}{2}\cos x-\frac{\sqrt {3}}{2}\sin x}$${\div \sin x\cos x-\sqrt {3}\sin ^{2}x+\frac{\sqrt {3}}{2}}$${=\frac{1}{2}\sin 2x+\frac{\sqrt {3}}{2}\cos 2x}$${=\sin (2x+\frac{\pi}{3})}$所以f(x)的最小正周期为${f=\frac{2\pi}{2}=\pi}$(2)"f(x)+m≤0对${x\∈\lbrack 0,\frac{\pi}{2}\rbrack }$恒成立"等价于"${f(x)_{\max }+m\leq 0^{n}}$因为${x\∈\lbrack 0,\frac{\pi}{2}\rbrack }$所以${2x+\frac{\pi}{3}\∈\lbrack \frac{\pi}{3},\frac{4\pi}{3}\rbrack }$${2x+\frac{\pi}{3}=\frac{\pi}{2},}${x=\frac{\pi}{12}}$f(x)的最大值为${f(\frac{\pi}{12})=1.}$所以实数m的取值范围为${(-9O,-1\rbrack }$12分19.(1)由题意知S${\because _{6}\neq 2s_{3},q\neq 1\cdots }$${\therefore S_{3}=\frac{a_{1}(7-q^{3})}{1-q}=7}$........................3分${s_{6}=\frac{a_{1}(1-q^{6})}{1-q}}$解得${\left\lbrace \begin{array}{l}{a_{1}=7}\\{q=2}\end{array}\right.\cdots \cdots }$5分${\therefore a_{n}=2^{n-7}\cdots \cdots }$.6分2)由(1)知${b_{n}=2^{n-7}+n-1\cdots \cdots \cdots }$..7分∴T。=(1+2+-..+${y^{-x-3}_{n}}$+[1+2+--+(n-1)].................9分${=2^{n}+\frac{n^{2}-n}{2}-1\cdots }$.........12分20.(1)因为${a_{n}=\frac{a_{n-1}}{2}+\frac{1}{2^{n-1}}(n\geq 2,n\∈N^{*})}$,所以${2^{n}a_{n}=2^{n-1}a_{a}+2}$,即${2^{n}a_{n}-2^{n-1}a_{n-1}=2,}$所以数列${\lbrace 2^{n}a_{n}\rbrace }$是等差数列,且公差d=2,其首项${2a_{1}=3}$所以${2^{7}a_{n}=3+(n-1)\times 2=2n+7}$,解得${a_{n}=\frac{2n+i}{2^{n}}}$${2)\quad S_{n}=\frac{3}{2}+\frac{5}{2^{2}}+\frac{7}{2^{3}}+\cdots +\frac{2n-1}{2^{p-1}}+\frac{2n+1}{z^{\theta }},}$${\frac{S_{n}}{2}=\frac{3}{2^{2}}+\frac{5}{2^{3}}+\frac{7}{2^{4}}+\cdots +\frac{2n-1}{2^{n}}+\frac{2n+1}{2^{n}+1},}$①-②,得${\frac{S_{e}}{z}=\frac{3}{z}+}${+\frac{7}{2^{3}}+\cdots +\frac{1}{2^{s}}}${=\frac{3}{2}+}${z^{e}+x^{2}}$ ${1-\frac{7}{2}}$${=\frac{5}{x}-\frac{2n+5}{a+1},}$所以${S_{n}=5-\frac{2n+5}{2^{n}}}$12分21.(I)在△ABD中,∵${\cos \angle ADB=\frac{3}{5},\therefore \sin \angle 
ADB=\frac{4}{5}.}$sin∠CAD=sin(∠ADB-${\angle AcD,=\sin \angle ADB\cos \frac{\pi}{4}-\cos \angle ADB\sin \frac{\pi}{4}}$${=\frac{4}{5}\times \frac{\sqrt {2}}{2}-\frac{3}{5}\times \frac{\sqrt {2}}{2}=\frac{\sqrt {2}}{10}}$在△ADC中,由正弦定理得${\frac{Ac}{\sin \angle ADc}=\frac{cD}{\sin \angle CAD}=\frac{AD}{\sin \angle ACD}}${\frac{AC}{\frac{4}{5}}=\frac{cD}{\sqrt {2}}=\frac{5\sqrt {2}}{\sqrt {2}}}$解得${AC=8,CD=\sqrt {2},}$${\Pi)\because \overrightarrow {CA}\cdot \overrightarrow {CB}=48,}${8\cdot cB\cdot \frac{\sqrt {2}}{2}=48.}$解得${c_{B}=6\sqrt {2},}$∴BD=CB-CD=5√2在△ABC中,A${B=\sqrt {8^{2}+(6\sqrt {2})-2\times 8\times 6\sqrt {2}}=2\sqrt {10}}$,在△ABD中${\infty \angle BAD=\frac{(2\sqrt {10})^{2}+(5\sqrt {2})}{2\times 2\sqrt {10}\times 5\sqrt {2}}=\frac{\sqrt {5}}{5}}$22.(1)由${\sin ^{2}=5}=S_{n}-5_{5}}&{T_{6}^{z}=-7}}${s_{w}^{2}=(S_{w}-S_{6-1})(s_{w}-\frac{1}{2})=s_{e}^{\vec {x}}-\frac{1}{2}S_{\varphi }-S_{6}=+\frac{7}{2}s}$${\therefore \frac{1}{s_{n}}-\frac{1}{S_{n-1}}=2(n\supseteq 2)}$${\therefore \left\lbrace \begin{array}{l}{\frac{1}{s}\rbrace }$是以${\frac{1}{s_{7}}}$为首项,以2为公差的等差数列,${\therefore \frac{1}{s_{n}}=2n-1,}$${s_{n}=\frac{1}{2n-1}(n\∈N^{*}.}$${a_{n}=\left\lbrace \begin{array}{l}{7,n=7}\\{\frac{1}{2p-1}-\frac{7}{2n-3},n\geq }\end{array}\right.}$(3)${b_{n}=\frac{7}{(2n-7)(2n+i)}=}$${\therefore 7_{8}=\frac{1}{2}(1-\frac{1}{3}+\frac{1}{3}-\frac{1}{5}+\frac{1}{2\pi-1}-\frac{1}{2\pi+1})=\frac{1}{2}(-\frac{1}{2n+1})=\frac{\pi}{2\pi}}$12/' # svg_data = {'svg_html_data': ['1.在数列{an}中,若a1=1,an−an−1=n(n≥2),则该数列的通项an=【答案】12 \n 【解析】略 \n \n 编辑删除'], 'svg_path': ''} # # wordid = "5fc0d256407550d0b7d9a43c" # # res=Ruku(items_list, ocr_html, svg_data, wordid).upload_img() # pprint(res) one_item1 = [{"repeat_res": [], "topic_type_id": 34, "options_rank": 1,"checkType":{"id":2}, "stem": "NA表示阿伏伽德罗常数的值。下列叙述中正确的是", "key": "A", "parse": 
"A.所含的电子数为=10NA,A正确;B.常温常压下,的物质的量小于0.1mol,故其与足量的NaOH溶液反应,2NaOH+Cl2=NaCl+NaClO+H2O,转移的电子数为小于0.1NA,B错误;C.根据反应2Fe+3Cl2$2FeCl3可知,常温常压,5.6gFe在足量${\\text{Cl}}_{\\text{2}}$中燃烧,转移的电子数为$\\frac{\\text{5}\\text{.6g}}{{\\text{56g•mol}}^{\\text{-1}}}{\\text{×3×N}}_{\\text{A}}{\\text{mol}}^{\\text{-1}}$=0.3NA,C错误;" "D.分子总数为NA的${\\text{SO}}_{\\text{2}}$和${\\mathrm{CO}}_{2}$的混合气体中含有的氧原子数为2NA,D错误;故答案为A。", "options": ["所含的电子数为10NA", "常温常压下,与足量的NaOH溶液反应,转移的电子数为0.1NA", "常温常压,5.6gFe在足量中燃烧,转移的电子数为0.2NA", "分子总数为NA的和的混合气体中含有的氧原子数为4NA"] }] one_item = [ {"repeat_res": [], "topic_type_id": 34, "options_rank": 1,"checkType":{"id":2}, "stem": "已知反应:①${\\text{H}}_{\\text{2}}\\text{(g)+}\\frac{\\text{1}}{\\text{2}}{\\text{O}}_{\\text{2}}{\\text{(g)=H}}_{\\text{2}}\\text{O(g)}\\hspace{0.17em}\\hspace{0.17em}\\hspace{0.17em}\\text{ }{\\text{ΔH}}_{\\text{1}}$②$\\frac{\\text{1}}{\\text{2}}{\\text{N}}_{\\text{2}}{\\text{(g)+O}}_{\\text{2}}{\\text{(g)=NO}}_{\\text{2}}\\text{(g)}\\hspace{0.17em}\\hspace{0.17em}\\hspace{0.17em}\\text{ }{\\text{ΔH}}_{\\text{2}}$③$\\frac{\\text{1}}{\\text{2}}{\\text{N}}_{\\text{2}}\\text{(g)+}\\frac{\\text{3}}{\\text{2}}{\\text{H}}_{\\text{2}}\\text{(g)}\\underset{}{\\overset{}{⇌}}{\\text{NH}}_{\\text{3}}\\text{(g)}\\hspace{0.17em}\\hspace{0.17em}\\hspace{0.17em}\\text{ }{\\text{ΔH}}_{\\text{3}}$则反应${\\text{2NH}}_{\\text{3}}\\text{(g)+}\\frac{\\text{7}}{\\text{2}}{\\text{O}}_{\\text{2}}{\\text{(g)=2NO}}_{\\text{2}}{\\text{(g)+3H}}_{\\text{2}}\\text{O(g)}$的$\\text{ΔH}$为", "key": "A", "parse": "", "options": ["锡制器皿长期处在低于13.2℃的环境中,会自行毁坏", "锡在常温下以灰锡状态存在", "灰锡转为白锡的反应是放热反应", "ΔH1>ΔH2"]} ] f1 = open(r"F:\zwj\\text_Structure\new_tiku_structure_v3_sci\struct_items1.pickle",'rb').read() items_list = pickle.loads(f1) # pprint(items_list) f2 = open(r"F:\zwj\\text_Structure\new_tiku_structure_v3_sci\svg_data5-13.pickle", 'rb').read() svg_data = pickle.loads(f2) # print(svg_data["svg_html_data"]) res = Ruku(items_list, "", svg_data, "0213441313131", {"callback_url": "1223", "source": 
""}, "高中物理").upload_img() # a,t = Ruku("","","","").get_phy_kps_auto(one_item, 0) print(res)