123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447 |
- #!/usr/bin/env python
- # -*- coding:utf-8 -*-
- import re
- from pprint import pprint
- from washutil import table_label_cleal, find_seq_num
- # 本文件包含以下函数
- # con_ans_split:将传进来的单道题(含答案或解析)按题干、答案、解析 拆分
- # ans_structure_total:针对答案部分解析结构化汇总
- # ans_structure: 拆分答案,并根据已拆分好的题目item_res 补上答案和解析
- # stem_ans_struc_combine: 题干结构化与答案结构化的合并
- # manyans_oneline_split(item_str, one_type_num):对一行多个答案的情况进行拆分
- # only_parse_split: 拆分出答案和解析,主要针对答案页中的每个题的答案进行拆分
- # get_ans_from_parse: 从已知解析中 挑选 答案
- # def ans_structure_step1(anss, item_type_classify, item_res):
- # """
- # 针对答案部分解析结构化汇总
- # anss : 整个答案部分
- # :return: dd = {'parse': , 'answer': }
- # """
- # anss = [k for k in anss if k.strip()]
- # ans_label = [k for k, a in enumerate(anss) if re.match("【答案】", a.strip())]
- # parse_label = [k for k, a in enumerate(anss) if re.match("【解析】", a.strip())]
- # if len(ans_label) == 1 and len(parse_label) == 1:
- # ans1 = anss[ans_label[0] + 1: parse_label[0]]
- # parse1 = anss[parse_label[0]+1:]
- # res_ans, flag1 = ans_structure_step2(ans1, item_type_classify, item_res,'group_ans')
- # res_parse, flag2 = ans_structure_step2(parse1, item_type_classify, item_res, 'group_parse')
- # if flag1 == flag2 == 1:
- # for idx, item_r in enumerate(item_res):
- # if not res_ans[idx]['answer']:
- # if not res_parse[idx]['answer']:
- # item_res[idx]['answer'] = "见解析"
- # else:
- # item_res[idx]['answer'] = res_parse[idx]['answer']
- # else:
- # item_res[idx]['answer'] = res_ans[idx]['answer']
- #
- # if not res_ans[idx]['parse']:
- # item_res[idx]['parse'] = res_parse[idx]['parse']
- # else: # 解析中的parse肯定有
- # item_res[idx]['parse'] = res_ans[idx]['parse']+"<br/>【解析】"+res_parse[idx]['parse']
- # return item_res
- # elif flag1 == 2:
- # return "【答案】组中题型数量与题目中不一致,请重点检查题目序号,重新手输题目序号"
- # elif flag2 == 2:
- # return "【解析】组中题型数量与题目中不一致,请重点检查题目序号,重新手输题目序号"
- # else:
- # return '【答案】组和【解析】组中题型数量与题目中均不一致,请重点检查题目序号,重新手输题目序号'
- # else:
- # res_ans, flag1 = ans_structure_step2(anss, item_type_classify, item_res)
- # if flag1 == 1:
- # for idx, item_r in enumerate(item_res):
- # item_res[idx]['answer'] = res_ans[idx]['answer']
- # item_res[idx]['parse'] = res_ans[idx]['parse']
- # else:
- # # return "答案中题目数量与题目中不一致,①请重点检查题目序号,重新手输题目序号;②将参考答案开头没用的信息去掉;" \
- # # "③是否有遗漏答案或答案格式不对;④答案中若存在一行多个答案时,保证每个题的答案间要留有多个空格!"
- # return res_ans
- # return item_res
- #
- #
- # def ans_structure_step2(anss, item_type_classify, item_res, *group):
- # """
- # 拆分答案,并根据已拆分好的题目item_res 补上答案和解析
- # 有的答案放在表格里,如选择题、填空题、判断题,有的一行多个答案
- # 思路:1.先按一行没有多个题答案的情况取答案,数量与题干不同 时 >>>> 2.再按一行多个答案的情况取答案:
- # 1)先判断表格,拿到表格的答案;2)一行多个答案
- # anss: 一组按所有不重复题号的答案
- # item_type_classify: 题目中对各题型的统计
- # :return: [{'parse': , 'answer': },{},{}]
- # """
- # while not anss[0]:
- # anss = anss[1:]
- # if re.match(".+?省.+?试[卷题]|[^a-zA-Z]*?【专题】", anss[0]):
- # anss = anss[1:]
- #
- # # 预处理: 对答案部分的题号进行处理, 将(\d)类型的题号改为\d、类型
- # sub_item_no = [int(no[0]+no[2]) for no in
- # re.findall(r'\n\s*([1-9]|[1-4][0-9])\s*[..、、]|\n\s*([1-9]|[1-4][0-9])\s*[..、、].+?\s+([1-9]|[1-4][0-9])\s*[..、、].+?',
- # "\n" + "\n".join(anss))]
- # if len(sub_item_no) <= 2:
- # sub_item_no = [int(no[0]+no[2]) for no in re.findall(r'\n\s*\(([1-9]|[1-4][0-9])\)\s*[..、、]?'
- # r'|\n\s*\(([1-9]|[1-4][0-9])\)\s*[..、、]?.+?\s+\(([1-9]|[1-4][0-9])\)\s*[..、、]?.+?',
- # "\n" + "\n".join(anss))]
- # if len(sub_item_no) > 3:
- # anss = re.sub(r'\n\s*\(([1-9]|[1-4][0-9])\)\s*[..、、]?', "\n" + r"【@\1、", "\n" + "\n".join(anss))
- # anss = re.sub(r'(\n【@([1-9]|[1-4][0-9])、.+?\s+)\(([1-9]|[1-4][0-9])\)\s*[..、、]?', r"\1【@\3、", anss)
- # anss = anss.replace("【@", "").split("\n")[1:]
- #
- # # --------- 一行多个答案的情况----存在一行中有选择题和填空题答案,填空题答案尽量每题占一行----------
- # all_item_ans = []
- # table_ans = []
- # ans_no = []
- # while anss and "table" in anss[0]: # 答案以表格形式呈现, 表格应放在前两行位置,不要插在答案中间
- # row_list = [] # 要求表格形式为 横纵分明 ,不存在合并
- # for tt in re.finditer('<tr>(((?!(</?tr>)).)*)</tr>', anss[0], re.S): # 先划分每行
- # tt_list = re.split(r'</p></td>|<td><p>|</td><td>|</td>|<td>', tt.group(1)) # 再划分每列
- # # row_list.append([col for col in tt_list if col.strip()]) # 也有可能答案为空
- # row_list.append(tt_list)
- # if row_list:
- # print("^^^^^^存在答案放在表格里的情况!^^^^^^^")
- # if len(row_list) % 2 != 0:
- # print('表格形式呈现的答案不是偶数行')
- # else:
- # # print("row_list:", row_list)
- # for k, v in enumerate(row_list):
- # # print('-----',v)
- # if (k + 1) % 2 == 1: # 奇数行==》答案序号行
- # item_no = [int(i) if re.sub(r"[^\d]", "", i) else -1 for i in v]
- # item_no_st = [num for num, i in enumerate(item_no) if i != -1] # 可能开头是-1
- # ans_no.extend([i for i in item_no if i != -1]) # 表格序号
- # table_ans.extend(row_list[k + 1][item_no_st[0]: item_no_st[-1] + 1]) # 表格答案
- # anss = anss[1:]
- # # 先按一行没有多个题答案的情况取答案
- # anss_str = table_label_cleal("\n" + "\n".join(anss))
- # if re.search("<table>.+?</table>", anss_str) is None:
- # anss_str = anss_str.split("</table>")[-1].replace("</div>", "")
- # anss_str = re.sub(r"([A-H])\s*[..](\s*([1-4][0-9]|[1-9])\s*[..、、])", r"\1 \2", anss_str)
- # anss_str = re.sub(r"([;;])(\s*([1-4][0-9]|[1-9])\s*[.、、])", r"\1 \2", anss_str)
- #
- # rest_item_split = re.split(r'\n+\s*[1-4][0-9]\s*[..、、]|\n+\s*[1-9]\s*[..、、]', anss_str)
- # if not rest_item_split[0]:
- # rest_item_split = rest_item_split[1:]
- # all_item_ans.extend(table_ans)
- # all_item_ans.extend(rest_item_split)
- # print("表格答案:", table_ans)
- # pprint(all_item_ans)
- # # ------------先按没有一行多个答案的情况-------------------
- # if item_type_classify and len(all_item_ans) == sum(list(item_type_classify.values())):
- # res1 = []
- # for num1, one_ans in enumerate(all_item_ans):
- # parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
- # res1.append(parse)
- # return res1, 1
- # elif not item_type_classify and len(all_item_ans) == len(item_res):
- # res1 = []
- # for num1, one_ans in enumerate(all_item_ans):
- # parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
- # res1.append(parse)
- # return res1, 1
- # else: # 答案个数与题目不一致时,再按一行多个答案处理(题目个数正常,答案个数比题目少时)
- # print('-----存在一行多个答案的情况-----')
- # all_item_ans = []
- # all_item_ans.extend(table_ans)
- # # 再按一行多个答案的情况取答案
- # manyans_oneline_split = re.split(r'\n\s*[1-4][0-9]\s*[..、、]|\n\s*[1-9]\s*[..、、]'
- # r'|(?<![::..、、+\-*/=])\s[1-4][0-9]\s*[..、、]|(?<![::..、、+\-*/=])\s[1-9]\s*[..、、]'
- # r'|\s{2,}[1-4][0-9]\s*[..、、]|\s{2,}[1-9]\s*[..、、]', anss_str)
- #
- # temp_no = re.findall(r'\n\s*([1-4][0-9]|[1-9])\s*[..、、]'
- # r'|(?<![::..、、+\-*/=])\s([1-4][0-9]|[1-9])\s*[..、、]|\s{2,}([1-4][0-9]|[1-9])\s*[..、、]', anss_str)
- # temp_no = [int("".join(i)) for i in temp_no]
- # # print("temp_no:",temp_no)
- # # print('manyans_oneline_split:', manyans_oneline_split, len(manyans_oneline_split))
- # if not temp_no and not all_item_ans: # 没有表格答案的情况,如1~10 ACBBD...
- # row_ans = re.findall("[A-Z](?<!\))", manyans_oneline_split[0].strip())
- # all_item_ans.extend(row_ans)
- # temp_no = re.findall("(\d)-(\d{1,2})", manyans_oneline_split[0])
- # for t in temp_no:
- # ans_no.extend(list(range(int(t[0]), int(t[1])+1)))
- # if row_ans:
- # manyans_oneline_split = []
- # elif temp_no and not manyans_oneline_split[0]:
- # manyans_oneline_split = manyans_oneline_split[1:]
- # ans_no.extend(temp_no)
- # elif re.match("A-Z", manyans_oneline_split[1].strip()) is None and \
- # len(re.findall("[A-Z](?<!\))", manyans_oneline_split[0].strip())) == len(item_res) - (len(manyans_oneline_split)-1):
- # print('第一行答案不是以题号形式一个个给出')
- # row_ans = re.findall("[A-Z](?<!\))", manyans_oneline_split[0].strip())
- # all_item_ans.extend(row_ans)
- # manyans_oneline_split = manyans_oneline_split[1:]
- # if temp_no and temp_no[0] > len(row_ans):
- # ans_no.extend(list(range(temp_no[0]-len(row_ans), temp_no[0])))
- # ans_no.extend(temp_no)
- # else:
- # print("答案序号有问题!!")
- # ans_no.extend(['']*len(row_ans))
- # ans_no.extend(temp_no)
- # # print("manyans_oneline_split:************")
- # # pprint(manyans_oneline_split)
- # print("ans_no:", ans_no)
- # all_item_ans.extend(manyans_oneline_split)
- # combine_res = stem_ans_struc_combine(item_type_classify, item_res, all_item_ans, ans_no, group)
- # # if not combine_res:
- # # return '答案数量与题干数量不一致,请检查题干和答案中的题号,是否有遗漏答案或答案格式不对;' \
- # # '答案中若存在一行多个答案时,保证每个题的答案间要留有多个空格!', 2
- #
- # return combine_res
- # def stem_ans_struc_combine(item_type_classify, item_res, all_item_ans, ans_no, group):
- # """
- # 题干结构化与答案结构化的合并
- # :return:
- # """
- # print("item_type_classify:", item_type_classify)
- # print("题干中的题目数量:", len(item_res))
- # print("答案中的题目数量:", len(all_item_ans))
- # if item_type_classify and len(all_item_ans) == sum(list(item_type_classify.values())):
- # res1 = []
- # for num1, one_ans in enumerate(all_item_ans):
- # parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
- # res1.append(parse)
- # return res1, 1
- # elif not item_type_classify and len(all_item_ans) == len(item_res):
- # res1 = []
- # for num1, one_ans in enumerate(all_item_ans):
- # parse = only_parse_split(one_ans, item_res[num1]["item_topic_name"], item_res[num1]['content'])
- # res1.append(parse)
- # return res1, 1
- # else:
- # print('答案数量与题干数量不一致,请检查题干和答案中的题号,是否有遗漏答案或答案格式不对;',
- # '答案中若存在一行多个答案时,保证每个题的答案间要留有多个空格!', 2)
- # print("试题个数:", len(item_res))
- # print("答案中的题号:", ans_no)
- # # ----------------------是否正确对上序号还需进一步验证!!!!!!!!!!-------------------------------
- # res1 = []; simp_res = []
- # err_n = 0 # 与题目id没对上号的个数, 默认答案一般也是从前往后排序
- # for k, one_item in enumerate(item_res): # 以题目为主
- # search_range = ans_no
- # if k+3-err_n <= len(ans_no):
- # search_range = ans_no[k-err_n:k+3-err_n]
- # elif k-err_n < len(ans_no):
- # search_range = ans_no[k-err_n:]
- # # print("答案的搜索范围search_range:",search_range)
- # if one_item['item_id'] in search_range: # 在对应位置前
- # ans_no_st = [k1+k-err_n for k1, v1 in enumerate(search_range) if v1 == one_item['item_id']] # 默认取第一个作为对应答案
- # # print("答案的位置{0}:{1}, ----对应题目id:{2}".format(ans_no_st, all_item_ans[ans_no_st[0]],one_item['item_id']))
- # parse = only_parse_split(all_item_ans[ans_no_st[0]], one_item["item_topic_name"], one_item['content'])
- # one_item['answer'] = parse['answer']
- # one_item['parse'] = parse['parse']
- # res1.append(one_item)
- # if group == 'group_ans':
- # simp_res.append({'parse': "", 'answer': parse['answer'],'item_id':one_item['item_id']})
- # if group == 'group_parse':
- # simp_res.append({'parse': parse['parse'], 'answer': parse['answer'],'item_id':one_item['item_id']})
- # else:
- # err_n += 1
- # one_item.update({'parse': "", 'answer': ""})
- # res1.append(one_item)
- # if group:
- # simp_res.append({'parse': '', 'answer': '', 'item_id': one_item['item_id']})
- # if simp_res:
- # return simp_res, 1
- #
- # return res1, 2
- # def one2more_ans_split(item_list, item_type):
- # """
- # 对一行多个答案的情况进行拆分
- # :return:
- # """
- # manyans_oneline_split = []
- # while item_list and \
- # len(re.findall(r"(^|\s+)[1-9][0-9]?\s*[..、、]\s*(【答案】|答案\s*[::]?)?\s*[A-D]", item_list[0])) > 1:
- # print('选择题存在一行多个答案的情况!!!') # 主要以选择题的为主
- #
- # # 处理 1.xxx 2.xxx 3.xxx
- # ans_line1 = item_list[0]
- # if not item_type or item_type.replace("题", "") in ["单选", "多选", "选择", "单项选择", "多项选择"]:
- # ans_line1 = re.sub(r"[^A-D\d..、、()]", "", item_list[0])
- #
- # if re.match(r"[1-9][0-9]?[..、、][A-D]", ans_line1): # 第一个答案为选择题的情况
- # one_ans_split = re.split(r'^\s*[1-9][0-9]?\s*[..、、]|\s+[1-9][0-9]?\s*[..、、]', item_list[0])
- # if not one_ans_split[0]:
- # one_ans_split = one_ans_split[1:]
- # manyans_oneline_split.extend(one_ans_split)
- # else: # 第一个答案为非选择题或没有序号的情况
- # one_ans_split = re.split(r'^\s*[1-9][0-9]?\s*[..、、]|\s+[1-9][0-9]?\s*[..、、]', item_list[0])
- # if not one_ans_split[0]:
- # one_ans_split = one_ans_split[1:]
- # if manyans_oneline_split and not manyans_oneline_split[-1]: # 序号和答案跨行的情况
- # manyans_oneline_split[-1] = one_ans_split[0]
- # manyans_oneline_split.extend(one_ans_split[1:])
- # else:
- # manyans_oneline_split.extend(one_ans_split)
- # item_list = item_list[1:]
- # # if item_list:
- # # ans_line1 = re.sub(r"[^A-D\d..、、]", "", item_list[0])
- #
- # if manyans_oneline_split and not manyans_oneline_split[-1]:
- # print('答案要求:题目序号不在行首时不要与该题答案跨行')
- #
- # # 填空题答案也可能一行多个
- # if item_type == '填空题': # 在题干题型明确时
- # one_type_ans_split = re.split(r'\n\s*[1-9][0-9]?\s*[..、、]|(?<![::])\s+[1-9][0-9]?\s*[..、、](?!png)',
- # table_label_cleal("\n"+"\n".join(item_list)))
- # while not one_type_ans_split[0]:
- # del one_type_ans_split[0]
- # one_type_ans_split.extend(one_type_ans_split)
- # return one_type_ans_split
- # # 对item_list剩余文本按题号继续拆分,包括一行多个答案的情况
- # while re.match("\n\s*([1-9][0-9]?)\s*[..、、].+?\s+([1-9][0-9]?)\s*[..、、]", item_list[0]):
- # item_no_seq = re.findall("[\n\s]\s*([1-9][0-9]?)\s*[..、、]", "\n" + "\n".join(item_list))
- # item_no_seq = [int(one) for one in item_no_seq]
- # # print("item_no_seq:",find_seq_num(item_no_seq))
- # if len(find_seq_num(item_no_seq)) == 1:
- # all_type_ans_split = re.split(r'\n\s*[1-9][0-9]?\s*[..、、]|(?<![::])\s+[1-9][0-9]?\s*[..、、](?!png)',
- # table_label_cleal("\n" + "\n".join(item_list)))
- # while not all_type_ans_split[0].strip():
- # del all_type_ans_split[0]
- # manyans_oneline_split.extend(all_type_ans_split)
- # return manyans_oneline_split
- # def get_ans_from_parse(item_parse, item_type, res_con):
- # """
- # 从已知解析中 挑选 答案
- # :param item_parse: 总解析
- # :param item_type: 题型
- # :return:
- # """
- # item_parse = re.split("【点评】|【点睛】", item_parse)[0].strip()
- # # 将解析中末尾出现的图片去掉
- # while re.search('\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$', item_parse):
- # item_parse = re.sub('\n\s*<imgsrc\d+\sw_h=(\d+\*\d{3})/>\s*$', "", item_parse)
- # item_ans = ""
- # if item_type.replace("题", "") in ["单选", "多选", "选择", "单项选择", "多项选择"]:
- # ans = re.search(r'故选\s*[::]?\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>'
- # r'|故选\s*[::]?\s*([A-Z;;和与、、\s]+)', item_parse.replace("$", ""))
- # if ans:
- # item_ans = ans.group(1) if ans.group(1) is not None else ans.group(2)
- # item_ans = re.sub(r"[.;;.]\s*$", "", item_ans)
- # elif not ans:
- # item_ans = "见解析"
- # elif item_type:
- # ans0 = re.search(r'故选\s*[::]?\s*([A-Z;;和与、、\s]+)[..;;。]?$', item_parse) # 试验题中可能还有选择题
- # ans01 = re.search(r'故选\s*[::]\s*<imgsrc\d+\sdata-latex="\$?([A-Z;;和与、、\s]+)\$?"/>', item_parse) # 选择题的题型可能前面分错
- # ans1 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(((?!(<img)).)+?)[..]?\s*(\n|$)', item_parse)
- # ans11 = re.search(r'((?<!解)答\s*[::]|整理得\s*[::]?)\s*(.+?)([..;;]?\s*$|[..]\s*\n)', item_parse)
- # ans2 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(<imgsrc.+?/>)[..]?\s*(\n|$)', item_parse, re.S)
- # ans22 = re.search(r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*([^∴∵因所故即【】]+?)([..]\s*(\n|$)|$)', item_parse)
- # ans21 = re.search(r'综上所述\s*[::]\s*([^∴∵故因所即【】]+?)[..;;]\s*$', item_parse)
- # ans3 = re.search(r'(故|因[而此]|所以|∴)\s*[::]?.+?[为是填]\s*[::]?\s*([^∴∵故因所即【】]+?)([..;;,,]\s*$|[..]\s*\n)', item_parse)
- # ans31 = re.search(r'(故|因[而此]|所以|∴)\s*([^当为是填∴∵故因所即则【】]+?)[..;;]\s*$', item_parse)
- # ans32 = re.search(r'(故|因[而此]|所以)\s*[::]?[^当为是填∴∵故因所即【】]+?[为是填]\s*[::]?\s*(<imgsrc.+?/>)[..]?\s*(\n|$)',
- # item_parse, re.S)
- # ans4 = re.search(r'\n\s*[==]([^=\n]+?)[..]?\s*$', item_parse)
- # ans42 = re.search(r'[==](?!")(((?!([故=∴即]|原式|因[而此]|所以|\n|=[^"])).)+?)[..]?\s*$', item_parse)
- # ans41 = re.search(r'原式\s*[==].+?[==](?!")(((?!(=|=[^"])).)+?|\s*<imgsrc.+?/>)([..]?\s*$|[..]\s*\n)', item_parse)
- # if not (item_type == '填空题' and len(re.findall(r"_{2,}|_+([^_]*?)_+", res_con)) == 1) and \
- # len(re.findall(r"[((]\d[))]|[\n::;;。】]([((](i{1,3}|[ⅰⅱⅲⅳⅠⅡⅢIV①②③④])[))]|[①②③④]\s*(?![+-]))",
- # item_parse.replace(" ", ""))) > 1 or "证明" in item_parse:
- # item_ans = "见解析"
- # elif ans0:
- # item_ans = ans0.group(1)
- # elif ans01:
- # item_ans = ans01.group(1)
- # elif ans1 or ans11:
- # item_ans = ans1.group(3) if ans1 else ans11.group(2)
- # elif ans2:
- # item_ans = ans2.group(3)
- # elif ans22:
- # item_ans = ans22.group(3)
- # elif ans21:
- # item_ans = ans21.group(1)
- # elif (ans3 or ans31 or ans32) and '证明' not in item_parse:
- # if ans3:
- # item_ans = ans3.group(2)
- # if ans31:
- # item_ans = ans31.group(2)
- # if ans32:
- # item_ans = ans32.group(2)
- # elif (ans4 or ans41 or ans42) and '证明' not in item_parse:
- # if ans4:
- # item_ans = ans4.group(1)
- # if ans41:
- # item_ans = ans41.group(1)
- # if ans42:
- # item_ans = ans42.group(1)
- # else:
- # item_ans = "见解析"
- # return item_ans
- # def con_ans_split(one_item_list, item_topic_name, topic_no):
- # """
- # 将传进来的单道题(含答案或解析)按题干、答案、解析 拆分
- # :param one_item_list: [str,str,str] 每个元素为一行数据, ocr一行行识别, 保留每行数据;如果是word,直接用wordbin拿到html格式进行解析
- # :param item_topic_name:题型
- # :param topic_no:题号
- # :return: one_item:{"content":xxxx,"answer":xxx,"parse":xxx}
- # """
- # pattern1 = re.compile(r"【(.*?)\s+(.*?)】|【(.*?)<imgsrc.+?/>(.*?)】")
- # con0 = re.sub(r"】[::]", "】", "#&#".join(one_item_list)) # 是否换行后面再考虑
- # con0 = re.sub(r"\[来源.*?\]", "", con0).replace("\xa0", " ").replace("\u3000", " ")
- # while re.search(pattern1, con0):
- # con0 = re.sub(pattern1, r"【\1\2】", con0) # 去掉 【】 中的图片和空格
- #
- # big_struc_dict = {"item_topic_name": item_topic_name, "topic_no": topic_no}
- # if re.search("【(答案|[解分][析答]|详解)】", con0):
- # label = re.findall("【(答案|[解分][析答]|详解|点[评睛])】", con0)
- # label_split = re.split(r"【答案】|【[解分][析答]】|【详解】|【点[评睛]】", con0)
- # big_struc_dict1 = dict(zip(label, label_split[1:]))
- # big_struc_dict1['con'] = label_split[0]
- # # 将键值为空的键删掉
- # big_struc_dict.update({k: v for k, v in big_struc_dict1.items() if v.replace("#&#", "").strip() != ""})
- # else:
- # big_struc_dict['con'] = con0
- #
- # return big_struc_dict
if __name__ == '__main__':
    # Manual smoke checks for the answer/analysis splitting logic.
    # Nothing in this block runs when the module is imported.

    # ---- Disabled fixture 1: single-choice item whose options are images ----
    # oneitem = {'content': '7.化简<img src="files/image7.jpeg" style="width: 28.8pt; height: '
    # '28.8pt" width="38" height="38" />÷<img src="files/image8.jpeg" '
    # 'style="width: 47.95pt; height: 30.7pt" width="64" height="41" '
    # '/>的结果是(\u3000\u3000)\n'
    # 'A.<img src="files/image9.jpeg" style="width: 21.6pt; height: '
    # '28.8pt" width="29" height="38" />B.<img src="files/image10.jpeg" '
    # 'style="width: 21.7pt; height: 26.2pt" width="29" height="35" '
    # '/>C.<img src="files/image11.jpeg" style="width: 21.6pt; height: '
    # '28.8pt" width="29" height="38" />D.<img src="files/image12.jpeg" '
    # 'style="width: 21.7pt; height: 26.2pt" width="29" height="35" />\n'}
    # res = one_item_parse(oneitem, '单选题')

    # ---- Disabled fixture 2: one question given line by line, with answer and analysis ----
    # oneitem = ["1.下列说法正确的是( )","A.射线是高速运动的电子流","B.氢原子可通过吸收光子实现从低能级向高能级跃迁",
    # "C.太阳辐射能量的主要来源是太阳内部发生的重核裂变","D.的半衰期是3.8天,1克经过7.6天后全部发生衰变",
    # "【答案】B","【解析】","【分析】","【详解】","A.γ射线是电磁波,故A错误;","B.按照波尔理论,氢原子吸收光子后,将从低能级向高能级跃迁,故B正确;",
    # "C.太阳辐射能量的主要来源是太阳中发生的轻核聚变,故C错误;","D.的半衰期是3.8天,7.6天是2个半衰期,根据可知,有发生衰",
    # "变,还剩下克没有衰变,故D错误。","故选B。"]
    # num = 1
    # item_type = '选择题'
    # res = con_ans_split(oneitem, item_type, num)
    # pprint(res)

    # ---- Check 1: split a one-line fill-in-the-blank answer ----
    # NOTE(review): only_parse_split is neither imported nor defined in the
    # part of this file visible here, and the disabled callers pass three
    # arguments rather than two -- confirm the name and signature before running.
    print(only_parse_split('故答案为:3.\n', "填空题"))

    # ---- Disabled scratch: zip() with duplicate keys ----
    # a = dict(zip(["2","2","3"], [2,4,6]))
    # print(a)

    # ---- Check 2: pull the stated answer out of a full analysis text ----
    # The sample is written as adjacent literals, one per "\n"-terminated
    # segment; Python joins them into a single string.
    sample_parse = (
        '【分析】 首先根据分式值为零的条件,可得<img src="999image136.png" style="width: 77.35pt; height: 33.85pt" width="103" height="45" />;然后根据因式分解法解一元二次方程的步骤,求出x的值为多少即可.\n'
        '【解答】 解:∵分式<img src="999image137.png" style="width: 57.5pt; height: 31.7pt" width="77" height="42" />的值为0,\n'
        '<p>∴<img src="999image138.png" style="width: 77.35pt; height: 33.85pt" width="103" height="45" />\n'
        '<p>解得x=3,\n'
        '<p>即x的值为3.\n'
        '<p>故答案为:3.\n'
        '【点评】 (1)此题主要考查了分式值为零的条件'
    )
    # Group 3 captures the image-free text that follows the conclusion phrase,
    # up to an optional trailing period and a newline or end of string.
    conclusion_pattern = re.compile(
        r'(故|因[而此]|所以)\s*[::]?\s*(答案分?别?[为是填]?|填)\s*[::]?\s*(((?!(<img)).)+?)[..]?\s*(\n|$)'
    )
    conclusion = conclusion_pattern.search(sample_parse)
    print(':', conclusion.group(3))
|