123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101 |
"""
Re-parse a single question and convert it into a structured form.
"""
- import re
- from structure.option import option_structure
- from utils.equation_extract import get_simpstr2eqn, get_equation_instr
- from utils.html_again_parse import css_label_wash
- from utils.washutil import base642img, css_conflict_deal
# Error returned when the text splits into more than one answer/analysis pair.
_DUPLICATE_MARKER_DELETE_MSG = "编辑后的文本出现多个【答案】或【解析】字段,请删除,每个字段只保留唯一且单独成行"
# Error returned when two markers are present but they are the same marker.
_DUPLICATE_MARKER_MODIFY_MSG = "编辑后的文本出现多个【答案】或【解析】字段,请修改,每个字段只保留唯一且单独成行"

# Leading question number (1-59) followed by a separator.  Both the ASCII and
# the full-width period, and both the ideographic and the half-width
# ideographic comma, are accepted as separators.
_TOPIC_NUM_RE = re.compile(r"^([1-5][0-9]|[1-9])\s*[.\uff0e、\uff64]")


def _newlines_to_br(text):
    """Strip *text*, replace each "\\n\\n" with "\\n" once, then turn newlines into ``<br/>``."""
    return text.strip().replace("\n\n", "\n").replace("\n", "<br/>")


def _extract_key_and_parse(parts):
    """Return ``(key, parse)`` taken from the ``re.split`` result *parts*.

    *parts* alternates text and captured markers: ``[stem, marker, text, ...]``.
    Returns ``None`` when two markers are present but are not exactly one
    answer marker and one analysis marker.
    """
    markers = [marker.strip() for marker in parts[1::2]]
    bodies = parts[2::2]
    if len(parts) == 5:
        if markers == ["【答案】", "【解析】"]:
            return bodies[0], bodies[1]
        if markers == ["【解析】", "【答案】"]:
            return bodies[1], bodies[0]
        return None
    key = parse = ""
    if len(parts) > 2:
        if markers[0] == "【答案】":
            key = bodies[0]
        elif markers[0] == "【解析】":
            parse = bodies[0]
    return key, parse


def single_parse(one_item, item_type, wordid):
    """Re-parse one edited question into a structured dict.

    The text is cleaned by the project helpers (``css_label_wash``,
    ``base642img``, ``css_conflict_deal``), split on the 【答案】 / 【解析】
    markers, and, for choice-type questions, passed to ``option_structure``.

    :param one_item: raw text/HTML of a single question.
    :param item_type: question-type label; when it contains 选择, 多选 or 单选
        the options are parsed as well.
    :param wordid: identifier forwarded to ``base642img``.
    :return: on success, a dict with at least the keys ``key``, ``parse``,
        ``stem``, ``errmsgs`` and ``topic_num`` (plus whatever
        ``option_structure`` adds, e.g. ``options``); on a validation
        failure, a ``str`` holding the user-facing error message.
    """
    print("原始:")
    print(one_item)
    one_item = css_label_wash(one_item)
    # NOTE(review): only the "<h1".."<h5" prefix is removed here, not the rest
    # of the tag or its closing tag -- confirm css_label_wash covers those.
    one_item = re.sub("</?p>|<h[12345]", "", one_item)
    print("去css:")
    print(one_item)
    one_item = base642img(one_item, wordid)
    one_item = css_conflict_deal(one_item)
    # Turn LaTeX inline delimiters \( and \) into "$".
    one_item = re.sub(r"\\\(|\\\)", "$", one_item)

    # Prefer markers that start on their own line; fall back to markers
    # anywhere in the text when fewer than two were found that way.
    res_list = re.split(r"(\n+【答案】|\n+【解析】)", one_item)
    if len(res_list) > 5:
        return _DUPLICATE_MARKER_DELETE_MSG
    if len(res_list) < 5:
        res_list = re.split(r"(【答案】|【解析】)", one_item)
        if len(res_list) > 5:
            return _DUPLICATE_MARKER_DELETE_MSG
    # A missing marker is tolerated: the corresponding field stays empty.

    new_item_struct = {"key": "", "parse": "", "stem": res_list[0]}
    key_and_parse = _extract_key_and_parse(res_list)
    if key_and_parse is None:
        return _DUPLICATE_MARKER_MODIFY_MSG
    new_item_struct["key"], new_item_struct["parse"] = key_and_parse

    # Re-parse the options for choice-type questions.
    new_item_struct["errmsgs"] = []
    if re.search("选择|不定选择|多选|单选", item_type):
        new_item_struct = option_structure(
            new_item_struct, res_list[0], new_item_struct["key"], 1, 1
        )
        if new_item_struct["errmsgs"]:
            return ";;".join(new_item_struct["errmsgs"])
        # An option consisting only of a colon (ASCII or full-width) and
        # whitespace counts as empty: the colon of an empty option can end
        # up being treated as its content.
        parsed_options = new_item_struct.get("options") or []
        if any(
            not opt.replace(":", "").replace("\uff1a", "").strip()
            for opt in parsed_options
        ):
            return "存在选项为空,请补充完整"

    # Peel the leading question number off the stem.
    stem = new_item_struct["stem"].strip()
    num_match = _TOPIC_NUM_RE.match(stem)
    if num_match:
        stem = stem[num_match.end():]
    if len(stem.strip()) < 3:
        return "题干为空,请补充完整"
    new_item_struct["stem"] = stem
    new_item_struct["topic_num"] = num_match.group(1) if num_match else 0

    # Convert newlines to <br/> in every text field.
    for field in ("stem", "key", "parse"):
        new_item_struct[field] = _newlines_to_br(new_item_struct[field])
    options = new_item_struct.get("options")
    if options:
        # Slice assignment keeps the same list object, as the original
        # in-place index assignment did.
        options[:] = [_newlines_to_br(opt) for opt in options]

    print(new_item_struct)
    return new_item_struct
if __name__ == "__main__":
    # Manual smoke run: parse one sample free-response question and print
    # whatever single_parse returns.
    sample_item = '''
17.为测定干电池的电动势和内阻,提供的实验器材如下所示:
A.干电池2节,每节干电池的电动势为1.5 V<img src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zyk/uploadfiles/wording/6167c1360d2da861d11c950a/image34.png" width="26px" height="26px">左右,内阻较小
B.电流表A(量程为0~0.6 A,内阻约0.5 Ω)
C.滑动变阻器R1(0~10 Ω,10 A)D.滑动变阻器R2(0~100 Ω,1 A)
E.电流表G(0~3.0 mA,Rg=10 Ω)F.定值电阻R1=990 Ω
G.定值电阻R2=90 ΩH.开关S和导线若干
(1)由于两节干电池的内阻较小,现将定值电阻R0=3.0 Ω与两节干电池串联后作为一个整体进行测量。在进行实验时,滑动变阻器应选用________,定值电阻应选用________。(填写实验器材前的编号)
'''
    parsed = single_parse(sample_item, "解答题", "6167c1360d2da861d11c950a")
    print(parsed)
|