"""Re-parse a single edited question item and turn it into a structured dict."""
import re

from structure.option import option_structure
from utils.equation_extract import get_simpstr2eqn, get_equation_instr
from utils.html_again_parse import css_label_wash
from utils.washutil import base642img, css_conflict_deal

# Leading question number (1-59) followed by a separator character.
# NOTE(review): the character class lists "." and "、" twice; the second of
# each pair was probably a full-width variant that got normalised when this
# file was extracted -- confirm against version control.
_TOPIC_NUM_RE = re.compile(r"^([1-5][0-9]|[1-9])\s*[..、、]")

# Loose field splitter: a marker anywhere in the text. The capturing group
# makes re.split keep the markers in its result.
_LOOSE_FIELD_RE = re.compile(r"(【答案】|【解析】)")

# NOTE(review): PRESUMED. The original first-pass split pattern was lost in
# extraction. The error messages ask for each marker to be "alone on its own
# line", so a marker-on-its-own-line pattern is assumed -- confirm.
_STRICT_FIELD_RE = re.compile(r"\n\s*(【答案】|【解析】)\s*\n")

# NOTE(review): PRESUMED. The replacement literal appears in the extracted
# source as a raw line break, i.e. an HTML tag was stripped; "<br/>" is the
# most likely original -- confirm (could also be "<br>" or "<br />").
_LINE_BREAK = "<br/>"

_DUPLICATE_FIELD_MSG = "编辑后的文本出现多个【答案】或【解析】字段,请删除,每个字段只保留唯一且单独成行"


def _newlines_to_br(text):
    """Strip *text*, collapse "\\n\\n" pairs once, then replace newlines with the break tag."""
    return text.strip().replace("\n\n", "\n").replace("\n", _LINE_BREAK)


def single_parse(one_item, item_type, wordid):
    """Split one edited question into stem / answer / explanation (and options).

    Args:
        one_item: Text of the question as edited by the user; may contain the
            markers ``【答案】`` and ``【解析】``.
        item_type: Question type name. When it contains one of
            ``选择 / 不定选择 / 多选 / 单选`` the options are parsed as well.
        wordid: Not used by this function; kept for interface compatibility.

    Returns:
        A ``str`` with a user-facing error message when validation fails,
        otherwise a ``dict`` with at least the keys ``stem``, ``key``,
        ``parse``, ``errmsgs`` and ``topic_num`` (plus whatever
        ``option_structure`` adds, e.g. ``options``, for choice questions).
    """
    print("原始:")
    print(one_item)
    one_item = css_label_wash(one_item)

    # NOTE(review): the statements between css_label_wash() and the first
    # length check were destroyed in extraction (only `re.sub("|` survived).
    # The re.sub below and _STRICT_FIELD_RE are placeholders inferred from the
    # surviving fallback branch; restore the originals from version control.
    one_item = re.sub(r"<p>|</p>", "\n", one_item)
    res_list = _STRICT_FIELD_RE.split(one_item)
    if len(res_list) > 5:
        return _DUPLICATE_FIELD_MSG
    if len(res_list) < 5:
        # Fall back to matching the markers anywhere in the text.
        res_list = _LOOSE_FIELD_RE.split(one_item)
        if len(res_list) > 5:
            return _DUPLICATE_FIELD_MSG

    # Missing markers are tolerated: the corresponding field stays empty.
    new_item_struct = {"key": "", "parse": "", "stem": res_list[0]}
    if len(res_list) == 5:
        first_marker = res_list[1].strip()
        second_marker = res_list[3].strip()
        if first_marker == "【答案】" and second_marker == "【解析】":
            new_item_struct["key"] = res_list[2]
            new_item_struct["parse"] = res_list[4]
        elif first_marker == "【解析】" and second_marker == "【答案】":
            new_item_struct["key"] = res_list[4]
            new_item_struct["parse"] = res_list[2]
        else:
            # Same marker twice.
            return "编辑后的文本出现多个【答案】或【解析】字段,请修改,每个字段只保留唯一且单独成行"
    elif len(res_list) > 2:
        only_marker = res_list[1].strip()
        if only_marker == "【答案】":
            new_item_struct["key"] = res_list[2]
        elif only_marker == "【解析】":
            new_item_struct["parse"] = res_list[2]

    # Re-parse the options for choice-type questions.
    new_item_struct["errmsgs"] = []
    if re.search("选择|不定选择|多选|单选", item_type):
        new_item_struct = option_structure(
            new_item_struct, res_list[0], new_item_struct["key"], 1, 1
        )
        if new_item_struct["errmsgs"]:
            return ";;".join(new_item_struct["errmsgs"])
        # A lone ":" left in an otherwise empty option must not count as content.
        if any(not opt.replace(":", "").strip() for opt in new_item_struct["options"]):
            return "存在选项为空,请补充完整"

    # Peel the leading question number off the stem (anchored, so at most one).
    stem = new_item_struct["stem"].strip()
    num_match = _TOPIC_NUM_RE.match(stem)
    if num_match:
        topic_num = num_match.group(1)
        stem = stem[num_match.end():]
    else:
        topic_num = 0
    new_item_struct["stem"] = stem
    if len(stem.strip()) < 3:
        return "题干为空,请补充完整"
    new_item_struct["topic_num"] = topic_num

    # Convert newlines to the line-break tag in every text field.
    new_item_struct["stem"] = _newlines_to_br(new_item_struct["stem"])
    new_item_struct["key"] = _newlines_to_br(new_item_struct["key"])
    new_item_struct["parse"] = _newlines_to_br(new_item_struct["parse"])
    if "options" in new_item_struct:
        # Same formatting for each option, updating the existing list in place.
        options = new_item_struct["options"]
        for idx, option in enumerate(options):
            options[idx] = _newlines_to_br(option)

    print(new_item_struct)
    return new_item_struct


if __name__ == '__main__':
    # Manual smoke run with a sample physics question.
    html = (
        " 17.为测定干电池的电动势和内阻,提供的实验器材如下所示:"
        " A.干电池2节,每节干电池的电动势为1.5 V左右,内阻较小"
        " B.电流表A(量程为0~0.6 A,内阻约0.5 Ω)"
        " C.滑动变阻器R1(0~10 Ω,10 A)D.滑动变阻器R2(0~100 Ω,1 A)"
        " E.电流表G(0~3.0 mA,Rg=10 Ω)F.定值电阻R1=990 Ω"
        " G.定值电阻R2=90 ΩH.开关S和导线若干"
        " (1)由于两节干电池的内阻较小,现将定值电阻R0=3.0 Ω与两节干电池串联后作为一个整体进行测量。"
        "在进行实验时,滑动变阻器应选用________,定值电阻应选用________。(填写实验器材前的编号)"
        " "
    )
    aa = single_parse(html, "解答题", "6167c1360d2da861d11c950a")
    print(aa)