danti_structure.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. """
  2. 单题再解析、结构化
  3. """
  4. import re
  5. from structure.option import option_structure
  6. from utils.equation_extract import get_simpstr2eqn, get_equation_instr
  7. from utils.html_again_parse import css_label_wash
  8. from utils.washutil import base642img, css_conflict_deal
  9. def single_parse(one_item, item_type, wordid):
  10. """
  11. rtype:题型
  12. :return:
  13. """
  14. # if re.search("选择|不定选择|多选|单选", rtype):
  15. # if "【选项】" not in one_item:
  16. # return "请不要将本编辑框自带的“【选项】、【答案】、【解析】”字段删除"
  17. # if "【答案】" not in one_item or "【解析】" not in one_item:
  18. # return "请不要将本编辑框自带的“【答案】、【解析】”字段删除"
  19. print("原始:")
  20. print(one_item)
  21. one_item = css_label_wash(one_item)
  22. one_item = re.sub("</?p>|<h[12345]", "", one_item)
  23. print("去css:")
  24. print(one_item)
  25. one_item = base642img(one_item, wordid)
  26. one_item = css_conflict_deal(one_item)
  27. one_item = re.sub(r"\\\(|\\\)", "$", one_item)
  28. res_list = re.split(r"(\n+【答案】|\n+【解析】)", one_item)
  29. if len(res_list) > 5:
  30. return "编辑后的文本出现多个【答案】或【解析】字段,请删除,每个字段只保留唯一且单独成行"
  31. elif len(res_list) < 5:
  32. res_list = re.split(r"(【答案】|【解析】)", one_item)
  33. if len(res_list) > 5:
  34. return "编辑后的文本出现多个【答案】或【解析】字段,请删除,每个字段只保留唯一且单独成行"
  35. # elif len(res_list) < 5:
  36. # return "编辑后的文本掉了【答案】或【解析】字段,请添加,每个字段保留唯一"
  37. new_item_struct = {}
  38. new_item_struct["key"] = ""
  39. new_item_struct["parse"] = ""
  40. new_item_struct["stem"] = res_list[0]
  41. if len(res_list) == 5:
  42. if res_list[1].strip() == "【答案】" and res_list[3].strip() == "【解析】":
  43. new_item_struct["key"] = res_list[2]
  44. new_item_struct["parse"] = res_list[4]
  45. elif res_list[1].strip() == "【解析】" and res_list[3].strip() == "【答案】":
  46. new_item_struct["key"] = res_list[4]
  47. new_item_struct["parse"] = res_list[2]
  48. else:
  49. return "编辑后的文本出现多个【答案】或【解析】字段,请修改,每个字段只保留唯一且单独成行"
  50. elif len(res_list) > 2:
  51. if res_list[1].strip() == "【答案】":
  52. new_item_struct["key"] = res_list[2]
  53. elif res_list[1].strip() == "【解析】":
  54. new_item_struct["parse"] = res_list[2]
  55. # 选项再解析
  56. new_item_struct["errmsgs"] = []
  57. if re.search("选择|不定选择|多选|单选", item_type):
  58. new_item_struct = option_structure(new_item_struct, res_list[0], new_item_struct["key"], 1, 1)
  59. if new_item_struct["errmsgs"]:
  60. return ";;".join(new_item_struct["errmsgs"])
  61. if any([True for i in new_item_struct['options'] if not i.replace(":", "").strip()]): # 空选项中:被当成了内容
  62. return "存在选项为空,请补充完整"
  63. item_ids = re.findall("^([1-5][0-9]|[1-9])\s*[..、、]", new_item_struct["stem"].strip())
  64. new_item_struct["stem"] = re.sub("^([1-5][0-9]|[1-9])\s*[..、、]", "", new_item_struct["stem"].strip())
  65. if len(new_item_struct["stem"].strip())<3:
  66. return "题干为空,请补充完整"
  67. new_item_struct["topic_num"] = item_ids[0] if item_ids else 0
  68. # 换行符替换
  69. new_item_struct["stem"] = new_item_struct["stem"].strip().replace("\n\n", "\n").replace("\n", "<br/>") # 2020/4/10 gai
  70. new_item_struct["key"] = new_item_struct["key"].strip().replace("\n\n", "\n").replace("\n", "<br/>")
  71. new_item_struct["parse"] = new_item_struct["parse"].strip().replace("\n\n", "\n").replace("\n", "<br/>")
  72. if "options" in new_item_struct: # 对选项部分进行格式处理 get_equation_instr
  73. for i in range(len(new_item_struct['options'])):
  74. new_item_struct['options'][i] = new_item_struct['options'][i].strip().replace("\n\n", "\n")\
  75. .replace("\n", "<br/>")
  76. print(new_item_struct)
  77. return new_item_struct
  78. if __name__ == '__main__':
  79. html = '''
  80. 17.为测定干电池的电动势和内阻,提供的实验器材如下所示:
  81. A.干电池2节,每节干电池的电动势为1.5 V<img src="http://zxhx-1302712961.cos.ap-shanghai.myqcloud.com/zyk/uploadfiles/wording/6167c1360d2da861d11c950a/image34.png" width="26px" height="26px">左右,内阻较小
  82. B.电流表A(量程为0~0.6 A,内阻约0.5 Ω)
  83. C.滑动变阻器R1(0~10 Ω,10 A)D.滑动变阻器R2(0~100 Ω,1 A)
  84. E.电流表G(0~3.0 mA,Rg=10 Ω)F.定值电阻R1=990 Ω
  85. G.定值电阻R2=90 ΩH.开关S和导线若干
  86. (1)由于两节干电池的内阻较小,现将定值电阻R0=3.0 Ω与两节干电池串联后作为一个整体进行测量。在进行实验时,滑动变阻器应选用________,定值电阻应选用________。(填写实验器材前的编号)
  87. '''
  88. aa = single_parse(html, "解答题", "6167c1360d2da861d11c950a")
  89. print(aa)