xuanzuoti2slave.py 10 KB


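# Handles the new optional-question ("选做题") layout of the new Gaokao papers:
# one question contains two sub-questions (a fill-in-the-blank part plus a
# worked-solution part). The two sub-questions are split out individually and
# numbered after the parent question, e.g. 12-1 or 12(1).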
  3. import re
  4. def toslave_aft(one_item):
  5. """
  6. 将一道题拆分为俩道题
  7. :param one_item: 已经按题号结构化好的单道题目
  8. :return:
  9. """
  10. # th1 = {"(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
  11. # "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}
  12. # con = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)], con)
  13. # parse = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)],
  14. # parse)
  15. if "is_optional" not in one_item or one_item["is_optional"] != 'true':
  16. return one_item
  17. con, ans, parse = one_item["stem"], one_item["key"], one_item["analysis"]
  18. con = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1" + "<br/>" + r"\2", con)
  19. parse = re.sub("(<[/a-z]+>)\s*([((]\s*\d\s*[))])", r"\1" + "<br/>" + r"\2", parse)
  20. parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
  21. parse = re.sub(r"<br/>\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[12]\s*[))])", r"<br/>\1<br/>\3", parse)
  22. # parse = re.sub("(答案分?别?[为是]?\s*[::])\s*[((]\s*(\d)\s*[))]", r"\1[#[\2]#]", parse)
  23. con = re.sub(r"<br/>\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[12]\s*[))])", r"<br/>\1<br/>\3", con)
  24. con_list = re.split(r"(?<=<br/>)\s*[((]\s*[12]\s*[))]", "<br/>" + con)
  25. # print(con_list)
  26. def sub1(ss):
  27. return "(1)" + ss.group(1).replace(" ", "") + " (2)"
  28. ans = re.sub("[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]",
  29. sub1, ans)
  30. ans_list = re.split("(1)\s*([A-G]+)\s*(2)", ans)
  31. if len(con_list)==3:
  32. con_list = con_list[1:]
  33. if ans == "见解析" or len(ans_list)<=2 or len(ans_list)>3:
  34. ans_list = ["见解析"]*2
  35. else:
  36. ans_list = ans_list[1:]
  37. # 解析
  38. if parse != "略":
  39. parse_list = re.split("(?<=<br/>)\s*[((]\s*[12]\s*[))]", "<br/>" + parse)
  40. if len(parse_list)==3:
  41. parse_list = parse_list[1:]
  42. all_slave = []
  43. for n in range(2):
  44. # if "blank_num" in one_item:
  45. # type = "填空题"
  46. one_slave = {"stem": con_list[n].replace("<br/>", "\n").strip().replace("\n", "<br/>"),
  47. "key": ans_list[n].replace("<br/>", "\n").strip().replace("\n", "<br/>"),
  48. "analysis": parse_list[n].replace("<br/>", "\n").strip().replace("\n", "<br/>"),
  49. "type": "填空题" if not n else "解答题",
  50. "item_id": str(one_item['item_id']) + "-" + str(n+1)
  51. }
  52. all_slave.append(one_slave)
  53. one_item["slave"] = all_slave
  54. del one_item['stem'], one_item['key'], one_item['analysis']
  55. else:
  56. if one_item['text_errmsgs']:
  57. one_item['text_errmsgs'] += ";"
  58. one_item['text_errmsgs'] += "本选做题的两小题格式不正确,请将每小题答案换行处理"
  59. else:
  60. if one_item['text_errmsgs']:
  61. one_item['text_errmsgs'] += ";"
  62. one_item['text_errmsgs'] += "本选做题的两小题格式不正确,请将每小题题干换行处理"
  63. # print(one_item)
  64. return one_item
  65. def toslave_bef(one_item):
  66. """
  67. 将一道题拆分为俩道题
  68. :param one_item: 已经按题号结构化好的单道题目
  69. :return:
  70. """
  71. # th1 = {"(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
  72. # "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}
  73. # con = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)], con)
  74. # parse = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)],
  75. # parse)
  76. # print(one_item)
  77. con, ans, parse = one_item["stem"], one_item["key"], one_item["parse"]
  78. con = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1" + "\n" + r"\2", con)
  79. con = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[123]\s*[))])", r"\n\1\n\3", con, flags=re.S)
  80. # con_list = re.split(r"(?<=\n)\s*[((]\s*[123]\s*[))]", "\n" + con)
  81. # print(con_list)
  82. parse = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", parse)
  83. parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
  84. parse = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[123]\s*[))])", r"\n\1\n\3", parse)
  85. # parse = re.sub("(答案分?别?[为是]?\s*[::])\s*[((]\s*(\d)\s*[))]", r"\1[#[\2]#]", parse)
  86. # 默认第一题是选择形式的填空题!!!!!
  87. def sub1(ss):
  88. if int(ss.group(1)) in [1, 2] and int(ss.group(4)) - int(ss.group(1)) == 1:
  89. return "({})".format(ss.group(1)) + ss.group(2).replace(" ", "") + "\n" + " ({})".format(ss.group(4))
  90. else:
  91. return ss
  92. ans = re.sub("[((]\s*(\d)\s*[))]\s*(([A-G][、、\s]*)+)\s*[((]\s*(\d)\s*[))]", sub1, ans)
  93. all_slave = []
  94. true_n = 2
  95. if re.search(r"(^|\n)[((]1[))]|(^|\n)[①②]\s*[((].*?(选[修学]|[学考]生).*?[))]", con):
  96. con_list = re.split("\n[((][123][))]", "\n"+con)
  97. if len(con_list) < 2:
  98. con_list = re.split("\n[①②③]\s*[((].*?[选修学考生].*?[))]", "\n" + con)
  99. if re.search(r"\n[((]3[))]", con):
  100. if len(con_list)-1 != 3:
  101. text_errmsgs = "本选做题的小题格式不正确,请将每小题题干换行处理"
  102. one_item['errmsgs'].append(text_errmsgs) # if one_item['errmsgs'] else text_errmsgs
  103. return one_item
  104. else:
  105. true_n = 3
  106. elif len(con_list)-1 != 2:
  107. text_errmsgs = "本选做题的小题格式不正确,请将每小题题干换行处理"
  108. # one_item['errmsgs'] += ";" + text_errmsgs if one_item['errmsgs'] else text_errmsgs
  109. one_item['errmsgs'].append(text_errmsgs)
  110. return one_item
  111. con_list = con_list[1:]
  112. # if len(con_list)-1 == true_n:
  113. # con_list = con_list[1:]
  114. ans_list = re.split("\n[((][123][))]", "\n" + ans)
  115. if "#" in ans:
  116. ans_list = ans.split("#")
  117. else:
  118. if ans == "见解析" or (true_n ==2 and (len(ans_list)<=2 or len(ans_list)>3))\
  119. or (true_n ==3 and (len(ans_list)<=3 or len(ans_list)>4)):
  120. ans_list = ["见解析"]*true_n
  121. else:
  122. ans_list = ans_list[1:]
  123. # 解析
  124. if parse and parse != "略":
  125. parse_list = re.split("(?<=\n)\s*[((]\s*[12]\s*[))]", "\n" + parse, maxsplit=2)
  126. if true_n == 3:
  127. parse_list = re.split("(?<=\n)\s*[((]\s*[123]\s*[))]", "\n" + parse, maxsplit=3)
  128. if len(parse_list) == true_n+1:
  129. parse_list = parse_list[1:]
  130. for n in range(true_n):
  131. item_type = _type_judge(con_list[n], ans_list[n]) # 题型判断
  132. one_slave = {"stem": con_list[n].strip(),
  133. "key": ans_list[n].strip(),
  134. "parse": parse_list[n].strip(),
  135. "type": item_type,
  136. "item_id": str(one_item['item_id']) + "-" + str(n+1),
  137. # "score": 0.0,
  138. "errmsgs": []
  139. }
  140. all_slave.append(one_slave)
  141. else:
  142. if ans_list == ["见解析"]*true_n:
  143. one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题答案和解析换行处理")
  144. else:
  145. one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题的解析换行处理")
  146. return one_item
  147. else:
  148. errmsgs = ["本题缺少解析"]
  149. if ans_list == ["见解析"]*true_n:
  150. errmsgs.append("本选做题缺少答案或答案格式不正确,请将每小题答案换行处理")
  151. for n in range(true_n):
  152. item_type = _type_judge(con_list[n], ans_list[n])
  153. one_slave = {"stem": con_list[n].strip(),
  154. "key": ans_list[n].strip() if ans_list[n]!="见解析" else "",
  155. "parse": "",
  156. "type": item_type,
  157. "item_id": str(one_item['item_id']) + "-" + str(n + 1),
  158. "score": 0.0,
  159. "errmsgs": errmsgs
  160. }
  161. all_slave.append(one_slave)
  162. else:
  163. text_errmsgs = "本选做题的小题格式不正确,请将第1小题的题干换行处理"
  164. one_item['errmsgs'].append(text_errmsgs)
  165. return one_item
  166. return all_slave
  167. def _type_judge(con, ans):
  168. """
  169. # 题型判断
  170. :return:
  171. """
  172. item_type = "解答题"
  173. b1 = re.search(r"选择?对[1一2两3三]个", con)
  174. con0 = re.sub("([A-H]\s*[..、、])+", "A、", "\n" + con)
  175. b21 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 3
  176. b22 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 5
  177. b31 = re.match(r"[A-H][A-H;;和与、、\s]*?$", ans.strip())
  178. b32 = re.match(r"[A-H][;;和与、、\s][A-H][A-H;;和与、、\s]*?$", ans.strip())
  179. if b1 or b22 or b32:
  180. item_type = "多选题"
  181. elif b31 or b21:
  182. item_type = "单选题"
  183. elif re.findall(r"_{2,}", ans.strip()):
  184. item_type = "填空题"
  185. return item_type
  186. if __name__ == '__main__':
  187. def sub1(ss):
  188. return "(1)" + ss.group(1).replace(" ", "") + "(2)"
  189. ans = "答案:(1)B C E (2)(ⅰ)T0 (ⅱ)"
  190. ans = re.sub("[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]",
  191. sub1, ans)
  192. print(ans)