xuanzuoti2slave.py 16 KB


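# Handles the new optional-question ("选做题") format of the new gaokao:
# one question contains 2 sub-questions (a fill-in-the-blank plus a
# free-response question). The two sub-questions must be split out as
# separate items, numbered e.g. 12-1 or 12(1).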
  3. import re
  4. def toslave_aft(one_item):
  5. """
  6. 将一道题拆分为2-3道题
  7. 在wash_after函数之后
  8. :param one_item: 已经按题号结构化好的单道题目
  9. :return:
  10. """
  11. # th1 = {"(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
  12. # "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}
  13. # con = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)], con)
  14. # parse = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)],
  15. # parse)
  16. # 要求是选做题
  17. if "is_optional" not in one_item or one_item["is_optional"] != 'true':
  18. return one_item
  19. con, ans, parse = one_item["stem"], one_item["key"], one_item["parse"]
  20. con = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1" + "<br/>" + r"\2", con)
  21. parse = re.sub("(<[/a-z]+>)\s*([((]\s*\d\s*[))])", r"\1" + "<br/>" + r"\2", parse)
  22. parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
  23. parse = re.sub(r"<br/>\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"<br/>\1<br/>\3", parse)
  24. # parse = re.sub("(答案分?别?[为是]?\s*[::])\s*[((]\s*(\d)\s*[))]", r"\1[#[\2]#]", parse)
  25. con = re.sub(r"<br/>\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"<br/>\1<br/>\3", con)
  26. con_list = re.split(r"(?<=<br/>)\s*[((]\s*[1234]\s*[))]", "<br/>" + con)
  27. # print(con_list)
  28. def sub1(ss):
  29. return "(1)" + ss.group(1).replace(" ", "") + " (2)"
  30. ans = re.sub("[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]", sub1, ans)
  31. ans_list = re.split("(1)\s*([A-G]+)\s*(2)", ans)
  32. if len(con_list)==3:
  33. con_list = con_list[1:]
  34. if ans == "见解析" or len(ans_list)<=2 or len(ans_list)>3:
  35. ans_list = ["见解析"]*2
  36. else:
  37. ans_list = ans_list[1:]
  38. # 解析
  39. if parse != "略":
  40. parse_list = re.split("(?<=<br/>)\s*[((]\s*[12]\s*[))]", "<br/>" + parse)
  41. if len(parse_list)==3:
  42. parse_list = parse_list[1:]
  43. all_slave = []
  44. for n in range(2):
  45. # if "blank_num" in one_item:
  46. # type = "填空题"
  47. one_slave = {"stem": con_list[n].replace("<br/>", "\n").strip().replace("\n", "<br/>"),
  48. "key": ans_list[n].replace("<br/>", "\n").strip().replace("\n", "<br/>"),
  49. "parse": parse_list[n].replace("<br/>", "\n").strip().replace("\n", "<br/>"),
  50. "type": "填空题" if not n else "解答题",
  51. "item_id": str(one_item['item_id']) + "-" + str(n+1)
  52. }
  53. all_slave.append(one_slave)
  54. one_item["slave"] = all_slave
  55. del one_item['stem'], one_item['key'], one_item['parse']
  56. else:
  57. if one_item['errmsgs']:
  58. one_item['errmsgs'] += ";"
  59. one_item['errmsgs'] += "本选做题的两小题格式不正确,请将每小题答案换行处理"
  60. else:
  61. if one_item['errmsgs']:
  62. one_item['errmsgs'] += ";"
  63. one_item['errmsgs'] += "本选做题的两小题格式不正确,请将每小题题干换行处理"
  64. # print(one_item)
  65. return one_item
  66. def toslave_bef(one_item):
  67. """
  68. 将一道题拆分为2-3道题, 先默认是题组形式
  69. :param one_item: 已经按题号结构化好的单道题目
  70. :return:
  71. """
  72. # th1 = {"(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
  73. # "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}
  74. # con = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)], con)
  75. # parse = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)],
  76. # parse)
  77. # print(one_item)
  78. con, ans, parse = one_item["stem"], one_item["key"], one_item["parse"]
  79. con = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1" + "\n" + r"\2", con)
  80. con = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[123]\s*[))])", r"\n\1\n\3", con, flags=re.S)
  81. # con_list = re.split(r"(?<=\n)\s*[((]\s*[123]\s*[))]", "\n" + con)
  82. # print(con_list)
  83. parse = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", parse)
  84. parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
  85. parse = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[123]\s*[))])", r"\n\1\n\3", parse)
  86. # parse = re.sub("(答案分?别?[为是]?\s*[::])\s*[((]\s*(\d)\s*[))]", r"\1[#[\2]#]", parse)
  87. # 默认第一题是选择形式的填空题!!!!!
  88. def sub1(ss):
  89. if int(ss.group(1)) in [1, 2] and int(ss.group(4)) - int(ss.group(1)) == 1:
  90. return "({})".format(ss.group(1)) + ss.group(2).replace(" ", "") + "\n" + " ({})".format(ss.group(4))
  91. else:
  92. return ss
  93. ans = re.sub("[((]\s*(\d)\s*[))]\s*(([A-G][、、\s]*)+)\s*[((]\s*(\d)\s*[))]", sub1, ans)
  94. all_slave = []
  95. true_n = 2
  96. if re.search(r"(^|\n)[((]1[))]|(^|\n)[①②]\s*[((].*?(选[修学考]|[学考]生).*?[))]", con):
  97. con_list = re.split("\n[((][123][))]", "\n"+con)
  98. if len(con_list) < 2:
  99. con_list = re.split("\n[①②③]\s*[((].*?[选修学考生].*?[))]", "\n" + con)
  100. if re.search(r"\n[((]3[))]", con):
  101. if len(con_list)-1 != 3:
  102. text_errmsgs = "本选做题的小题格式不正确,请将每小题题干换行处理"
  103. one_item['errmsgs'].append(text_errmsgs) # if one_item['errmsgs'] else text_errmsgs
  104. return one_item
  105. else:
  106. true_n = 3
  107. elif len(con_list)-1 != 2:
  108. text_errmsgs = "本选做题的小题格式不正确,请将每小题题干换行处理"
  109. # one_item['errmsgs'] += ";" + text_errmsgs if one_item['errmsgs'] else text_errmsgs
  110. one_item['errmsgs'].append(text_errmsgs)
  111. return one_item
  112. con_list = con_list[1:]
  113. # if len(con_list)-1 == true_n:
  114. # con_list = con_list[1:]
  115. ans_list = re.split("\n[((][123][))]", "\n" + ans)
  116. if "#" in ans:
  117. ans_list = ans.split("#")
  118. else:
  119. if ans == "见解析" or (true_n ==2 and (len(ans_list)<=2 or len(ans_list)>3))\
  120. or (true_n ==3 and (len(ans_list)<=3 or len(ans_list)>4)):
  121. ans_list = ["见解析"]*true_n
  122. else:
  123. ans_list = ans_list[1:]
  124. # 解析
  125. if parse and parse != "略":
  126. parse_list = re.split("(?<=\n)\s*[((]\s*[12]\s*[))]", "\n" + parse, maxsplit=2)
  127. if true_n == 3:
  128. parse_list = re.split("(?<=\n)\s*[((]\s*[123]\s*[))]", "\n" + parse, maxsplit=3)
  129. if len(parse_list) == true_n+1:
  130. parse_list = parse_list[1:]
  131. for n in range(true_n):
  132. item_type = _type_judge(con_list[n], ans_list[n]) # 题型判断
  133. one_slave = {"stem": con_list[n].strip(),
  134. "key": ans_list[n].strip(),
  135. "parse": parse_list[n].strip(),
  136. "type": item_type,
  137. "item_id": str(one_item['item_id']) + "-" + str(n+1),
  138. "score": 0.0,
  139. "errmsgs": []
  140. }
  141. all_slave.append(one_slave)
  142. else:
  143. if ans_list == ["见解析"]*true_n:
  144. one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题答案和解析换行处理")
  145. else:
  146. one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题的解析换行处理")
  147. return one_item
  148. else:
  149. errmsgs = ["本题缺少解析"]
  150. if ans_list == ["见解析"]*true_n:
  151. errmsgs.append("本选做题缺少答案或答案格式不正确,请将每小题答案换行处理")
  152. for n in range(true_n):
  153. item_type = _type_judge(con_list[n], ans_list[n])
  154. one_slave = {"stem": con_list[n].strip(),
  155. "key": ans_list[n].strip() if ans_list[n]!="见解析" else "",
  156. "parse": "",
  157. "type": item_type,
  158. "item_id": str(one_item['item_id']) + "-" + str(n + 1),
  159. "score": 0.0,
  160. "errmsgs": errmsgs
  161. }
  162. all_slave.append(one_slave)
  163. else:
  164. text_errmsgs = "本选做题的小题格式不正确,请将第1小题的题干换行处理"
  165. one_item['errmsgs'].append(text_errmsgs)
  166. return one_item
  167. return all_slave
  168. def toslave(one_item, is_need_parse=0):
  169. """
  170. 线上使用的老接口函数
  171. 将一道题拆分为2-3道题, 先默认是题组形式
  172. :param one_item: 已经按题号结构化好的单道题目
  173. :return:
  174. """
  175. # th1 = {"(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
  176. # "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)"}
  177. # con = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)], con)
  178. # parse = re.sub(r"([\n】])\s*[((]\s*(" + "|".join(th1.keys()) + ")\s*[))]", lambda x: x.group(1) + th1[x.group(2)],
  179. # parse)
  180. # if "is_optional" not in one_item or one_item["is_optional"] != 'true':
  181. if one_item["item_topic_name"] != "选做题":
  182. return [one_item]
  183. con, ans, parse = one_item["content"], one_item["answer"], one_item["parse"]
  184. parse = re.sub("(<[/a-z]+>)\s*([((]\s*\d\s*[))])", r"\1\n\2", parse)
  185. parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
  186. parse = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"\n\1\n\3", parse)
  187. # parse = re.sub("(答案分?别?[为是]?\s*[::])\s*[((]\s*(\d)\s*[))]", r"\1[#[\2]#]", parse)
  188. con = re.sub("(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", con)
  189. con = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"\n\1\n\3", con)
  190. con_list = re.split(r"(?<=\n)\s*[((]\s*[1234]\s*[))]", "\n" + con)
  191. # print(con_list)
  192. ans = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"\n\1\n\3", ans)
  193. # def sub1(ss):
  194. # return "(1)" + ss.group(1).replace(" ", "") + " (2)"
  195. # ans = re.sub("[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]",
  196. # sub1, ans)
  197. ans_list = re.split(r"\n\s*[((][1234][))]", "\n"+ans)
  198. if len(con_list) not in [3, 4, 5]:
  199. if len(con_list) >= 2:
  200. one_item["errmsgs"].append("本选做题(题组)的小题格式不正确,请将每小题题干换行处理")
  201. else:
  202. return [one_item]
  203. elif "".join(re.findall("\n\s*[((]\s*([1234])\s*[))]", "\n" + con)) not in ['1234', '123', '12']:
  204. one_item["errmsgs"].append("本选做题(题组)的小题题号不连续,请将每小题题干中的题号连续设置")
  205. else: # 以题干的拆分为主
  206. # new_errors = []
  207. for right_num in range(3, 6): # 题组由2-3个小组组成
  208. if len(con_list)==right_num: # 只有一个条件满足
  209. con_list = con_list[1:]
  210. if ans == "见解析" or len(ans_list)<=right_num-1 or len(ans_list)>right_num:
  211. ans_list = ["见解析"]*(right_num-1)
  212. else:
  213. ans_list = ans_list[1:]
  214. all_slave = []
  215. # 解析
  216. if parse and parse != "略":
  217. if "".join(re.findall("\n\s*[((]\s*([1234])\s*[))]", "\n" + parse)) not in ['1234', '123', '12']:
  218. one_item["errmsgs"].append("本选做题(题组)的小题题号不连续,请将每小题解析中的题号连续设置")
  219. return [one_item]
  220. parse_list = re.split("(?<=\n)\s*[((]\s*[1234]\s*[))]", "\n" + parse)
  221. if len(parse_list)==right_num:
  222. parse_list = parse_list[1:]
  223. for n in range(right_num-1):
  224. item_type = _type_judge(con_list[n], ans_list[n]) # 题型判断
  225. one_slave = {"content": con_list[n].strip(),
  226. "answer": ans_list[n].strip(),
  227. "parse": parse_list[n].strip(),
  228. "item_topic_name": item_type,
  229. "item_id": str(one_item['item_id']) + "-" + str(n+1),
  230. "errmsgs": one_item["errmsgs"]
  231. }
  232. all_slave.append(one_slave)
  233. return all_slave
  234. else:
  235. if ans_list == ["见解析"] * (right_num - 1):
  236. one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题答案和解析换行处理")
  237. else:
  238. one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题的解析换行处理")
  239. else: # 无解析
  240. parse_list = [""]*(right_num-1)
  241. if ans_list == ["见解析"]*(right_num-1):
  242. one_item["errmsgs"].append("本选做题缺少答案或答案格式不正确,请将每小题答案换行处理")
  243. if is_need_parse:
  244. one_item["errmsgs"].append("本选做题缺少解析")
  245. else: # 有答案
  246. if "".join(re.findall("\n\s*[((]\s*([1234])\s*[))]", "\n" + ans)) not in ['1234', '123', '12']:
  247. one_item["errmsgs"].append("本选做题(题组)的小题题号不连续,请将每小题答案中的题号连续设置")
  248. return [one_item]
  249. for n in range(right_num-1):
  250. item_type = _type_judge(con_list[n], ans_list[n]) # 题型判断
  251. one_slave = {"content": con_list[n].strip(),
  252. "answer": ans_list[n].strip(),
  253. "parse": parse_list[n].strip(),
  254. "item_topic_name": item_type,
  255. "item_id": str(one_item['item_id']) + "-" + str(n + 1),
  256. "errmsgs": one_item["errmsgs"]
  257. }
  258. all_slave.append(one_slave)
  259. return all_slave
  260. return [one_item]
  261. def _type_judge(con, ans):
  262. """
  263. # 题型判断
  264. :return:
  265. """
  266. item_type = "解答题"
  267. b1 = re.search(r"选择?对[1一2两3三]个", con)
  268. con0 = re.sub("([A-H]\s*[..、、])+", "A、", "\n" + con)
  269. b21 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 3
  270. b22 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 5
  271. b31 = re.match(r"[A-H][A-H;;和与、、\s]*?$", ans.strip())
  272. b32 = re.match(r"[A-H][;;和与、、\s][A-H][A-H;;和与、、\s]*?$", ans.strip())
  273. if b1 or b22 or b32:
  274. item_type = "多选题"
  275. elif b31 or b21:
  276. item_type = "单选题"
  277. elif re.findall(r"_{2,}", ans.strip()):
  278. item_type = "填空题"
  279. return item_type
  280. if __name__ == '__main__':
  281. def sub1(ss):
  282. return "(1)" + ss.group(1).replace(" ", "") + "(2)"
  283. ans = "答案:(1)B C E (2)(ⅰ)T0 (ⅱ)"
  284. ans = re.sub("[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]",
  285. sub1, ans)
  286. print(ans)