#!/usr/bin/env python
# -*- coding:utf-8 -*-
# dati2slave.py (15 KB)
  3. import re
  4. from structure.ans_structure import only_parse_split
  5. from structure.option import option_structure
  # Sentinel that replaces every recognised sub-question label before splitting.
  _SUB_MARK = "【ⅳ】"

  # Roman-numeral sub-question labels and the "(n)" label they are rewritten to.
  _ROMAN_TO_ARABIC = {
      "(Ⅰ)": "(1)", "(Ⅱ)": "(2)", "(Ⅲ)": "(3)", "(IV)": "(4)", "(Ⅳ)": "(4)", "(Ⅴ)": "(5)",
      "Ⅰ": "(1)", "Ⅱ": "(2)", "Ⅲ": "(3)", "IV": "(4)", "Ⅳ": "(4)", "Ⅴ": "(5)",
  }
  _ROMAN_LABEL_RE = re.compile(
      r"([\n】])\s*[((]\s*(" + "|".join(_ROMAN_TO_ARABIC) + r")\s*[))]")

  # "(d)" label at the start of a line, not followed by "小题中"/"题中"/"问中".
  _NUMBERED_LINE_RE = re.compile(r"\n\s*[((]\s*\d\s*[))]\s*(?!小?[题问]中)")
  # "(1)".."(19)" label in the stem (also accepted right after "求").
  _NUMBERED_STEM_RE = re.compile(
      r"(?<=[\n::;;。求])\s*([((]\s*([1-9]|1[0-9])\s*[))])\s*(?!小?[题问]中)")
  # "(1)".."(19)" label in the answer (also accepted after two or more blanks).
  _NUMBERED_ANS_RE = re.compile(
      r"((?<=[\n::;;。])\s*|\s{2,})([((]\s*([1-9]|1[0-9])\s*[))])\s*(?!小?[题问]中)")
  # "(i)", "(ⅱ)", "(①)" or bare "①" style labels.
  _CIRCLED_RE = re.compile(
      r"(?<=[\n::;;。])\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[))]"
      r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
  # Same labels, only at the start of a line; applied to text with blanks removed.
  _CIRCLED_LINE_RE = re.compile(
      r"\n([((](i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])[))]|[①②③④⑤⑥⑦⑧⑨⑩])(?![+-])")
  # Same as _CIRCLED_RE but also accepted right after "求" (used for the stem).
  _CIRCLED_STEM_RE = re.compile(
      r"(?<=[\n::;;。求])\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[))]"
      r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")
  # Circled / roman label directly after a self-closing tag ("/>").
  _CIRCLED_AFTER_TAG_RE = re.compile(
      r"(/>)\s*([((]\s*(i{1,3}|[ⅰⅱⅲⅳ①②③④⑤⑥⑦⑧⑨⑩])\s*[))]"
      r"|[①②③④⑤⑥⑦⑧⑨⑩])\s*(?![+-])")


  def _roman_to_arabic(match):
      """Rewrite one roman sub-question label as its "(n)" equivalent."""
      return match.group(1) + _ROMAN_TO_ARABIC[match.group(2)]


  def _split_answer(ans, part_total):
      """Split the answer text per sub-question.

      :param ans: raw answer text of the whole question
      :param part_total: number of pieces the stem was split into
                         (shared stem + sub-questions)
      :return: ``(summary, answers)`` - the text before the first label and
               the list of per-sub-question answers
      """
      if ans.strip() == "见解析":
          # "see the analysis": every sub-question gets the same placeholder.
          return "", ["见解析"] * (part_total - 1)

      probe = "\n" + ans
      marked = ans
      # NOTE(review): this block was recovered from a paste that had lost its
      # indentation. The circled-label branch is assumed to be the alternative
      # to "the answer contains a (d) label at all" - confirm against VCS.
      if re.search(r"(\n\s*|\s{2,})[((]\s*\d\s*[))]", probe):
          # "(d)" labels take priority, but only when there are at least two.
          if len(_NUMBERED_ANS_RE.findall(probe)) > 1:
              marked = _NUMBERED_ANS_RE.sub(_SUB_MARK, probe)
      elif (len(_CIRCLED_RE.findall(probe)) > 1
            or len(_CIRCLED_LINE_RE.findall("\n" + ans.replace(" ", ""))) > 1):
          marked = _CIRCLED_RE.sub(_SUB_MARK, probe)

      pieces = marked.split(_SUB_MARK)
      summary, answers = pieces[0], pieces[1:]
      # Drop empty leading entries produced by adjacent markers.
      while answers and not answers[0]:
          answers = answers[1:]

      if part_total - len(answers) != 1:
          # Counts disagree with the stem: retry with a bare "(d)" split of the
          # untouched answer and accept it only if the counts then line up.
          fallback = re.split(r"[((]\s*\d\s*[))]", ans)
          if len(fallback) == part_total:
              summary, answers = fallback[0], fallback[1:]
      return summary, answers


  def _split_parse(parse, one_item):
      """Split the analysis text per sub-question.

      Side effect: copies ``one_item["topic_num"]`` into ``one_item["item_id"]``
      when ``topic_num`` is present.

      :param parse: non-empty analysis text of the whole question
      :param one_item: the question dict (read for ``topic_num`` / ``item_id``)
      :return: ``(parse_list, notes)`` - per-sub-question analyses and the
               pieces of text that belong to the question as a whole
      """
      notes = []
      parse_list = []

      # Keep only the text after the first 【详解】/【解析】/【解答】 tag; when only
      # 【分析】 exists, keep it together with its tag.
      if re.search('【(详解|解析|解答|分析)】', parse):
          pieces = re.split('【详解】|【解析】|【解答】', parse, maxsplit=1)
          if len(pieces) == 1:
              pieces = re.split('【分析】', parse, maxsplit=1)
              parse = "【分析】" + pieces[1].strip()
          else:
              parse = pieces[1].strip()
          notes.append(pieces[0])

      # A trailing 【点评】/【点睛】 section is a comment on the whole question.
      if re.search("【(点评|点睛)】", parse):
          head, tag, tail = re.split('(【点评】|【点睛】)', parse, maxsplit=1)
          notes.append(tag + tail)
          parse = head

      # Analysis that restarts with "<question number>、(1)": cut at that point.
      # NOTE(review): nesting reconstructed; the check is run whenever an
      # item_id is available, not only when topic_num is present.
      if "topic_num" in one_item:
          one_item["item_id"] = one_item["topic_num"]
      item_id = one_item.get("item_id")
      if item_id:
          probe = "\n" + parse
          hit = re.search(
              r"\n\s*" + re.escape(str(item_id)) + r"\s*[、..、]\s*[((]\s*1\s*[))]",
              probe)
          if hit:
              # Slice the same string that was searched, and capture the
              # leading text before `parse` is overwritten.
              lead = probe[:hit.start()]
              parse = "(1)" + probe[hit.end():]
              if lead.strip():
                  notes.append(lead)

      # Replace sub-question labels with the sentinel.
      if len(re.findall(r"[((]\d[))]", parse.replace(" ", ""))) > 1:
          parse = re.sub(r"(?<=[\n::;;。])\s*([((]\s*([1-9]|1[0-9])\s*[))])",
                         _SUB_MARK, "\n" + parse)
          parse = re.sub(r"(/>|【解】)\s*([((]\s*([1-9]|1[0-9])\s*[))])",
                         r"\1【ⅳ】", parse)
      else:
          parse = _CIRCLED_RE.sub(_SUB_MARK, "\n" + parse)
          parse = _CIRCLED_AFTER_TAG_RE.sub(r"\1【ⅳ】", parse)

      # Collapse "<mark>解: <mark>" into a single marker.
      # NOTE(review): "\s" before the colon requires exactly one blank; "\s*"
      # may have been intended. Pattern left unchanged.
      parse = re.sub(r"(【ⅳ】\s*解答?\s[::])\s*【ⅳ】", r"\1", parse)

      pieces = parse.split(_SUB_MARK)
      if len(pieces) > 1:
          # Text before the first marker is kept only if it is long enough to
          # be meaningful.
          if len(pieces[0].strip()) >= 5:
              notes.append(pieces[0])
          parse_list = pieces[1:]
      return parse_list, notes


  def _classify_single_question(one_item, con, ans):
      """Set ``answer_type`` (and related keys) on a question without sub-questions."""
      stem = one_item["stem"]
      blanks = re.findall(r"_{2,}", stem)
      if blanks:
          one_item["blank_num"] = len(blanks)
          one_item["answer_type"] = "填空题"
      elif len(re.findall(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[..、、]", stem)) >= 3:
          # At least three "A." style labels: let option_structure extract them.
          one_item = option_structure(one_item, con, ans, 1)
          one_item["answer_type"] = "选择题"
          if 'options' not in one_item:
              one_item["options"] = []
      elif re.search(r"[((]\s+[))]\s*$", stem) or one_item.get("type") == "判断题":
          one_item["answer_type"] = "判断题"
          key = (one_item.get("key") or "").strip()
          if re.match("【?(对的?|正确的?|[T√])】?$", key):
              one_item["key"] = "正确"
          elif re.match("【?(错误?的?|不对的?|不正确的?|[F×])】?$", key):
              one_item["key"] = "错误"
      elif re.search("[横划画]线处填写", stem) and "com_stem" in one_item:
          one_item["answer_type"] = "填空题"
          blank_num = len(re.findall(r"_{2,}", one_item["com_stem"]))
          if blank_num > 0:
              one_item["blank_num"] = blank_num
          else:
              # NOTE(review): nesting reconstructed - assumed that a shared stem
              # without any blank demotes the item to a free-response question.
              one_item["answer_type"] = "解答题"
      else:
          one_item["answer_type"] = "解答题"
      return one_item


  def get_slave(one_item, con, parse, ans, parse_split=1):
      """Split a question with sub-questions into per-sub-question "slave" items.

      Sub-question labels ("(1)", "(Ⅰ)", "(i)", "①" ...) in the stem are replaced
      by a sentinel and the stem is split on it. When ``parse_split`` is truthy
      the answer and the analysis are split the same way. The pieces are then
      combined by ``split2little_con``. When no sub-questions are found, only
      ``answer_type`` (plus ``blank_num`` / ``options`` / ``key`` where
      applicable) is set on ``one_item``.

      NOTE(review): this function was recovered from a paste whose indentation
      had been stripped; the block nesting is a best-effort reconstruction.

      :param one_item: question dict; must contain ``stem``
      :param con: stem text
      :param parse: analysis text (may be empty)
      :param ans: answer text
      :param parse_split: truthy to split answer and analysis as well
      :return: the updated question dict
      """
      parse = parse or ""

      # Put every "(n)" label that directly follows a tag or a score marker
      # such as "(3分)" on its own line so the line-based patterns can see it.
      con = re.sub(
          r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*([1-9]|1[0-9])\s*[))])",
          r"\1" + "\n" + r"\2", con)
      parse = re.sub(r"(<[/a-z]+>)\s*([((]\s*([1-9]|1[0-9])\s*[))])",
                     r"\1" + "\n" + r"\2", parse)

      # Split on "(d)" lines first; fall back to other label styles only when
      # that finds fewer than two sub-questions.
      split_style = 1
      if len(_NUMBERED_LINE_RE.findall("\n" + con)) > 1:
          con = _NUMBERED_STEM_RE.sub(_SUB_MARK, "\n" + con)
      else:
          # The template asks for "(1)(2)" labels: normalise roman numerals.
          con = _ROMAN_LABEL_RE.sub(_roman_to_arabic, con)
          parse = _ROMAN_LABEL_RE.sub(_roman_to_arabic, parse)
          if len(_NUMBERED_LINE_RE.findall("\n" + con)) > 1:
              con = _NUMBERED_STEM_RE.sub(_SUB_MARK, "\n" + con)
          elif len(_CIRCLED_LINE_RE.findall("\n" + con.replace(" ", ""))) > 1:
              con = _CIRCLED_STEM_RE.sub(_SUB_MARK, "\n" + con)
              split_style = 2
          else:
              split_style = 0

      if not split_style:
          return _classify_single_question(one_item, con, ans)

      con_list = con.split(_SUB_MARK)
      ans_list = []
      parse_list = []
      analy_comment = []
      ans_summarize = ""
      if parse_split:
          # Answer labels must line up with the stem labels.
          ans_summarize, ans_list = _split_answer(ans, len(con_list))
          if parse:
              parse_list, analy_comment = _split_parse(parse, one_item)

      # Sub-questions are built even when the analysis is not split.
      one_item = split2little_con(con_list, ans_list, parse_list, one_item)
      if parse_split and "slave" in one_item:
          # Once the sub-questions carry their own analysis / answer, the outer
          # fields keep only the text that belongs to the whole question.
          first_slave = one_item["slave"][0]
          if first_slave["parse"].strip():
              one_item['parse'] = "\n".join(analy_comment).strip()
          if first_slave["key"].strip():
              one_item['key'] = ans_summarize.strip()
      return one_item
  def split2little_con(con_list, ans_list, parse_list, one_item):
      """Combine the split stem, answers and analyses into structured sub-questions.

      When ``con_list`` has more than one element, ``one_item["stem"]`` is set to
      the shared stem (``con_list[0]``), ``one_item["slave"]`` to one dict per
      sub-question and ``one_item["slave_no"]`` to ``"1"`` or ``"1-<count>"``.
      Otherwise ``one_item`` is returned untouched.

      Answers / analyses are attached by index only when their count equals the
      number of sub-questions; otherwise the field stays empty.

      :param con_list: stem split on sub-question labels; element 0 is the
                       shared stem
      :param ans_list: per-sub-question answers
      :param parse_list: per-sub-question analyses
      :param one_item: the question dict to update
      :return: ``one_item``
      """
      if len(con_list) <= 1:
          return one_item

      # An empty first element means there is no shared stem, only sub-questions.
      one_item["stem"] = con_list[0]
      sub_stems = con_list[1:]
      attach_parse = len(con_list) - len(parse_list) == 1
      attach_key = isinstance(ans_list, list) and len(con_list) - len(ans_list) == 1

      slave = []
      for index, s in enumerate(sub_stems):  # the stem split drives the loop
          blank_num = len(re.findall(r"_{2,}", s))
          # Strip a score marker such as "(3分)" from the first 9 characters.
          s = re.sub(r"[((]\d+分[))]", "", s[:9]) + s[9:]
          one_slave = {
              "slave_no": "(%s)" % (index + 1),
              "stem": s,
              "key": "",
              "parse": "",
              "answer_type": "解答题",
              "errmsgs": [],
          }
          if attach_parse:
              one_slave["parse"] = parse_list[index]
          if attach_key:
              one_slave["key"] = ans_list[index]

          # At least three "A." style labels: the sub-question may be multiple
          # choice (its answer type is not necessarily multiple choice, though).
          if len(re.findall(r"[\n\s\u4e00-\u9fa5]\s*[A-D]\s*[..、、]", s)) >= 3:
              raw_ans = one_slave["key"]
              raw_stem = one_slave["stem"]
              one_slave = option_structure(one_slave, s, one_slave["key"], 1, is_slave=1)
              one_slave["answer_type"] = "选择题"
              if not one_slave.get("options"):
                  # Option extraction failed: restore the answer.
                  one_slave["key"] = raw_ans
              elif blank_num > 1:
                  # Several blanks win over options: treat as fill-in-the-blank
                  # and undo what option_structure changed.
                  one_slave["answer_type"] = "填空题"
                  one_slave["key"] = raw_ans
                  one_slave["stem"] = raw_stem
                  # pop() instead of del: tolerate a missing "options_rank".
                  one_slave.pop("options", None)
                  one_slave.pop("options_rank", None)

          if not one_slave.get("options"):
              if blank_num > 0:
                  one_slave["blank_num"] = blank_num
                  one_slave["answer_type"] = "填空题"
              elif re.search(r"[((]\s+[))]\s*$", s):
                  # Ends with an empty pair of brackets: true/false question.
                  one_slave["answer_type"] = "判断题"
              elif re.search("[横划画]线处填写", s):
                  # "fill in on the line": the blanks live in the shared stem.
                  one_slave["answer_type"] = "填空题"
                  blank_num = len(re.findall(r"_{2,}", one_item["stem"]))
                  if blank_num > 0:
                      one_slave["blank_num"] = blank_num
              else:
                  one_slave["answer_type"] = "解答题"
          slave.append(one_slave)

      one_item["slave"] = slave
      one_item["slave_no"] = "1-{}".format(len(slave)) if len(slave) > 1 else "1"
      return one_item