# Handles the new optional-question ("选做题") format of the new Gaokao:
# one question contains two sub-questions (fill-in-the-blank + free-response).
# The two sub-questions must be split out as separate items, numbered
# e.g. 12-1 or 12(1).
import re
def toslave_aft(one_item):
    """Split one optional question into two sub-questions stored under "slave".

    Per the original note, this is meant to run after the ``wash_after`` step.

    The item is only processed when ``one_item["is_optional"] == 'true'``.
    On success the two sub-questions (the first typed "填空题", the second
    "解答题") are stored in ``one_item["slave"]`` and the ``stem``/``key``/
    ``parse`` keys are removed. When the stem or the analysis cannot be split
    into exactly two "(1)"/"(2)" parts, an error message is added to
    ``one_item["errmsgs"]`` and the item is otherwise left untouched. When the
    analysis is "略" nothing is split and no error is recorded.

    :param one_item: a single question already structured by question number;
        reads ``stem``, ``key``, ``parse``, ``item_id`` and ``errmsgs``
    :return: the same ``one_item`` dict (mutated in place)
    """
    placeholder = "见解析"

    def _add_error(message):
        # errmsgs is handled as a ";"-joined string in this function; a list
        # is appended to instead of being extended character by character.
        current = one_item.get("errmsgs")
        if isinstance(current, list):
            current.append(message)
        elif current:
            one_item["errmsgs"] = current + ";" + message
        else:
            one_item["errmsgs"] = message

    def _tidy(text):
        # Trim leading/trailing whitespace and <br/> breaks, keep inner breaks.
        return text.replace("<br/>", "\n").strip().replace("\n", "<br/>")

    def _compact_choice(match):
        # "(1) B C E (2)" -> "(1)BCE (2)" so the answer can be split below.
        return "(1)" + match.group(1).replace(" ", "") + " (2)"

    # Only optional questions are split.
    if one_item.get("is_optional") != 'true':
        return one_item

    con, ans, parse = one_item["stem"], one_item["key"], one_item["parse"]

    # Force a <br/> before a "(n)" marker that directly follows a tag or a
    # "(N分)" score note, so every sub-question starts on its own line.
    con = re.sub(r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1<br/>\2", con)
    parse = re.sub(r"(<[/a-z]+>)\s*([((]\s*\d\s*[))])", r"\1<br/>\2", parse)
    # Drop a leading "【解析】" / "解答:" style label from the analysis.
    parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
    # Images placed right before a marker are kept on their own line.
    parse = re.sub(r"<br/>\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"<br/>\1<br/>\3", parse)
    con = re.sub(r"<br/>\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])", r"<br/>\1<br/>\3", con)

    con_list = re.split(r"(?<=<br/>)\s*[((]\s*[1234]\s*[))]", "<br/>" + con)
    if len(con_list) != 3:
        _add_error("本选做题的两小题格式不正确,请将每小题题干换行处理")
        return one_item
    con_list = con_list[1:]

    ans = re.sub(r"[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]", _compact_choice, ans)
    # The parentheses are escaped: unescaped they are capture groups and the
    # pattern can never match the "(1)BCE (2)" text produced just above.
    ans_list = re.split(r"\(1\)\s*([A-G]+)\s*\(2\)", ans)
    if ans == placeholder or len(ans_list) != 3:
        ans_list = [placeholder] * 2
    else:
        ans_list = ans_list[1:]

    # Analysis: "略" means there is nothing to split, the item stays as-is.
    if parse == "略":
        return one_item

    parse_list = re.split(r"(?<=<br/>)\s*[((]\s*[12]\s*[))]", "<br/>" + parse)
    if len(parse_list) != 3:
        _add_error("本选做题的两小题格式不正确,请将每小题答案换行处理")
        return one_item
    parse_list = parse_list[1:]

    one_item["slave"] = [
        {
            "stem": _tidy(con_list[n]),
            "key": _tidy(ans_list[n]),
            "parse": _tidy(parse_list[n]),
            # The first sub-question is typed as fill-in-the-blank, the second as free-response.
            "type": "填空题" if not n else "解答题",
            "item_id": str(one_item['item_id']) + "-" + str(n + 1),
        }
        for n in range(2)
    ]
    del one_item['stem'], one_item['key'], one_item['parse']
    return one_item
def toslave_bef(one_item):
    """Split one optional question into 2-3 sub-question dicts.

    The stem must contain a "(1)" marker at the start of a line (or a "①"/"②"
    marker followed by a parenthesised elective label). It is then split into
    two parts, or three when a "(3)" marker is present. The answer and
    analysis are split the same way; an answer containing "#" is split on "#"
    instead.

    :param one_item: a single question already structured by question number;
        reads ``stem``, ``key``, ``parse``, ``item_id`` and appends to the
        ``errmsgs`` list
    :return: a list of sub-question dicts on success; on a format error the
        original ``one_item`` dict with a message appended to ``errmsgs``
    """
    placeholder = "见解析"
    con, ans, parse = one_item["stem"], one_item["key"], one_item["parse"]

    # Put a "(n)" marker that directly follows a tag or a "(N分)" score note
    # on its own line, and keep images preceding a marker on a separate line.
    con = re.sub(r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", con)
    con = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[123]\s*[))])", r"\n\1\n\3", con, flags=re.S)
    parse = re.sub(r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", parse)
    # Drop a leading "【解析】" / "解答:" style label from the analysis.
    parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
    parse = re.sub(r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[123]\s*[))])", r"\n\1\n\3", parse)

    # By default the first sub-question is assumed to be a choice-style blank.
    def _normalize_choice_answer(match):
        first, second = int(match.group(1)), int(match.group(4))
        if first in (1, 2) and second - first == 1:
            return ("({})".format(match.group(1)) + match.group(2).replace(" ", "")
                    + "\n" + " ({})".format(match.group(4)))
        # A replacement callback must return a string; returning the match
        # object itself makes re.sub raise TypeError.
        return match.group(0)

    ans = re.sub(r"[((]\s*(\d)\s*[))]\s*(([A-G][、、\s]*)+)\s*[((]\s*(\d)\s*[))]",
                 _normalize_choice_answer, ans)

    if not re.search(r"(^|\n)[((]1[))]|(^|\n)[①②]\s*[((].*?(选[修学考]|[学考]生).*?[))]", con):
        one_item['errmsgs'].append("本选做题的小题格式不正确,请将第1小题的题干换行处理")
        return one_item

    con_list = re.split(r"\n[((][123][))]", "\n" + con)
    if len(con_list) < 2:
        con_list = re.split(r"\n[①②③]\s*[((].*?[选修学考生].*?[))]", "\n" + con)
    # Three sub-questions are expected only when a "(3)" marker starts a line.
    true_n = 3 if re.search(r"\n[((]3[))]", con) else 2
    if len(con_list) - 1 != true_n:
        one_item['errmsgs'].append("本选做题的小题格式不正确,请将每小题题干换行处理")
        return one_item
    con_list = con_list[1:]

    if "#" in ans:
        ans_list = ans.split("#")
        if len(ans_list) < true_n:
            # Too few "#" parts would raise IndexError below.
            ans_list = [placeholder] * true_n
    else:
        # Whitespace after the newline is tolerated because the normalisation
        # above emits "\n (2)".
        ans_list = re.split(r"\n\s*[((][123][))]", "\n" + ans)
        if ans == placeholder or len(ans_list) != true_n + 1:
            ans_list = [placeholder] * true_n
        else:
            ans_list = ans_list[1:]

    has_parse = bool(parse) and parse != "略"
    if has_parse:
        marker = r"(?<=\n)\s*[((]\s*[%s]\s*[))]" % "123"[:true_n]
        parse_list = re.split(marker, "\n" + parse, maxsplit=true_n)
        if len(parse_list) != true_n + 1:
            if ans_list == [placeholder] * true_n:
                one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题答案和解析换行处理")
            else:
                one_item["errmsgs"].append("本选做题的小题格式不正确,请将每小题的解析换行处理")
            return one_item
        parse_list = parse_list[1:]
        slave_errors = []
    else:
        parse_list = [""] * true_n
        slave_errors = ["本题缺少解析"]
        if ans_list == [placeholder] * true_n:
            slave_errors.append("本选做题缺少答案或答案格式不正确,请将每小题答案换行处理")

    all_slave = []
    for n in range(true_n):
        raw_key = ans_list[n]
        # Without an analysis, a placeholder answer is stored as an empty key.
        if not has_parse and raw_key == placeholder:
            key = ""
        else:
            key = raw_key.strip()
        all_slave.append({
            "stem": con_list[n].strip(),
            "key": key,
            "parse": parse_list[n].strip(),
            "type": _type_judge(con_list[n], raw_key),  # question-type guess
            "item_id": str(one_item['item_id']) + "-" + str(n + 1),
            "score": 0.0,
            # Each sub-question gets its own list so later edits do not leak.
            "errmsgs": list(slave_errors),
        })
    return all_slave
def toslave(one_item, is_need_parse=0):
    """Split one optional question into 2-4 sub-question dicts.

    The original docstring describes this as the legacy interface used
    online. Items whose ``item_topic_name`` is not "选做题" are returned
    unchanged. Otherwise the stem is split on "(1)".."(4)" markers that start
    a line; the stem drives the split, and the answer and analysis must
    follow the same numbering.

    :param one_item: a single question already structured by question number;
        reads ``content``, ``answer``, ``parse``, ``item_id`` and appends to
        the ``errmsgs`` list
    :param is_need_parse: when truthy, a missing analysis is also reported,
        but only on the path where the answers are missing as well
    :return: a list of sub-question dicts on success, otherwise
        ``[one_item]`` (with a message appended to ``errmsgs`` when the format
        is wrong)
    """
    if one_item["item_topic_name"] != "选做题":
        return [one_item]

    placeholder = "见解析"
    valid_sequences = ('1234', '123', '12')
    numbered_marker = r"\n\s*[((]\s*([1234])\s*[))]"
    image_before_marker = r"\n\s*((<img src=.*?[/\"]>\s*)+)\s*([((]\s*[1234]\s*[))])"
    split_marker = r"(?<=\n)\s*[((]\s*[1234]\s*[))]"

    def _fail(message):
        one_item["errmsgs"].append(message)
        return [one_item]

    def _numbering(text):
        # Concatenated digits of all line-leading "(n)" markers, e.g. "123".
        return "".join(re.findall(numbered_marker, "\n" + text))

    con, ans, parse = one_item["content"], one_item["answer"], one_item["parse"]

    # Put a "(n)" marker that directly follows a tag on its own line, drop a
    # leading "【解析】" style label, keep images before a marker separate.
    parse = re.sub(r"(<[/a-z]+>)\s*([((]\s*\d\s*[))])", r"\1\n\2", parse)
    parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[::])", "", parse)
    parse = re.sub(image_before_marker, r"\n\1\n\3", parse)
    con = re.sub(r"(<[/a-z]+>|[((]\s*\d+\s*分\s*[))])\s*([((]\s*\d\s*[))])", r"\1\n\2", con)
    con = re.sub(image_before_marker, r"\n\1\n\3", con)
    con_list = re.split(split_marker, "\n" + con)
    ans = re.sub(image_before_marker, r"\n\1\n\3", ans)
    ans_list = re.split(r"\n\s*[((][1234][))]", "\n" + ans)

    right_num = len(con_list)  # leading text + one piece per sub-question
    if right_num not in (3, 4, 5):
        if right_num >= 2:
            return _fail("本选做题(题组)的小题格式不正确,请将每小题题干换行处理")
        # No marker at all: not a grouped question, return it untouched.
        return [one_item]
    if _numbering(con) not in valid_sequences:
        return _fail("本选做题(题组)的小题题号不连续,请将每小题题干中的题号连续设置")

    # The stem split is authoritative from here on.
    sub_count = right_num - 1
    con_list = con_list[1:]
    if ans == placeholder or len(ans_list) != right_num:
        ans_list = [placeholder] * sub_count
    else:
        ans_list = ans_list[1:]
    answers_missing = ans_list == [placeholder] * sub_count

    if parse and parse != "略":
        if _numbering(parse) not in valid_sequences:
            return _fail("本选做题(题组)的小题题号不连续,请将每小题解析中的题号连续设置")
        parse_list = re.split(split_marker, "\n" + parse)
        if len(parse_list) != right_num:
            if answers_missing:
                return _fail("本选做题的小题格式不正确,请将每小题答案和解析换行处理")
            return _fail("本选做题的小题格式不正确,请将每小题的解析换行处理")
        parse_list = parse_list[1:]
    else:  # no analysis
        parse_list = [""] * sub_count
        if answers_missing:
            one_item["errmsgs"].append("本选做题缺少答案或答案格式不正确,请将每小题答案换行处理")
            # NOTE(review): the missing-analysis message is only added when
            # the answers are missing too - confirm this nesting is intended.
            if is_need_parse:
                one_item["errmsgs"].append("本选做题缺少解析")
        elif _numbering(ans) not in valid_sequences:  # answers present
            return _fail("本选做题(题组)的小题题号不连续,请将每小题答案中的题号连续设置")

    return [
        {
            "content": con_list[n].strip(),
            "answer": ans_list[n].strip(),
            "parse": parse_list[n].strip(),
            "item_topic_name": _type_judge(con_list[n], ans_list[n]),  # question-type guess
            "item_id": str(one_item['item_id']) + "-" + str(n + 1),
            # Copied so the sub-questions do not alias the parent's list.
            "errmsgs": list(one_item["errmsgs"]),
        }
        for n in range(sub_count)
    ]
- def _type_judge(con, ans):
- """
- # 题型判断
- :return:
- """
- item_type = "解答题"
- b1 = re.search(r"选择?对[1一2两3三]个", con)
- con0 = re.sub("([A-H]\s*[..、、])+", "A、", "\n" + con)
- b21 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 3
- b22 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 5
- b31 = re.match(r"[A-H][A-H;;和与、、\s]*?$", ans.strip())
- b32 = re.match(r"[A-H][;;和与、、\s][A-H][A-H;;和与、、\s]*?$", ans.strip())
- if b1 or b22 or b32:
- item_type = "多选题"
- elif b31 or b21:
- item_type = "单选题"
- elif re.findall(r"_{2,}", ans.strip()):
- item_type = "填空题"
- return item_type
if __name__ == '__main__':
    # Ad-hoc demo of the answer normalisation: the spaces between the option
    # letters of sub-question (1) are removed, then the result is printed.
    def sub1(ss):
        return "(1)" + ss.group(1).replace(" ", "") + "(2)"

    ans = "答案:(1)B C E (2)(ⅰ)T0 (ⅱ)"
    ans = re.sub(r"[((]\s*1\s*[))]\s*(([A-G]\s*)+)\s*[((]\s*2\s*[))]",
                 sub1, ans)
    print(ans)