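# Handles the new optional-question format of the new Gaokao: one question
# contains two sub-questions (a fill-in-the-blank plus a free-response question).
# The two sub-questions are split out individually and numbered like 12-1 or 12(1).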
import re
def toslave_aft(one_item):
    """
    Split one two-part optional question into two sub-questions.

    Items whose ``"is_optional"`` field is missing or is not the string
    ``'true'`` are returned untouched.  Otherwise the stem, key and analysis
    are each cut at the ``(1)`` / ``(2)`` markers that start a line.  On
    success the two parts are stored under ``one_item["slave"]`` (first part
    typed "填空题", second "解答题") and the original ``"stem"``, ``"key"`` and
    ``"analysis"`` entries are removed.  On failure a message is appended to
    ``one_item["text_errmsgs"]`` and the item is otherwise left as it was.

    Any text that precedes the ``(1)`` marker is dropped from the
    sub-questions.  Roman-numeral markers such as (Ⅰ)/(Ⅱ) are not normalised
    here.

    NOTE(review): line breaks are assumed to be "\\n" in all three fields;
    confirm against the producer of ``one_item``.

    :param one_item: dict for a single question, already structured by
        question number; reads "is_optional", "stem", "key", "analysis",
        "item_id" and "text_errmsgs".
    :return: the same ``one_item`` object, modified in place.
    """
    if one_item.get("is_optional") != 'true':
        return one_item

    # Both ASCII and full-width parentheses/colons are accepted; the
    # full-width forms are written as escapes so they survive re-encoding.
    lp, rp = r"[(\uff08]", r"[)\uff09]"
    any_marker = lp + r"\s*\d\s*" + rp
    sub_marker = lp + r"\s*[12]\s*" + rp
    indented_marker = r"\n\s*(" + sub_marker + r")"
    marker_at_line_start = r"(?<=\n)\s*" + sub_marker

    def add_error(message):
        # Messages accumulate in one ';'-separated string; a missing or
        # empty field simply starts a new one.
        existing = one_item.get('text_errmsgs') or ""
        if existing:
            existing += ";"
        one_item['text_errmsgs'] = existing + message
        return one_item

    con, ans, parse = one_item["stem"], one_item["key"], one_item["analysis"]

    # Put a marker on its own line when it is glued to an HTML tag or to a
    # "(N 分)" score note.
    con = re.sub(r"(<[/a-z]+>|" + lp + r"\s*\d+\s*分\s*" + rp + r")\s*(" + any_marker + r")",
                 r"\1\n\2", con)
    parse = re.sub(r"(<[/a-z]+>)\s*(" + any_marker + r")", r"\1\n\2", parse)
    # Drop a leading "【解析】" / "解答:" style heading from the analysis.
    parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[:\uff1a])", "", parse)
    # Remove indentation in front of a marker that already starts a line.
    parse = re.sub(indented_marker, r"\n\n\1", parse)
    con = re.sub(indented_marker, r"\n\n\1", con)

    con_list = re.split(marker_at_line_start, "\n" + con)
    if len(con_list) != 3:
        return add_error("本选做题的两小题格式不正确,请将每小题题干换行处理")
    con_list = con_list[1:]

    def sub1(ss):
        # Canonical form "(1)<letters> (2)" so the split below can find it.
        return "(1)" + re.sub(r"\s+", "", ss.group(1)) + " (2)"

    ans = re.sub(lp + r"\s*1\s*" + rp + r"\s*(([A-G]\s*)+)\s*" + lp + r"\s*2\s*" + rp,
                 sub1, ans)
    # The parentheses are escaped: only the letters are captured, so a
    # successful split yields [prefix, letters, remainder].
    ans_list = re.split(r"\(1\)\s*([A-G]+)\s*\(2\)", ans)
    if ans == "见解析" or len(ans_list) != 3:
        ans_list = ["见解析"] * 2
    else:
        ans_list = ans_list[1:]

    if parse == "略":
        # An omitted analysis cannot be split; each part keeps the placeholder.
        parse_list = [parse] * 2
    else:
        parse_list = re.split(marker_at_line_start, "\n" + parse)
        if len(parse_list) != 3:
            return add_error("本选做题的两小题格式不正确,请将每小题答案换行处理")
        parse_list = parse_list[1:]

    one_item["slave"] = [
        {"stem": con_list[n].strip(),
         "key": ans_list[n].strip(),
         "analysis": parse_list[n].strip(),
         "type": "填空题" if n == 0 else "解答题",
         "item_id": str(one_item['item_id']) + "-" + str(n + 1),
         }
        for n in range(2)
    ]
    del one_item['stem'], one_item['key'], one_item['analysis']
    return one_item
def toslave_bef(one_item):
    """
    Split one optional question into its two or three sub-questions.

    The stem is cut at the ``(1)``/``(2)``/``(3)`` markers that start a line
    (or, when none is found, at ①/②/③ headings followed by a parenthesised
    note); any text before the first marker is dropped.  The key is cut at
    the same markers, or at ``#`` when it contains one, and falls back to
    "见解析" for every part when it cannot be split.  The analysis is cut at
    the markers as well; when it is empty or "略", the sub-questions are still
    produced, each carrying a "本题缺少解析" message.

    Roman-numeral markers such as (Ⅰ)/(Ⅱ) are not normalised here.  The first
    sub-question is assumed to be a choice-style question when the key is
    normalised (see ``sub1`` below).

    :param one_item: dict for a single question, already structured by
        question number; reads "stem", "key", "parse" and "item_id".
    :return: a list of sub-question dicts on success.  When the layout cannot
        be split, a message is appended to ``one_item["errmsgs"]`` and
        ``one_item`` itself is returned instead.
    """
    # Both ASCII and full-width punctuation are accepted; the full-width
    # forms are written as escapes so they survive re-encoding.
    lp, rp = r"[(\uff08]", r"[)\uff09]"
    line_marker = r"\n" + lp + "[123]" + rp
    indented_marker = r"\n\s*(" + lp + r"\s*[123]\s*" + rp + r")"
    # An HTML tag or a "(N 分)" score note immediately followed by a marker.
    glued_marker = (r"(<[/a-z]+>|" + lp + r"\s*\d+\s*分\s*" + rp + r")\s*("
                    + lp + r"\s*\d\s*" + rp + r")")
    stem_msg = "本选做题的小题格式不正确,请将每小题题干换行处理"

    def fail(message):
        # Record the problem on the parent item and hand it back unsplit.
        one_item.setdefault('errmsgs', []).append(message)
        return one_item

    con = one_item["stem"]
    # Key and analysis may be absent or None; treat both as empty text.
    ans = one_item.get("key") or ""
    parse = one_item.get("parse") or ""

    con = re.sub(glued_marker, r"\1\n\2", con)
    # Remove indentation in front of a marker that already starts a line.
    con = re.sub(indented_marker, r"\n\n\1", con)
    parse = re.sub(glued_marker, r"\1\n\2", parse)
    # Drop a leading "【解析】" / "解答:" style heading from the analysis.
    parse = re.sub(r"^\s*(【解[答析]】|解[答析]\s*[:\uff1a])", "", parse)
    parse = re.sub(indented_marker, r"\n\n\1", parse)

    def sub1(ss):
        # "(1) B C E (2)" -> "(1)BCE\n(2)" so the answer can be split by line.
        # Only consecutive markers starting at 1 or 2 are rewritten; anything
        # else is returned as matched.
        first, second = int(ss.group(1)), int(ss.group(4))
        if first in (1, 2) and second - first == 1:
            letters = ss.group(2).replace(" ", "")
            # No space after the newline: the split below needs the marker
            # to be the first character of its line.
            return "({})".format(first) + letters + "\n" + "({})".format(second)
        return ss.group(0)

    ans = re.sub(lp + r"\s*(\d)\s*" + rp + r"\s*(([A-G][、\uff64\s]*)+)\s*"
                 + lp + r"\s*(\d)\s*" + rp, sub1, ans)

    first_marker = (r"(^|\n)" + lp + "1" + rp
                    + r"|(^|\n)[①②]\s*" + lp + r".*?(选[修学]|[学考]生).*?" + rp)
    if not re.search(first_marker, con):
        return fail("本选做题的小题格式不正确,请将第1小题的题干换行处理")

    con_list = re.split(line_marker, "\n" + con)
    if len(con_list) < 2:
        con_list = re.split(r"\n[①②③]\s*" + lp + r".*?[选修学考生].*?" + rp, "\n" + con)
    true_n = 2
    if re.search(r"\n" + lp + "3" + rp, con):
        if len(con_list) - 1 != 3:
            return fail(stem_msg)
        true_n = 3
    elif len(con_list) - 1 != 2:
        return fail(stem_msg)
    con_list = con_list[1:]

    no_answers = ["见解析"] * true_n
    if "#" in ans:
        ans_list = ans.split("#")
        if len(ans_list) < true_n:
            # Too few '#'-separated parts to give every sub-question a key.
            ans_list = list(no_answers)
    else:
        ans_list = re.split(line_marker, "\n" + ans)
        if ans == "见解析" or len(ans_list) != true_n + 1:
            ans_list = list(no_answers)
        else:
            ans_list = ans_list[1:]

    all_slave = []
    if parse and parse != "略":
        digits = "[123]" if true_n == 3 else "[12]"
        parse_list = re.split(r"(?<=\n)\s*" + lp + r"\s*" + digits + r"\s*" + rp,
                              "\n" + parse, maxsplit=true_n)
        if len(parse_list) != true_n + 1:
            if ans_list == no_answers:
                return fail("本选做题的小题格式不正确,请将每小题答案和解析换行处理")
            return fail("本选做题的小题格式不正确,请将每小题的解析换行处理")
        parse_list = parse_list[1:]
        for n in range(true_n):
            item_type = _type_judge(con_list[n], ans_list[n])
            all_slave.append({"stem": con_list[n].strip(),
                              "key": ans_list[n].strip(),
                              "parse": parse_list[n].strip(),
                              "type": item_type,
                              "item_id": str(one_item['item_id']) + "-" + str(n + 1),
                              "errmsgs": [],
                              })
    else:
        errmsgs = ["本题缺少解析"]
        if ans_list == no_answers:
            errmsgs.append("本选做题缺少答案或答案格式不正确,请将每小题答案换行处理")
        for n in range(true_n):
            item_type = _type_judge(con_list[n], ans_list[n])
            all_slave.append({"stem": con_list[n].strip(),
                              "key": ans_list[n].strip() if ans_list[n] != "见解析" else "",
                              "parse": "",
                              "type": item_type,
                              "item_id": str(one_item['item_id']) + "-" + str(n + 1),
                              "score": 0.0,
                              # Each sub-question owns its own message list.
                              "errmsgs": list(errmsgs),
                              })
    return all_slave
def _type_judge(con, ans):
"""
# 题型判断
:return:
"""
item_type = "解答题"
b1 = re.search(r"选择?对[1一2两3三]个", con)
con0 = re.sub("([A-H]\s*[..、、])+", "A、", "\n" + con)
b21 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 3
b22 = len(re.findall(r"[\n\s]\s*[A-H]\s*[..、、]", con0)) >= 5
b31 = re.match(r"[A-H][A-H;;和与、、\s]*?$", ans.strip())
b32 = re.match(r"[A-H][;;和与、、\s][A-H][A-H;;和与、、\s]*?$", ans.strip())
if b1 or b22 or b32:
item_type = "多选题"
elif b31 or b21:
item_type = "单选题"
elif re.findall(r"_{2,}", ans.strip()):
item_type = "填空题"
return item_type
if __name__ == '__main__':
    # Manual check of the key normalisation: the spaced option letters
    # between the "(1)" and "(2)" markers are squeezed together.
    def sub1(ss):
        return "(1)" + ss.group(1).replace(" ", "") + "(2)"

    ans = "答案:(1)B C E (2)(ⅰ)T0 (ⅱ)"
    # Raw string: "\s" in a plain literal is an invalid escape sequence.
    # ASCII and full-width parentheses are both accepted.
    ans = re.sub(r"[(\uff08]\s*1\s*[)\uff09]\s*(([A-G]\s*)+)\s*[(\uff08]\s*2\s*[)\uff09]",
                 sub1, ans)
    print(ans)