#!/usr/bin/env python
# -*- coding:utf-8 -*-
import re
from pprint import pprint

# Separator that follows a question number or an option letter.  The
# full-width / half-width variants are written as escapes so they survive
# Unicode normalisation of this file:
#   "."  U+FF0E full-width full stop   U+3001 ideographic comma
#   U+FF64 half-width ideographic comma
_SEPARATOR = r"[.\uFF0E\u3001\uFF64]"
# ASCII and full-width (U+FF08 / U+FF09) parentheses.
_OPEN_PAREN = r"[(\uFF08]"
_CLOSE_PAREN = r"[)\uFF09]"

# A numbered (1-19) choice stem ending in an empty answer bracket, e.g.
# "2. ...是( )".
# NOTE(review): the previous pattern started with "((?<=\n)|(?<=))", whose
# empty look-behind always succeeds, so no line-start anchoring was in effect.
# That (non-)behaviour is kept here; confirm whether an anchor was intended.
_MERGED_CHOICE_RE = re.compile(
    r"\s*(?P<num>[1-9]|1[0-9])\s*" + _SEPARATOR
    + r".+?(?:[是为有]|等于)" + _OPEN_PAREN + r"\s*" + _CLOSE_PAREN,
    re.S,
)

# A big question holding several sub-items that share one number, such as
# 16A / 16B.  "sub_id" captures the number together with its letter.
_SUB_QUESTION_RE = re.compile(
    r"\n\s*(?P<sub_id>(?:[1-9]|1[0-9])\s*[ABC])\s*" + _SEPARATOR
    + r".+?(?:求|试问|[是为等于]+多少)",
    re.S,
)

# An option label such as "A." or "B、".
# NOTE(review): the second alternative was truncated in the previous revision
# (everything after "[A-E]\s*" was lost); "<img" (an option given as an
# image) is a reconstruction - confirm against version control.
_OPTION_LABEL_RE = re.compile(r"[A-E]\s*" + _SEPARATOR + r"|[A-E]\s*<img")

_CHOICE_TOPICS = ('选择', '单选', '多选', '双向选择')
_NO_SUB_QUESTION_TOPICS = _CHOICE_TOPICS + ('填空',)


def _split_merged_choice(item, match):
    """Split a choice item at *match* when both halves look like full questions."""
    content = item['content']
    head = content[:match.start()]
    tail = content[match.start():]
    # Each half must carry at least three option labels to count as a
    # complete choice question; otherwise leave the item untouched.
    if len(_OPTION_LABEL_RE.findall(head)) < 3 or len(_OPTION_LABEL_RE.findall(tail)) < 3:
        return [item]
    first = item.copy()
    first['content'] = head
    second = item.copy()
    second['content'] = tail
    second['item_id'] = int(match.group('num'))
    return [first, second]


def _split_sub_questions(item, matches):
    """Split an item into its lead-in text plus one item per sub-question match."""
    content = item['content']
    starts = [m.start() for m in matches]
    lead = item.copy()
    lead['content'] = content[:starts[0]]
    pieces = [lead]
    # Every sub-question runs up to the start of the next one (or to the end).
    for match, end in zip(matches, starts[1:] + [None]):
        sub = item.copy()
        sub['content'] = content[match.start():end]
        # Take the id from the split match itself rather than re-searching a
        # 10-character prefix, which failed on indented sub-items.
        sub['item_id'] = match.group('sub_id')
        pieces.append(sub)
    return pieces


def resplit(item_res):
    """
    Re-check already segmented questions and split those still merged together.

    Two situations are handled:

    * choice questions (topic name, with "题" removed, in 选择 / 单选 / 多选 /
      双向选择) whose content contains a second numbered stem ending in an
      empty answer bracket; the item is split there when both halves contain
      at least three option labels, and the second half takes the detected
      number as an ``int`` ``item_id``;
    * any topic other than choice or 填空 whose content contains sub-items
      sharing one number (16A, 16B, ...); the text before the first sub-item
      keeps the original ``item_id`` and every sub-item becomes its own
      entry with a string ``item_id`` such as ``"16A"``.

    The input dicts are not modified: split pieces are shallow copies, and
    items that need no splitting are returned as the same objects.

    :param item_res: iterable of dicts with at least the keys ``content``,
        ``item_topic_name`` and (for the pieces produced) ``item_id``
    :return: new list of item dicts, in order, with merged items expanded
    :raises KeyError: if an item lacks ``content`` or ``item_topic_name``
    """
    new_res = []
    for one_res in item_res:
        content = one_res['content']
        topic = one_res['item_topic_name'].replace("题", "")
        if topic in _CHOICE_TOPICS:
            # Mainly targets two choice questions glued into one item.
            merged = _MERGED_CHOICE_RE.search(content)
            if merged:
                new_res.extend(_split_merged_choice(one_res, merged))
                continue
        elif topic not in _NO_SUB_QUESTION_TOPICS:
            sub_matches = list(_SUB_QUESTION_RE.finditer(content))
            if sub_matches:
                new_res.extend(_split_sub_questions(one_res, sub_matches))
                continue
        new_res.append(one_res)
    return new_res