item_resplit.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. import re
  4. from pprint import pprint
  5. def resplit(item_res):
  6. """
  7. 对题目的切分结果,判断是否还有题目切分到一起的情况,并进行继续拆分
  8. :param item_res:
  9. :return:
  10. """
  11. new_res = []
  12. for k, one_res in enumerate(item_res):
  13. p1 = re.search(r"((?<=\n)|(?<=</su[pb]>))\s*([1-9]|1[0-9])\s*[..、、].+?([是为有]|等于)[((]\s*[))]", one_res['content'], re.S)
  14. # 大题出现2个相同序号如16A、16B
  15. p2 = re.compile(r"\n\s*([1-9]|1[0-9])\s*[ABC]\s*[..、、].+?(求|试问|[是为等于]+多少)", re.S)
  16. if p1 and one_res['item_topic_name'].replace("题", "") in ['选择', '单选', '多选', '双向选择']: # 主要针对2个题合在一起的情况
  17. con1 = one_res['content'][:p1.start()]
  18. con2 = one_res['content'][p1.start():]
  19. upletter_num1 = re.findall("[A-E]\s*[..、、]|[A-E]\s*<imgsrc", con1)
  20. upletter_num1 = set([re.findall("[A-E]", i)[0] for i in upletter_num1])
  21. upletter_num2 = re.findall("[A-E]\s*[..、、]|[A-E]\s*<imgsrc", con2)
  22. upletter_num2 = set([re.findall("[A-E]", i)[0] for i in upletter_num2])
  23. if len(upletter_num1)>=3 and len(upletter_num2)>=3:
  24. new_one = one_res.copy()
  25. new_one['item_id'] = int(re.search("^\n*\s*([1-9]|1[0-9])\s*[..、、]", con2).group(1))
  26. one_res['content'] = con1
  27. new_one['content'] = con2
  28. new_res.append(one_res)
  29. new_res.append(new_one)
  30. else:
  31. new_res.append(one_res)
  32. elif re.search(p2, one_res['content']) and one_res['item_topic_name'].replace("题", "") not in ['选择', '单选', '多选', '双向选择', '填空']:
  33. temp = one_res['content']
  34. split_p = [i.start() for i in re.finditer(p2, one_res['content'])]
  35. one_res['content'] = temp[:split_p[0]]
  36. new_res.append(one_res)
  37. for i, j in zip(split_p, split_p[1:] + [None]):
  38. new_one = one_res.copy()
  39. new_one['content'] = temp[i:j]
  40. new_one['item_id'] = re.search("\n\s*([1-9]\s*[ABC]|1[0-9]\s*[ABC])", new_one['content'][:10]).group(1)
  41. new_res.append(new_one)
  42. else:
  43. new_res.append(one_res)
  44. return new_res