stems_to_groups.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181
#!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. import re
  4. def regroup(res_list, item_groups, ans_groups):
  5. """
  6. 将多个题共用一个题干的情况进行slave重组,如地理
  7. :param res_list:
  8. :param item_groups:
  9. :param ans_groups:
  10. :return:
  11. """
  12. new_res_dict = []
  13. one_group = {}
  14. groups_data = item_groups["groups_data"]
  15. start_no = list(groups_data.keys())
  16. start_no.sort() # 排序
  17. def takefirst(elem):
  18. return int(elem.split("-")[0])
  19. ans_start_no = []
  20. if ans_groups:
  21. ans_start_no = list(ans_groups.keys())
  22. ans_start_no.sort(key=takefirst) # 排序
  23. added_nos = [] # 已经slave了的题号
  24. for n, group_no in enumerate(start_no):
  25. one_group["com_stem"] = res_list[group_no]["com_stem"]
  26. del res_list[group_no]["com_stem"]
  27. if groups_data[group_no]:
  28. st, end = groups_data[group_no].split("-")
  29. if not added_nos: # 开始
  30. if start_no[n + 1] <= int(end): # 公共题文中的结束序号信息有误
  31. one_group["slave"] = res_list[int(st) - 1: start_no[n + 1]]
  32. added_nos.append(start_no[n + 1])
  33. else:
  34. added_nos.append(int(end))
  35. one_group["slave"] = res_list[int(st) - 1:int(end)]
  36. elif int(st) <= added_nos[-1]: # 公共题文中的初始序号信息有误
  37. if n + 1 < len(start_no): # 不是最后一组
  38. if int(end) < start_no[n + 1]:
  39. one_group["slave"] = res_list[added_nos[-1]:int(end)]
  40. added_nos.append(int(end))
  41. else: # 结束序号有误,以pos为主
  42. one_group["slave"] = res_list[added_nos[-1]: start_no[n + 1]]
  43. added_nos.append(start_no[n + 1])
  44. else:
  45. if int(end) >= added_nos[-1]:
  46. one_group["slave"] = res_list[added_nos[-1]:int(end)]
  47. added_nos.append(int(end))
  48. else: # end值出错
  49. if str(group_no+1) + "-" in "#".join(ans_groups.keys()):
  50. end = re.search("[^#]{}-(\d+)[$#]".format(group_no), "#".join(ans_groups.keys())).group(1)
  51. one_group["slave"] = res_list[group_no: int(end) + 1]
  52. else:
  53. endp = [m for m, j in enumerate(res_list[added_nos[-1]:])
  54. if j["type"] != res_list[added_nos[-1]]["type"]]
  55. if endp:
  56. one_group["slave"] = res_list[added_nos[-1]:endp[0] + len(res_list[:added_nos[-1]])]
  57. added_nos.append(endp[0] + len(res_list[:added_nos[-1]]))
  58. else:
  59. one_group["slave"] = res_list[group_no:]
  60. else:
  61. added_nos.append(int(end))
  62. one_group["slave"] = res_list[int(st) - 1:int(end)]
  63. if int(st) > added_nos[-1] + 1:
  64. new_res_dict.extend(res_list[added_nos[-1]:int(st) - 1])
  65. else:
  66. if group_no != start_no[-1]: # 不是最后一个
  67. one_group["slave"] = res_list[group_no: start_no[n+1]]
  68. added_nos.append(start_no[n+1])
  69. elif str(group_no+1) + "-" in "#".join(ans_groups.keys()):
  70. aa = ("#" + "#".join(ans_groups.keys())).split("#{}-".format(group_no+1))
  71. end = aa[-1].split("#", maxsplit=1)[-1]
  72. one_group["slave"] = res_list[group_no: int(end)]
  73. added_nos.append(int(end))
  74. # new_res_dict.extend(res_list[int(end)+1:])
  75. else:
  76. endp = [m for m, j in enumerate(res_list[added_nos[-1]:])
  77. if j["type"] != res_list[added_nos[-1]]["type"]]
  78. if endp:
  79. one_group["slave"] = res_list[added_nos[-1]:endp[0] + len(res_list[:added_nos[-1]])]
  80. added_nos.append(endp[0] + len(res_list[:added_nos[-1]]))
  81. # new_res_dict.extend(res_list[added_nos[-1]:])
  82. else:
  83. one_group["slave"] = res_list[group_no:]
  84. one_group["type"] = one_group["slave"][0]["type"]
  85. one_group["que_num"] = len(one_group["slave"])
  86. one_group["topic_num"] = "{}-{}".format(one_group["slave"][0]["topic_num"], one_group["slave"][-1]["topic_num"])
  87. if ans_start_no:
  88. for k in ans_start_no:
  89. if k == one_group["topic_num"]:
  90. st1, end1 = k.split("-") # 真实题号组
  91. if len(re.findall("【详解】", ans_groups[k]["parse"])) > 1:
  92. parse_list = re.split("【详解】", ans_groups[k]["parse"])[1:]
  93. ans_list = re.split("(?<=[】\s])\d{1,2}\s*[、..、]|^\d{1,2}\s*[、..、]", ans_groups[k]["key"])[1:]
  94. if len(parse_list) == int(end1)+1 - int(st1):
  95. for i in range(len(parse_list)):
  96. one_group["slave"][i]["parse"] = parse_list[i].strip()
  97. else:
  98. # 就将各题解析合在一起
  99. one_group["parse"] = ans_groups[k]["parse"]
  100. if len(ans_list) == int(end1)+1 - int(st1):
  101. for j in range(len(ans_list)):
  102. one_group["slave"][j]["key"] = ans_list[j].strip()
  103. else:
  104. one_group["key"] = ans_groups[k]["key"]
  105. ans_start_no.remove(k)
  106. break
  107. new_res_dict.append(one_group)
  108. one_group = {}
  109. if added_nos[-1] < len(res_list):
  110. new_res_dict.extend(res_list[added_nos[-1]:])
  111. return new_res_dict
  112. def regroup_old(res_list, item_groups):
  113. """
  114. 将多个题共用一个题干的情况进行slave重组,如地理
  115. :param res_list: 拆分为小题后的结果
  116. :return:
  117. """
  118. new_res_dict = []
  119. start_no = [i for i in item_groups.keys() if i != "pos"]
  120. if not start_no:
  121. return res_list
  122. def takefirst(elem):
  123. return int(elem.split("-")[0])
  124. start_no.sort(key=takefirst) # 排序
  125. print(start_no)
  126. one_group = {}
  127. added_nos = [] # 已经slave了的题号
  128. for n, group_no in enumerate(start_no):
  129. one_group["common_stem"] = item_groups[group_no]
  130. st, end = group_no.split("-") # 真实题号组
  131. if not added_nos: # 开始
  132. if item_groups["pos"][n + 1] <= int(end): # 公共题文中的结束序号信息有误
  133. one_group["slave"] = res_list[int(st) - 1:item_groups["pos"][n + 1] - 1]
  134. added_nos.append(item_groups["pos"][n + 1] - 1)
  135. else:
  136. added_nos.append(int(end))
  137. one_group["slave"] = res_list[int(st) - 1:int(end)]
  138. elif int(st) <= added_nos[-1]: # 公共题文中的初始序号信息有误
  139. if n + 1 < len(item_groups["pos"]): # 不是最后一组
  140. if int(end) < item_groups["pos"][n + 1]:
  141. one_group["slave"] = res_list[added_nos[-1]:int(end)]
  142. added_nos.append(int(end))
  143. else: # 结束序号有误,以pos为主
  144. one_group["slave"] = res_list[added_nos[-1]:item_groups["pos"][n + 1] - 1]
  145. added_nos.append(item_groups["pos"][n + 1] - 1)
  146. else:
  147. if int(end) >= added_nos[-1]:
  148. one_group["slave"] = res_list[added_nos[-1]:int(end)]
  149. added_nos.append(int(end))
  150. else: # end值出错
  151. endp = [m for m, j in enumerate(res_list[added_nos[-1]:])
  152. if j["type"] != res_list[added_nos[-1]]["type"]]
  153. if endp:
  154. one_group["slave"] = res_list[added_nos[-1]:endp[0] + len(res_list[:added_nos[-1]])]
  155. added_nos.append(endp[0] + len(res_list[:added_nos[-1]]))
  156. else:
  157. added_nos.append(int(end))
  158. one_group["slave"] = res_list[int(st) - 1:int(end)]
  159. if int(st) > added_nos[-1] + 1:
  160. new_res_dict.extend(res_list[added_nos[-1]:int(st) - 1])
  161. one_group["type"] = one_group["slave"][0]["type"]
  162. one_group["que_num"] = len(one_group["slave"])
  163. new_res_dict.append(one_group)
  164. one_group = {}
  165. if added_nos[-1] < len(res_list):
  166. new_res_dict.extend(res_list[added_nos[-1]:])
  167. return new_res_dict