|
@@ -299,6 +299,7 @@ def cluster_and_anti_abnormal(image, xml_path, choice_n_list, digital_list, char
|
|
digital_list_to_cluster.append(digital_list[i])
|
|
digital_list_to_cluster.append(digital_list[i])
|
|
digital_loc_arr.append(point)
|
|
digital_loc_arr.append(point)
|
|
|
|
|
|
|
|
+ # 得到所有题号区域, 作为后续划分choice_m的依据
|
|
choice_m_numbers_list = []
|
|
choice_m_numbers_list = []
|
|
for ele in choice_n_list:
|
|
for ele in choice_n_list:
|
|
loc = ele['bounding_box']
|
|
loc = ele['bounding_box']
|
|
@@ -418,295 +419,438 @@ def cluster_and_anti_abnormal(image, xml_path, choice_n_list, digital_list, char
|
|
# # cond1 = cond2 = true, 因为infer选择题时已横向排序, 默认这种情况不会出现
|
|
# # cond1 = cond2 = true, 因为infer选择题时已横向排序, 默认这种情况不会出现
|
|
# pass
|
|
# pass
|
|
|
|
|
|
|
|
+ direction180, direction90 = 0, 0
|
|
for ele in choice_m_numbers_list:
|
|
for ele in choice_m_numbers_list:
|
|
loc = ele["loc"]
|
|
loc = ele["loc"]
|
|
- if loc[3] - loc[1] >= loc[2] - loc[0]:
|
|
|
|
|
|
+ if loc[3] - loc[1] >= 2 * (loc[2] - loc[0]):
|
|
direction = 180
|
|
direction = 180
|
|
|
|
+ direction180 += 1
|
|
else:
|
|
else:
|
|
direction = 90
|
|
direction = 90
|
|
|
|
+ direction90 += 1
|
|
ele.update({'direction': direction})
|
|
ele.update({'direction': direction})
|
|
- # tree = ET.parse(xml_path)
|
|
|
|
- # for index, choice_m in enumerate(choice_m_numbers_list):
|
|
|
|
- # name = str(choice_m["numbers"])
|
|
|
|
- # xmin, ymin, xmax, ymax, _, _ = choice_m["loc"]
|
|
|
|
- # tree = create_xml(name, tree, str(xmin + limit_left), str(ymin + limit_top), str(xmax + limit_left), str(ymax + limit_top))
|
|
|
|
- #
|
|
|
|
- # tree.write(xml_path)
|
|
|
|
- choice_m_numbers_list = sorted(choice_m_numbers_list, key=lambda x: x['loc'][3] - x['loc'][1], reverse=True)
|
|
|
|
- choice_m_numbers_right_limit = max([ele['loc'][2] for ele in choice_m_numbers_list])
|
|
|
|
- remain_len = len(choice_m_numbers_list)
|
|
|
|
- choice_m_list = list()
|
|
|
|
- need_revised_choice_m_list = list()
|
|
|
|
- while remain_len > 0:
|
|
|
|
- # 先确定属于同行的数据,然后找字母划分block
|
|
|
|
- # random_index = random.randint(0, len(choice_m_numbers_list)-1)
|
|
|
|
- random_index = 0
|
|
|
|
- # print(random_index)
|
|
|
|
- ymax_limit = choice_m_numbers_list[random_index]["loc"][3]
|
|
|
|
- ymin_limit = choice_m_numbers_list[random_index]["loc"][1]
|
|
|
|
- # choice_m_numbers_list.pop(random_index)
|
|
|
|
-
|
|
|
|
- # 当前行的choice_m
|
|
|
|
- current_row_choice_m_d = [ele for ele in choice_m_numbers_list if ymin_limit < ele["loc"][5] < ymax_limit]
|
|
|
|
- current_row_choice_m_d = sorted(current_row_choice_m_d, key=lambda x: x["loc"][0])
|
|
|
|
- # current_row_choice_m_d.append(choice_m_numbers_list[random_index])
|
|
|
|
- split_pix = sorted([ele["loc"][0] for ele in current_row_choice_m_d]) # xmin排序
|
|
|
|
- split_index = get_split_index(split_pix, dif=choice_s_width * 0.8)
|
|
|
|
- split_pix = [split_pix[ele] for ele in split_index[:-1]]
|
|
|
|
-
|
|
|
|
- block_list = []
|
|
|
|
- for i in range(len(split_index) - 1):
|
|
|
|
- block = current_row_choice_m_d[split_index[i]: split_index[i + 1]]
|
|
|
|
- if len(block) > 1:
|
|
|
|
- remain_len = remain_len - (len(block) - 1)
|
|
|
|
- numbers_new = []
|
|
|
|
- loc_new = [[], [], [], []]
|
|
|
|
- for blk in block:
|
|
|
|
- loc_old = blk["loc"]
|
|
|
|
- numbers_new.extend(blk["numbers"])
|
|
|
|
- for ii in range(4):
|
|
|
|
- loc_new[ii].append(loc_old[ii])
|
|
|
|
-
|
|
|
|
- loc_new[0] = min(loc_new[0])
|
|
|
|
- loc_new[1] = min(loc_new[1])
|
|
|
|
- loc_new[2] = max(loc_new[2])
|
|
|
|
- loc_new[3] = max(loc_new[3])
|
|
|
|
-
|
|
|
|
- loc_new.append(loc_new[0] + (loc_new[2] - loc_new[0]) // 2)
|
|
|
|
- loc_new.append(loc_new[1] + (loc_new[3] - loc_new[1]) // 2)
|
|
|
|
-
|
|
|
|
- block = [{"numbers": sorted(numbers_new), "loc": loc_new, "direction": block[0]["direction"]}]
|
|
|
|
-
|
|
|
|
- block_list.extend(block)
|
|
|
|
-
|
|
|
|
- current_row_choice_m_d = block_list
|
|
|
|
- current_row_chars = [ele for ele in chars_list
|
|
|
|
- if ymin_limit < (ele["location"]["top"] + ele["location"]["height"] // 2) < ymax_limit]
|
|
|
|
-
|
|
|
|
- # split_index.append(row_chars_xmax) # 边界
|
|
|
|
- split_pix.append(round(split_pix[-1] + choice_s_width * 1.75))
|
|
|
|
- for i in range(0, len(split_pix) - 1):
|
|
|
|
- left_limit = split_pix[i]
|
|
|
|
- right_limit = split_pix[i + 1]
|
|
|
|
- block_chars = [ele for ele in current_row_chars
|
|
|
|
- if left_limit < (ele["location"]["left"] + ele["location"]["width"] // 2) < right_limit]
|
|
|
|
-
|
|
|
|
- a_z = '_ABCD_FGHT'
|
|
|
|
- letter_index = [a_z.index(ele['char'].upper()) for ele in block_chars if ele['char'].upper() in a_z]
|
|
|
|
-
|
|
|
|
- letter_index_times = {ele: 0 for ele in set(letter_index)}
|
|
|
|
- for l_index in letter_index:
|
|
|
|
- letter_index_times[l_index] += 1
|
|
|
|
-
|
|
|
|
- if (a_z.index("T") in letter_index) and (a_z.index("F") in letter_index):
|
|
|
|
- choice_option = "T, F"
|
|
|
|
- cols = 2
|
|
|
|
- else:
|
|
|
|
- if len(letter_index) < 1:
|
|
|
|
- tmp = 4
|
|
|
|
- choice_option = 'A,B,C,D'
|
|
|
|
- else:
|
|
|
|
- tmp = max(set(letter_index))
|
|
|
|
-
|
|
|
|
- choice_option = ",".join(a_z[min(letter_index):tmp + 1])
|
|
|
|
- cols = tmp
|
|
|
|
|
|
|
|
- bias = 3 # pix
|
|
|
|
- current_loc = current_row_choice_m_d[i]["loc"]
|
|
|
|
- location = dict(xmin=(current_loc[2] + bias), # 当前数字xmax右边
|
|
|
|
- # xmin=max(current_loc[2] + bias, chars_xmin) + limit_left,
|
|
|
|
- ymin=current_loc[1],
|
|
|
|
-
|
|
|
|
- xmax=(right_limit - bias),
|
|
|
|
- # xmax=min(chars_xmax, right_limit - bias) + limit_left,
|
|
|
|
- ymax=current_loc[3])
|
|
|
|
-
|
|
|
|
- try:
|
|
|
|
- choice_m_img = utils.crop_region(image, location)
|
|
|
|
- if 0 in choice_m_img.shape[:2]:
|
|
|
|
- continue
|
|
|
|
- right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
- if right_loc > 0:
|
|
|
|
- location.update(dict(xmax=right_loc + location['xmin']))
|
|
|
|
- if bottom_loc > 0:
|
|
|
|
- location.update(dict(ymax=bottom_loc + location['ymin']))
|
|
|
|
- except Exception as e:
|
|
|
|
- print(e)
|
|
|
|
- traceback.print_exc()
|
|
|
|
|
|
+ # 判断大多数choice_m的方向
|
|
|
|
+ if direction180 >= direction90: # 横排
|
|
|
|
+
|
|
|
|
+ choice_m_numbers_list = sorted(choice_m_numbers_list, key=lambda x: x['loc'][3] - x['loc'][1], reverse=True)
|
|
|
|
+ choice_m_numbers_right_limit = max([ele['loc'][2] for ele in choice_m_numbers_list])
|
|
|
|
+ remain_len = len(choice_m_numbers_list)
|
|
|
|
+ choice_m_list = list()
|
|
|
|
+ need_revised_choice_m_list = list()
|
|
|
|
+ while remain_len > 0:
|
|
|
|
+ # 先确定属于同行的数据,然后找字母划分block
|
|
|
|
+
|
|
|
|
+ random_index = 0
|
|
|
|
+ # print(random_index)
|
|
|
|
+ ymax_limit = choice_m_numbers_list[random_index]["loc"][3]
|
|
|
|
+ ymin_limit = choice_m_numbers_list[random_index]["loc"][1]
|
|
|
|
+
|
|
|
|
+ # 当前行的choice_m
|
|
|
|
+ current_row_choice_m_d = [ele for ele in choice_m_numbers_list if ymin_limit < ele["loc"][5] < ymax_limit]
|
|
|
|
+ current_row_choice_m_d = sorted(current_row_choice_m_d, key=lambda x: x["loc"][0])
|
|
|
|
+ # current_row_choice_m_d.append(choice_m_numbers_list[random_index])
|
|
|
|
+
|
|
|
|
+ # 对同行的题号区域排序, 得到分割间隔, 两个题号中间的区域为choice_m
|
|
|
|
+ split_pix = sorted([ele["loc"][0] for ele in current_row_choice_m_d]) # xmin排序
|
|
|
|
+ split_index = get_split_index(split_pix, dif=choice_s_width * 0.8)
|
|
|
|
+ split_pix = [split_pix[ele] for ele in split_index[:-1]]
|
|
|
|
+
|
|
|
|
+ block_list = []
|
|
|
|
+ for i in range(len(split_index) - 1):
|
|
|
|
+ block = current_row_choice_m_d[split_index[i]: split_index[i + 1]]
|
|
|
|
+ if len(block) > 1:
|
|
|
|
+ remain_len = remain_len - (len(block) - 1)
|
|
|
|
+ numbers_new = []
|
|
|
|
+ loc_new = [[], [], [], []]
|
|
|
|
+ for blk in block:
|
|
|
|
+ loc_old = blk["loc"]
|
|
|
|
+ numbers_new.extend(blk["numbers"])
|
|
|
|
+ for ii in range(4):
|
|
|
|
+ loc_new[ii].append(loc_old[ii])
|
|
|
|
+
|
|
|
|
+ loc_new[0] = min(loc_new[0])
|
|
|
|
+ loc_new[1] = min(loc_new[1])
|
|
|
|
+ loc_new[2] = max(loc_new[2])
|
|
|
|
+ loc_new[3] = max(loc_new[3])
|
|
|
|
+
|
|
|
|
+ loc_new.append(loc_new[0] + (loc_new[2] - loc_new[0]) // 2)
|
|
|
|
+ loc_new.append(loc_new[1] + (loc_new[3] - loc_new[1]) // 2)
|
|
|
|
+
|
|
|
|
+ block = [{"numbers": sorted(numbers_new), "loc": loc_new, "direction": block[0]["direction"]}]
|
|
|
|
+
|
|
|
|
+ block_list.extend(block)
|
|
|
|
+
|
|
|
|
+ current_row_choice_m_d = block_list
|
|
|
|
+ current_row_chars = [ele for ele in chars_list
|
|
|
|
+ if ymin_limit < (ele["location"]["top"] + ele["location"]["height"] // 2) < ymax_limit]
|
|
|
|
+
|
|
|
|
+ # split_index.append(row_chars_xmax) # 边界
|
|
|
|
+ split_pix.append(limit_right)
|
|
|
|
+ for i in range(0, len(split_pix) - 1):
|
|
|
|
+ left_limit = split_pix[i]
|
|
|
|
+ right_limit = split_pix[i + 1]
|
|
|
|
+ block_chars = [ele for ele in current_row_chars
|
|
|
|
+ if left_limit < (ele["location"]["left"] + ele["location"]["width"] // 2) < right_limit]
|
|
|
|
+
|
|
|
|
+ a_z = '_ABCD_FGHT'
|
|
|
|
+ letter_index = [a_z.index(ele['char'].upper()) for ele in block_chars if ele['char'].upper() in a_z]
|
|
|
|
+
|
|
|
|
+ letter_index_times = {ele: 0 for ele in set(letter_index)}
|
|
|
|
+ for l_index in letter_index:
|
|
|
|
+ letter_index_times[l_index] += 1
|
|
|
|
+
|
|
|
|
+ if (a_z.index("T") in letter_index) and (a_z.index("F") in letter_index):
|
|
|
|
+ choice_option = "T, F"
|
|
|
|
+ cols = 2
|
|
|
|
+ else:
|
|
|
|
+ if len(letter_index) < 1:
|
|
|
|
+ tmp = 4
|
|
|
|
+ choice_option = 'A,B,C,D'
|
|
|
|
+ else:
|
|
|
|
+ tmp = max(set(letter_index))
|
|
|
|
+
|
|
|
|
+ choice_option = ",".join(a_z[min(letter_index):tmp + 1])
|
|
|
|
+ cols = tmp
|
|
|
|
+
|
|
|
|
+ bias = 3 # pix
|
|
|
|
+ current_loc = current_row_choice_m_d[i]["loc"]
|
|
|
|
+ location = dict(xmin=(current_loc[2] + bias), # 当前数字xmax右边
|
|
|
|
+ # xmin=max(current_loc[2] + bias, chars_xmin) + limit_left,
|
|
|
|
+ ymin=current_loc[1],
|
|
|
|
+
|
|
|
|
+ xmax=(right_limit - bias),
|
|
|
|
+ # xmax=min(chars_xmax, right_limit - bias) + limit_left,
|
|
|
|
+ ymax=current_loc[3])
|
|
|
|
+
|
|
|
|
+ try:
|
|
|
|
+ # 调整choice-m区域, 避免推断出来的区域过大
|
|
|
|
+ choice_m_img = utils.crop_region(image, location)
|
|
|
|
+ if 0 in choice_m_img.shape[:2]:
|
|
|
|
+ continue
|
|
|
|
+ right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
+ if right_loc > 0:
|
|
|
|
+ location.update(dict(xmax=right_loc + location['xmin']))
|
|
|
|
+ if bottom_loc > 0:
|
|
|
|
+ location.update(dict(ymax=bottom_loc + location['ymin']))
|
|
|
|
+ except Exception as e:
|
|
|
|
+ print(e)
|
|
|
|
+ traceback.print_exc()
|
|
|
|
+
|
|
|
|
+ tmp_w, tmp_h = location['xmax'] - location['xmin'], location['ymax'] - location['ymin'],
|
|
|
|
+ numbers = current_row_choice_m_d[i]["numbers"]
|
|
|
|
+
|
|
|
|
+ direction = current_row_choice_m_d[i]["direction"]
|
|
|
|
+ if direction == 180:
|
|
|
|
+ choice_m = dict(class_name='choice_m',
|
|
|
|
+ number=numbers,
|
|
|
|
+ bounding_box=location,
|
|
|
|
+ choice_option=choice_option,
|
|
|
|
+ default_points=[5] * len(numbers),
|
|
|
|
+ direction=direction,
|
|
|
|
+ cols=cols,
|
|
|
|
+ rows=len(numbers))
|
|
|
|
+ else:
|
|
|
|
+ choice_m = dict(class_name='choice_m',
|
|
|
|
+ number=numbers,
|
|
|
|
+ bounding_box=location,
|
|
|
|
+ choice_option=choice_option,
|
|
|
|
+ default_points=[5] * len(numbers),
|
|
|
|
+ direction=direction,
|
|
|
|
+ cols=len(numbers),
|
|
|
|
+ rows=cols)
|
|
|
|
+
|
|
|
|
+ if tmp_w > 2 * choice_s_width:
|
|
|
|
+ need_revised_choice_m_list.append(choice_m)
|
|
|
|
+ else:
|
|
|
|
+ choice_m_list.append(choice_m)
|
|
|
|
+
|
|
|
|
+ remain_len = remain_len - len(current_row_choice_m_d)
|
|
|
|
+ for ele in choice_m_numbers_list.copy():
|
|
|
|
+ if ele in current_row_choice_m_d:
|
|
|
|
+ choice_m_numbers_list.remove(ele)
|
|
|
|
+
|
|
|
|
+ for ele in choice_m_numbers_list.copy():
|
|
|
|
+ if ele in current_row_chars:
|
|
|
|
+ choice_m_numbers_list.remove(ele)
|
|
|
|
+
|
|
|
|
+ # 解决单行问题
|
|
|
|
+ if len(choice_m_list) > 0:
|
|
|
|
+ crt_right_max = max([int(ele['bounding_box']['xmax']) for ele in choice_m_list])
|
|
|
|
+ if limit_right - crt_right_max > choice_s_width:
|
|
|
|
+ # 存在区域
|
|
|
|
+ region_loc = {'xmin': crt_right_max + 10,
|
|
|
|
+ 'ymin': choice_m_list[0]['bounding_box']['ymin'],
|
|
|
|
+ 'xmax': limit_right,
|
|
|
|
+ 'ymax': choice_m_list[0]['bounding_box']['ymax']}
|
|
|
|
+
|
|
|
|
+ contain_dig = []
|
|
|
|
+ for i, ele in enumerate(digital_loc_arr):
|
|
|
|
+ if region_loc['xmin'] < ele[0] < region_loc['xmax'] and region_loc['ymin'] < ele[1] < region_loc['ymax']:
|
|
|
|
+ contain_dig.append(digital_list[i])
|
|
|
|
+
|
|
|
|
+ contain_chars = [ele for ele in chars_list
|
|
|
|
+ if region_loc['xmin'] < (ele["location"]["left"] + ele["location"]["width"] // 2) < region_loc['xmax']
|
|
|
|
+ and
|
|
|
|
+ region_loc['xmin'] < (ele["location"]["top"] + ele["location"]["height"] // 2) < region_loc['ymax']]
|
|
|
|
+ numbers = [-1]
|
|
|
|
+ if contain_dig or contain_chars:
|
|
|
|
+ d_ymin, d_ymax, d_xmin, d_xmax = 9999, 0, 9999, 0
|
|
|
|
+ if contain_dig:
|
|
|
|
+ numbers = [ele["digital"] for ele in contain_dig]
|
|
|
|
+ d_ymin = min([ele['loc'][1] for ele in contain_dig])
|
|
|
|
+ d_ymax = max([ele['loc'][3] for ele in contain_dig])
|
|
|
|
+ d_xmin = min([ele['loc'][0] for ele in contain_dig])
|
|
|
|
+ d_xmax = max([ele['loc'][2] for ele in contain_dig])
|
|
|
|
+
|
|
|
|
+ c_ymin, c_ymax, c_xmin, c_xmax = 9999, 0, 9999, 0
|
|
|
|
+ if contain_chars:
|
|
|
|
+ c_ymin = min([ele["location"]["top"] for ele in contain_chars])
|
|
|
|
+ c_ymax = max([ele["location"]["top"] + ele["location"]["height"] for ele in contain_chars])
|
|
|
|
+ c_xmin = min([ele["location"]["left"] for ele in contain_chars])
|
|
|
|
+ c_xmax = max([ele["location"]["left"] + ele["location"]["width"] for ele in contain_chars])
|
|
|
|
+
|
|
|
|
+ r_ymin, r_ymax = min(d_ymin, c_ymin), max(d_ymax, c_ymax)
|
|
|
|
+ r_xmin, r_xmax = min(d_xmin, c_xmin), max(d_xmax, c_xmax)
|
|
|
|
+
|
|
|
|
+ region_loc['ymin'] = r_ymin - 10
|
|
|
|
+ region_loc['ymax'] = r_ymax + 10
|
|
|
|
+ if d_xmin == r_xmin:
|
|
|
|
+ region_loc['xmin'] = d_xmax + 5
|
|
|
|
+ region_loc['xmax'] = d_xmax + 5 + int(1.2 * choice_s_width)
|
|
|
|
+ else:
|
|
|
|
+ if 1.2 * (r_xmax - r_xmin) > choice_s_width:
|
|
|
|
+ region_loc['xmin'] = r_xmin - 10
|
|
|
|
+ region_loc['xmax'] = r_xmax + 10
|
|
|
|
+ else:
|
|
|
|
+ region_loc['xmin'] = max((r_xmax - r_xmin) // 2 + r_xmin - choice_s_width,
|
|
|
|
+ crt_right_max + 10)
|
|
|
|
+ region_loc['xmax'] = min((r_xmax - r_xmin) // 2 + r_xmin + choice_s_width ,
|
|
|
|
+ limit_right)
|
|
|
|
|
|
- tmp_w, tmp_h = location['xmax'] - location['xmin'], location['ymax'] - location['ymin'],
|
|
|
|
- numbers = current_row_choice_m_d[i]["numbers"]
|
|
|
|
-
|
|
|
|
- direction = current_row_choice_m_d[i]["direction"]
|
|
|
|
- if direction == 180:
|
|
|
|
- choice_m = dict(class_name='choice_m',
|
|
|
|
- number=numbers,
|
|
|
|
- bounding_box=location,
|
|
|
|
- choice_option=choice_option,
|
|
|
|
- default_points=[5] * len(numbers),
|
|
|
|
- direction=direction,
|
|
|
|
- cols=cols,
|
|
|
|
- rows=len(numbers))
|
|
|
|
- else:
|
|
|
|
- choice_m = dict(class_name='choice_m',
|
|
|
|
- number=numbers,
|
|
|
|
- bounding_box=location,
|
|
|
|
- choice_option=choice_option,
|
|
|
|
- default_points=[5] * len(numbers),
|
|
|
|
- direction=direction,
|
|
|
|
- cols=len(numbers),
|
|
|
|
- rows=cols)
|
|
|
|
-
|
|
|
|
- if tmp_w > 2 * choice_s_width:
|
|
|
|
- need_revised_choice_m_list.append(choice_m)
|
|
|
|
- else:
|
|
|
|
- choice_m_list.append(choice_m)
|
|
|
|
-
|
|
|
|
- remain_len = remain_len - len(current_row_choice_m_d)
|
|
|
|
- for ele in choice_m_numbers_list.copy():
|
|
|
|
- if ele in current_row_choice_m_d:
|
|
|
|
- choice_m_numbers_list.remove(ele)
|
|
|
|
-
|
|
|
|
- for ele in choice_m_numbers_list.copy():
|
|
|
|
- if ele in current_row_chars:
|
|
|
|
- choice_m_numbers_list.remove(ele)
|
|
|
|
-
|
|
|
|
- # 解决单行问题
|
|
|
|
- crt_right_max = max([int(ele['bounding_box']['xmax']) for ele in choice_m_list])
|
|
|
|
- if limit_right - crt_right_max > choice_s_width:
|
|
|
|
- # 存在区域
|
|
|
|
- region_loc = {'xmin': crt_right_max + 10,
|
|
|
|
- 'ymin': choice_m_list[0]['bounding_box']['ymin'],
|
|
|
|
- 'xmax': limit_right,
|
|
|
|
- 'ymax': choice_m_list[0]['bounding_box']['ymax']}
|
|
|
|
-
|
|
|
|
- contain_dig = []
|
|
|
|
- for i, ele in enumerate(digital_loc_arr):
|
|
|
|
- if region_loc['xmin'] < ele[0] < region_loc['xmax'] and region_loc['ymin'] < ele[1] < region_loc['ymax']:
|
|
|
|
- contain_dig.append(digital_list[i])
|
|
|
|
-
|
|
|
|
- contain_chars = [ele for ele in chars_list
|
|
|
|
- if region_loc['xmin'] < (ele["location"]["left"] + ele["location"]["width"] // 2) < region_loc['xmax']
|
|
|
|
- and
|
|
|
|
- region_loc['xmin'] < (ele["location"]["top"] + ele["location"]["height"] // 2) < region_loc['ymax']]
|
|
|
|
- numbers = [-1]
|
|
|
|
- if contain_dig or contain_chars:
|
|
|
|
- d_ymin, d_ymax, d_xmin, d_xmax = 9999, 0, 9999, 0
|
|
|
|
- if contain_dig:
|
|
|
|
- numbers = [ele["digital"] for ele in contain_dig]
|
|
|
|
- d_ymin = min([ele['loc'][1] for ele in contain_dig])
|
|
|
|
- d_ymax = max([ele['loc'][3] for ele in contain_dig])
|
|
|
|
- d_xmin = min([ele['loc'][0] for ele in contain_dig])
|
|
|
|
- d_xmax = max([ele['loc'][2] for ele in contain_dig])
|
|
|
|
-
|
|
|
|
- c_ymin, c_ymax, c_xmin, c_xmax = 9999, 0, 9999, 0
|
|
|
|
- if contain_chars:
|
|
|
|
- c_ymin = min([ele["location"]["top"] for ele in contain_chars])
|
|
|
|
- c_ymax = max([ele["location"]["top"] + ele["location"]["height"] for ele in contain_chars])
|
|
|
|
- c_xmin = min([ele["location"]["left"] for ele in contain_chars])
|
|
|
|
- c_xmax = max([ele["location"]["left"] + ele["location"]["width"] for ele in contain_chars])
|
|
|
|
-
|
|
|
|
- r_ymin, r_ymax = min(d_ymin, c_ymin), max(d_ymax, c_ymax)
|
|
|
|
- r_xmin, r_xmax = min(d_xmin, c_xmin), max(d_xmax, c_xmax)
|
|
|
|
-
|
|
|
|
- region_loc['ymin'] = r_ymin - 10
|
|
|
|
- region_loc['ymax'] = r_ymax + 10
|
|
|
|
- if d_xmin == r_xmin:
|
|
|
|
- region_loc['xmin'] = d_xmax + 5
|
|
|
|
- region_loc['xmax'] = d_xmax + 5 + int(1.2 * choice_s_width)
|
|
|
|
|
|
+ else:
|
|
|
|
+ # 默认这种情况只有1行或2行
|
|
|
|
+ numbers = [-1]
|
|
|
|
+ region_xmin = crt_right_max + 5
|
|
|
|
+ region_xmax = int(region_xmin + 1.2 * choice_s_width)
|
|
|
|
+ region_ymin = min([int(ele['bounding_box']['ymin']) for ele in choice_m_list])
|
|
|
|
+ region_ymax = max([int(ele['bounding_box']['ymax']) for ele in choice_m_list])
|
|
|
|
+ region_ymax = region_ymin + (region_ymax - region_ymin) // 2 # 默认这种情况只有1行或2行
|
|
|
|
+ region_loc = {'xmin': region_xmin, 'ymin': region_ymin, 'xmax': region_xmax, 'ymax': region_ymax}
|
|
|
|
+
|
|
|
|
+ try:
|
|
|
|
+ choice_m_img = utils.crop_region(image, region_loc)
|
|
|
|
+ if 0 in choice_m_img.shape[:2]:
|
|
|
|
+ continue
|
|
|
|
+ right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
+ if right_loc > 0:
|
|
|
|
+ region_loc.update(dict(xmax=right_loc + region_loc['xmin']))
|
|
|
|
+ if bottom_loc > 0:
|
|
|
|
+ region_loc.update(dict(ymax=bottom_loc + region_loc['ymin']))
|
|
|
|
+ except Exception as e:
|
|
|
|
+ print(e)
|
|
|
|
+ traceback.print_exc()
|
|
|
|
+
|
|
|
|
+ choice_m = dict(class_name='choice_m',
|
|
|
|
+ number=numbers,
|
|
|
|
+ bounding_box=region_loc,
|
|
|
|
+ choice_option='A,B,C,D',
|
|
|
|
+ default_points=[5],
|
|
|
|
+ direction=180,
|
|
|
|
+ cols=4,
|
|
|
|
+ rows=1,
|
|
|
|
+ single_width=(region_loc['xmax'] - region_loc['xmin']) // 4,
|
|
|
|
+ )
|
|
|
|
+ choice_m_list.append(choice_m)
|
|
|
|
+
|
|
|
|
+ # 单独一行不聚类(理论上不会再到这一步了, 上个block解决)
|
|
|
|
+ for i, revised_choice_m in enumerate(need_revised_choice_m_list):
|
|
|
|
+ loc = revised_choice_m['bounding_box']
|
|
|
|
+ left_part_loc = loc.copy()
|
|
|
|
+ left_part_loc.update({'xmax': loc['xmin'] + choice_s_width})
|
|
|
|
+ choice_m_img = utils.crop_region(image, left_part_loc)
|
|
|
|
+ right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
+ if right_loc > 0:
|
|
|
|
+ left_part_loc.update(dict(xmax=right_loc + left_part_loc['xmin']))
|
|
|
|
+ if bottom_loc > 0:
|
|
|
|
+ left_part_loc.update(dict(ymax=bottom_loc + left_part_loc['ymin']))
|
|
|
|
+
|
|
|
|
+ left_tmp_height = left_part_loc['ymax'] - left_part_loc['ymin']
|
|
|
|
+
|
|
|
|
+ right_part_loc = loc.copy()
|
|
|
|
+ # right_part_loc.update({'xmin': loc['xmax']-choice_s_width})
|
|
|
|
+ right_part_loc.update({'xmin': left_part_loc['xmax'] + 5})
|
|
|
|
+ choice_m_img = utils.crop_region(image, right_part_loc)
|
|
|
|
+ right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
+ if right_loc > 0:
|
|
|
|
+ right_part_loc.update(dict(xmax=right_loc + right_part_loc['xmin']))
|
|
|
|
+ if bottom_loc > 0:
|
|
|
|
+ right_part_loc.update(dict(ymax=bottom_loc + right_part_loc['ymin']))
|
|
|
|
+
|
|
|
|
+ right_tmp_height = right_part_loc['ymax'] - right_part_loc['ymin']
|
|
|
|
+
|
|
|
|
+ number_len = max(1, int(revised_choice_m['rows'] // (left_tmp_height // right_tmp_height)))
|
|
|
|
+ number = [ele + revised_choice_m['number'][-1] + 1 for ele in range(number_len)]
|
|
|
|
+ rows = len(number)
|
|
|
|
+
|
|
|
|
+ revised_choice_m.update({'bounding_box': left_part_loc})
|
|
|
|
+ choice_m_list.append(revised_choice_m)
|
|
|
|
+
|
|
|
|
+ tmp = revised_choice_m.copy()
|
|
|
|
+ tmp.update({'bounding_box': right_part_loc, 'number': number, 'rows': rows})
|
|
|
|
+ choice_m_list.append(tmp)
|
|
|
|
+
|
|
|
|
+ choice_m_list_copy = choice_m_list.copy()
|
|
|
|
+ for ele in choice_m_list_copy:
|
|
|
|
+ loc = ele["bounding_box"]
|
|
|
|
+ w, h = loc['xmax'] - loc['xmin'], loc['ymax'] - loc['ymin']
|
|
|
|
+ if 2 * w * h < choice_s_width * choice_s_height:
|
|
|
|
+ choice_m_list.remove(ele)
|
|
|
|
+ return choice_m_list
|
|
|
|
+
|
|
|
|
+ else: # 竖排
|
|
|
|
+ # 横向最大
|
|
|
|
+ choice_m_numbers_list = sorted(choice_m_numbers_list, key=lambda x: x['loc'][2] - x['loc'][0], reverse=True)
|
|
|
|
+ remain_len = len(choice_m_numbers_list)
|
|
|
|
+ choice_m_list = list()
|
|
|
|
+ need_revised_choice_m_list = list()
|
|
|
|
+ while remain_len > 0:
|
|
|
|
+ # 先确定属于同列的数据,然后找字母划分block
|
|
|
|
+ random_index = 0
|
|
|
|
+ xmax_limit = choice_m_numbers_list[random_index]["loc"][2]
|
|
|
|
+ xmin_limit = choice_m_numbers_list[random_index]["loc"][0]
|
|
|
|
+ # choice_m_numbers_list.pop(random_index)
|
|
|
|
+
|
|
|
|
+ # 当前行的choice_m
|
|
|
|
+ current_row_choice_m_d = [ele for ele in choice_m_numbers_list if xmin_limit < ele["loc"][4] < xmax_limit]
|
|
|
|
+ current_row_choice_m_d = sorted(current_row_choice_m_d, key=lambda x: x["loc"][1])
|
|
|
|
+ # current_row_choice_m_d.append(choice_m_numbers_list[random_index])
|
|
|
|
+ split_pix = sorted([ele["loc"][1] for ele in current_row_choice_m_d]) # ymin排序
|
|
|
|
+ split_index = get_split_index(split_pix, dif=choice_s_height * 0.8)
|
|
|
|
+ split_pix = [split_pix[ele] for ele in split_index[:-1]]
|
|
|
|
+
|
|
|
|
+ block_list = []
|
|
|
|
+ for i in range(len(split_index) - 1):
|
|
|
|
+ block = current_row_choice_m_d[split_index[i]: split_index[i + 1]]
|
|
|
|
+ if len(block) > 1:
|
|
|
|
+ remain_len = remain_len - (len(block) - 1)
|
|
|
|
+ numbers_new = []
|
|
|
|
+ loc_new = [[], [], [], []]
|
|
|
|
+ for blk in block:
|
|
|
|
+ loc_old = blk["loc"]
|
|
|
|
+ numbers_new.extend(blk["numbers"])
|
|
|
|
+ for ii in range(4):
|
|
|
|
+ loc_new[ii].append(loc_old[ii])
|
|
|
|
+
|
|
|
|
+ loc_new[0] = min(loc_new[0])
|
|
|
|
+ loc_new[1] = min(loc_new[1])
|
|
|
|
+ loc_new[2] = max(loc_new[2])
|
|
|
|
+ loc_new[3] = max(loc_new[3])
|
|
|
|
+
|
|
|
|
+ loc_new.append(loc_new[0] + (loc_new[2] - loc_new[0]) // 2)
|
|
|
|
+ loc_new.append(loc_new[1] + (loc_new[3] - loc_new[1]) // 2)
|
|
|
|
+
|
|
|
|
+ block = [{"numbers": sorted(numbers_new), "loc": loc_new, "direction": block[0]["direction"]}]
|
|
|
|
+
|
|
|
|
+ block_list.extend(block)
|
|
|
|
+
|
|
|
|
+ current_row_choice_m_d = block_list
|
|
|
|
+ current_row_chars = [ele for ele in chars_list
|
|
|
|
+ if xmin_limit < (ele["location"]["top"] + ele["location"]["height"] // 2) < xmax_limit]
|
|
|
|
+
|
|
|
|
+ split_pix.append(limit_bottom)
|
|
|
|
+ for i in range(0, len(split_pix) - 1):
|
|
|
|
+ top_limit = split_pix[i]
|
|
|
|
+ bottom_limit = split_pix[i + 1]
|
|
|
|
+ block_chars = [ele for ele in current_row_chars
|
|
|
|
+ if top_limit < (ele["location"]["left"] + ele["location"]["width"] // 2) < bottom_limit]
|
|
|
|
+
|
|
|
|
+ a_z = '_ABCD_FGHT'
|
|
|
|
+ letter_text = set([ele['char'].upper() for ele in block_chars if ele['char'].upper() in a_z])
|
|
|
|
+ letter_index = [a_z.index(ele['char'].upper()) for ele in block_chars if ele['char'].upper() in a_z]
|
|
|
|
+
|
|
|
|
+ letter_index_times = {ele: 0 for ele in set(letter_index)}
|
|
|
|
+ for l_index in letter_index:
|
|
|
|
+ letter_index_times[l_index] += 1
|
|
|
|
+
|
|
|
|
+ if (a_z.index("T") in letter_index) and (a_z.index("F") in letter_index):
|
|
|
|
+ choice_option = "T, F"
|
|
|
|
+ cols = 2
|
|
else:
|
|
else:
|
|
- if 1.2 * (r_xmax - r_xmin) > choice_s_width:
|
|
|
|
- region_loc['xmin'] = r_xmin - 10
|
|
|
|
- region_loc['xmax'] = r_xmax + 10
|
|
|
|
|
|
+ if len(letter_index) < 1:
|
|
|
|
+ tmp = 4
|
|
|
|
+ choice_option = 'A,B,C,D'
|
|
else:
|
|
else:
|
|
- region_loc['xmin'] = max((r_xmax - r_xmin) // 2 + r_xmin - choice_s_width,
|
|
|
|
- crt_right_max + 10)
|
|
|
|
- region_loc['xmax'] = min((r_xmax - r_xmin) // 2 + r_xmin + choice_s_width ,
|
|
|
|
- limit_right)
|
|
|
|
|
|
+ tmp = max(set(letter_index))
|
|
|
|
+ choice_option = ",".join(a_z[min(letter_index):tmp + 1])
|
|
|
|
+ cols = tmp
|
|
|
|
+
|
|
|
|
+ bias = 3 # pix
|
|
|
|
+ current_loc = current_row_choice_m_d[i]["loc"]
|
|
|
|
+ location = dict(xmin=current_loc[0],
|
|
|
|
+ ymin=current_loc[3] + bias,
|
|
|
|
+ xmax=current_loc[1],
|
|
|
|
+ ymax=bottom_limit - bias)
|
|
|
|
+
|
|
|
|
+ try:
|
|
|
|
+ choice_m_img = utils.crop_region(image, location)
|
|
|
|
+ right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
+ if right_loc > 0:
|
|
|
|
+ location.update(dict(xmax=right_loc + location['xmin']))
|
|
|
|
+ if bottom_loc > 0:
|
|
|
|
+ location.update(dict(ymax=bottom_loc + location['ymin']))
|
|
|
|
+ except Exception as e:
|
|
|
|
+ print(e)
|
|
|
|
+ traceback.print_exc()
|
|
|
|
+
|
|
|
|
+ tmp_w, tmp_h = location['xmax'] - location['xmin'], location['ymax'] - location['ymin'],
|
|
|
|
+ numbers = current_row_choice_m_d[i]["numbers"]
|
|
|
|
+ direction = current_row_choice_m_d[i]["direction"]
|
|
|
|
+ if direction == 180:
|
|
|
|
+ choice_m = dict(class_name='choice_m',
|
|
|
|
+ number=numbers,
|
|
|
|
+ bounding_box=location,
|
|
|
|
+ choice_option=choice_option,
|
|
|
|
+ default_points=[5] * len(numbers),
|
|
|
|
+ direction=direction,
|
|
|
|
+ cols=cols,
|
|
|
|
+ rows=len(numbers))
|
|
|
|
+ else:
|
|
|
|
+ choice_m = dict(class_name='choice_m',
|
|
|
|
+ number=numbers,
|
|
|
|
+ bounding_box=location,
|
|
|
|
+ choice_option=choice_option,
|
|
|
|
+ default_points=[5] * len(numbers),
|
|
|
|
+ direction=direction,
|
|
|
|
+ cols=len(numbers),
|
|
|
|
+ rows=cols)
|
|
|
|
+
|
|
|
|
+ if tmp_h > 2 * choice_s_height:
|
|
|
|
+ need_revised_choice_m_list.append(choice_m)
|
|
|
|
+ else:
|
|
|
|
+ choice_m_list.append(choice_m)
|
|
|
|
|
|
- else:
|
|
|
|
- # 默认这种情况只有1行或2行
|
|
|
|
- numbers = [-1]
|
|
|
|
- region_xmin = crt_right_max + 5
|
|
|
|
- region_xmax = int(region_xmin + 1.2 * choice_s_width)
|
|
|
|
- region_ymin = min([int(ele['bounding_box']['ymin']) for ele in choice_m_list])
|
|
|
|
- region_ymax = max([int(ele['bounding_box']['ymax']) for ele in choice_m_list])
|
|
|
|
- region_ymax = region_ymin + (region_ymax - region_ymin) // 2 # 默认这种情况只有1行或2行
|
|
|
|
- region_loc = {'xmin': region_xmin, 'ymin': region_ymin, 'xmax': region_xmax, 'ymax': region_ymax}
|
|
|
|
|
|
+ remain_len = remain_len - len(current_row_choice_m_d)
|
|
|
|
+ for ele in choice_m_numbers_list.copy():
|
|
|
|
+ if ele in current_row_choice_m_d:
|
|
|
|
+ choice_m_numbers_list.remove(ele)
|
|
|
|
|
|
- try:
|
|
|
|
- choice_m_img = utils.crop_region(image, region_loc)
|
|
|
|
- if 0 in choice_m_img.shape[:2]:
|
|
|
|
- continue
|
|
|
|
- right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
- if right_loc > 0:
|
|
|
|
- region_loc.update(dict(xmax=right_loc + region_loc['xmin']))
|
|
|
|
- if bottom_loc > 0:
|
|
|
|
- region_loc.update(dict(ymax=bottom_loc + region_loc['ymin']))
|
|
|
|
- except Exception as e:
|
|
|
|
- print(e)
|
|
|
|
- traceback.print_exc()
|
|
|
|
|
|
+ for ele in choice_m_numbers_list.copy():
|
|
|
|
+ if ele in current_row_chars:
|
|
|
|
+ choice_m_numbers_list.remove(ele)
|
|
|
|
|
|
- choice_m = dict(class_name='choice_m',
|
|
|
|
- number=numbers,
|
|
|
|
- bounding_box=region_loc,
|
|
|
|
- choice_option='A,B,C,D',
|
|
|
|
- default_points=[5],
|
|
|
|
- direction=180,
|
|
|
|
- cols=4,
|
|
|
|
- rows=1,
|
|
|
|
- single_width=(region_loc['xmax'] - region_loc['xmin']) // 4,
|
|
|
|
- )
|
|
|
|
- choice_m_list.append(choice_m)
|
|
|
|
-
|
|
|
|
- # 单独一行不聚类(理论上不会再到这一步了, 上个block解决)
|
|
|
|
- for i, revised_choice_m in enumerate(need_revised_choice_m_list):
|
|
|
|
- loc = revised_choice_m['bounding_box']
|
|
|
|
- left_part_loc = loc.copy()
|
|
|
|
- left_part_loc.update({'xmax': loc['xmin'] + choice_s_width})
|
|
|
|
- choice_m_img = utils.crop_region(image, left_part_loc)
|
|
|
|
- right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
- if right_loc > 0:
|
|
|
|
- left_part_loc.update(dict(xmax=right_loc + left_part_loc['xmin']))
|
|
|
|
- if bottom_loc > 0:
|
|
|
|
- left_part_loc.update(dict(ymax=bottom_loc + left_part_loc['ymin']))
|
|
|
|
-
|
|
|
|
- left_tmp_height = left_part_loc['ymax'] - left_part_loc['ymin']
|
|
|
|
-
|
|
|
|
- right_part_loc = loc.copy()
|
|
|
|
- # right_part_loc.update({'xmin': loc['xmax']-choice_s_width})
|
|
|
|
- right_part_loc.update({'xmin': left_part_loc['xmax'] + 5})
|
|
|
|
- choice_m_img = utils.crop_region(image, right_part_loc)
|
|
|
|
- right_loc, bottom_loc = adjust_choice_m(choice_m_img, mean_height, mean_width * 2)
|
|
|
|
- if right_loc > 0:
|
|
|
|
- right_part_loc.update(dict(xmax=right_loc + right_part_loc['xmin']))
|
|
|
|
- if bottom_loc > 0:
|
|
|
|
- right_part_loc.update(dict(ymax=bottom_loc + right_part_loc['ymin']))
|
|
|
|
-
|
|
|
|
- right_tmp_height = right_part_loc['ymax'] - right_part_loc['ymin']
|
|
|
|
-
|
|
|
|
- number_len = max(1, int(revised_choice_m['rows'] // (left_tmp_height // right_tmp_height)))
|
|
|
|
- number = [ele + revised_choice_m['number'][-1] + 1 for ele in range(number_len)]
|
|
|
|
- rows = len(number)
|
|
|
|
-
|
|
|
|
- revised_choice_m.update({'bounding_box': left_part_loc})
|
|
|
|
- choice_m_list.append(revised_choice_m)
|
|
|
|
-
|
|
|
|
- tmp = revised_choice_m.copy()
|
|
|
|
- tmp.update({'bounding_box': right_part_loc, 'number': number, 'rows': rows})
|
|
|
|
- choice_m_list.append(tmp)
|
|
|
|
-
|
|
|
|
- choice_m_list_copy = choice_m_list.copy()
|
|
|
|
- for ele in choice_m_list_copy:
|
|
|
|
- loc = ele["bounding_box"]
|
|
|
|
- w, h = loc['xmax'] - loc['xmin'], loc['ymax'] - loc['ymin']
|
|
|
|
- if 2 * w * h < choice_s_width * choice_s_height:
|
|
|
|
- choice_m_list.remove(ele)
|
|
|
|
- return choice_m_list
|
|
|
|
|
|
+ choice_m_list_copy = choice_m_list.copy()
|
|
|
|
+ for ele in choice_m_list_copy:
|
|
|
|
+ loc = ele["bounding_box"]
|
|
|
|
+ w, h = loc['xmax'] - loc['xmin'], loc['ymax'] - loc['ymin']
|
|
|
|
+ if 2 * w * h < choice_s_width * choice_s_height:
|
|
|
|
+ choice_m_list.remove(ele)
|
|
|
|
+
|
|
|
|
+ return choice_m_list
|
|
|
|
|
|
|
|
|
|
-def infer_choice_m(image, tf_sheet, col_split_x, ocr, xml=None):
|
|
|
|
- infer_box_list = ocr2sheet(image, col_split_x, ocr, xml)
|
|
|
|
|
|
+def infer_choice_m(image, tf_sheet, infer_box_list, col_split_x, xml=None):
|
|
|
|
+ # infer_box_list = ocr2sheet(image, col_split_x, ocr, xml)
|
|
if not infer_box_list:
|
|
if not infer_box_list:
|
|
for ele in tf_sheet:
|
|
for ele in tf_sheet:
|
|
if ele['class_name'] == 'choice':
|
|
if ele['class_name'] == 'choice':
|