123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239 |
- # @Author : lightXu
- # @File : exam_number_box.py
- # @Time : 2018/11/22 0022 下午 15:59
- import cv2
- import numpy as np
- import xml.etree.cElementTree as ET
- from segment.sheet_resolve.tools import utils
- from segment.sheet_resolve.tools.brain_api import get_ocr_text_and_coordinate, get_ocr_text_and_coordinate_direction
- import re
def preprocess(img, xe, ye):
    """Binarize an image with inverted Otsu thresholding, then dilate it.

    Args:
        img: input image as a numpy array (BGR, as loaded by ``cv2.imread``).
        xe: dilation kernel width (amount of dilation along the x axis).
        ye: dilation kernel height (amount of dilation along the y axis).

    Returns:
        Binary (0/255) uint8 image after dilation; ink/foreground is white.
    """
    # NOTE: the original version carried a hard-coded ``scale = 0`` (so the
    # resize branch was unreachable) plus unused ``dilate``/``blur`` settings
    # feeding commented-out denoise/smooth steps; all of that dead code has
    # been removed — behavior is unchanged.

    # Convert to gray before thresholding.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Inverted binarization with Otsu's automatic threshold:
    # dark strokes on white paper become white-on-black.
    img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # Kernel shape is (rows, cols) == (ye, xe): ye dilates along y, xe along x.
    kernel = np.ones((ye, xe), np.uint8)
    dst = cv2.dilate(img, kernel, iterations=1)
    return dst
def contours(image):
    """Find external contours and return their bounding boxes.

    Args:
        image: binary image (white foreground on black), e.g. the output
            of ``preprocess``.

    Returns:
        List of ``(xmin, ymin, xmax, ymax)`` tuples, in reversed order of
        the contours reported by OpenCV.

    NOTE(review): the 3-value unpacking requires OpenCV 3.x —
    ``cv2.findContours`` returns only ``(contours, hierarchy)`` in
    OpenCV 4.x; confirm the pinned OpenCV version before upgrading.
    """
    _, cnts, _ = cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # x/y/w/h -> corner-style box; the unused enumerate index was removed.
    return [
        (x, y, x + w, y + h)
        for x, y, w, h in (cv2.boundingRect(cnt) for cnt in reversed(cnts))
    ]
def box_coordinates(img):
    """Segment exam-number cells via projection profiles and guess sheet orientation.

    Args:
        img: dilated binary image (white ink on black), as produced by
            ``preprocess``.

    Returns:
        Tuple ``(all_coordinates, direction)`` where ``all_coordinates`` is a
        list of ``{'row': str, 'coordinates': [...]}`` dicts (one per detected
        row of cells) and ``direction`` is 180 or 90 (orientation heuristic).
    """
    img_arr = np.asarray(img)

    def axix_break_point(img, tolerance_number, axis):
        # Project the image onto one axis and find run boundaries:
        # returned indices alternate start/end of each "ink" run.
        sum_x_axis = img.sum(axis=axis)
        sum_x_axis[sum_x_axis > 255 * tolerance_number] = 1  # white: ink present
        sum_x_axis[sum_x_axis != 1] = 0  # black: no ink
        sum_x_axis_list = list(sum_x_axis)
        # Append a trailing 0 so a run touching the image edge still gets
        # a closing index.
        sum_x_axis_list.append(0)
        split_x_index = []
        num = 1
        # Toggle between looking for a 1 (run start) and a 0 (run end).
        for index, ele in enumerate(sum_x_axis_list):
            num = num % 2
            if ele == num:
                num = num + 1
                split_x_index.append(index)
        return split_x_index

    y_break_points_list = axix_break_point(img_arr, 1, axis=1)  # group along y
    # Top band: from the image top to the end of the first horizontal run.
    img_arr_upper = img_arr[:y_break_points_list[1], :]
    x_break_points_list = axix_break_point(img_arr_upper, 1, axis=0)
    # Few vertical runs in the top band => the top band is a handwritten
    # exam-number area rather than the grid of cells — TODO confirm heuristic.
    if len(x_break_points_list) <= 4:
        hand_writing = True
    else:
        hand_writing = False
    img_arr_for_x = img_arr
    ocr_region = img_arr_upper
    if hand_writing:  # a handwritten exam-number region exists above the grid
        # Use the second horizontal band for OCR and drop the first band
        # from the y break points before segmenting columns.
        ocr_region = img_arr[y_break_points_list[2]:y_break_points_list[3], :]
        y_break_points_list = y_break_points_list[2:]
        img_arr_for_x = img_arr[y_break_points_list[1]:, :]
        x_break_points_list = axix_break_point(img_arr_for_x, 1, axis=0)
    all_coordinates = []
    row_number = 0
    for i in range(0, len(y_break_points_list), 2):  # y-axis groups (start, end)
        ymin = y_break_points_list[i]
        ymax = y_break_points_list[i + 1]
        # Placeholder first row; stripped off below after vstack.
        matrix = np.array([0, 0, 0, 0])
        if ymax - ymin > 3:  # filter out thin noise bands
            for j in range(0, len(x_break_points_list), 2):
                xmin = x_break_points_list[j]
                xmax = x_break_points_list[j + 1]
                if xmax - xmin > 3:
                    matrix = np.vstack([matrix, np.array([xmin, ymin, xmax, ymax])])
            matrix = matrix[1:, :]
            # Gap between each char's left edge and the previous char's right edge.
            dif = matrix[1:, 0] - matrix[:-1, 2]
            dif[dif < 0] = 0
            dif_length = np.mean(dif)  # boxes closer than the mean gap get merged
            block_list = utils.box_by_x_intervel(matrix, dif_length)
            row = {'row': '{}'.format(row_number), 'coordinates': block_list}
            all_coordinates.append(row)
            row_number += 1
    # OCR the header band to infer text content and sheet orientation;
    # fall back to the grid region if the header OCR call fails.
    try:
        word_result_list, _ = get_ocr_text_and_coordinate_direction(ocr_region)
    except Exception:
        word_result_list, _ = get_ocr_text_and_coordinate_direction(img_arr_for_x)
    direction = 180
    if len(word_result_list) > 0:
        all_char_list = []
        digital_model = re.compile(r'\d')
        for i, chars_dict in enumerate(word_result_list):
            chars_list = chars_dict['chars']
            for ele in chars_list:
                if digital_model.search(ele['char']):
                    all_char_list.append(int(ele['char']))
        # Digits 0..9 sum to 45; if OCR'd digits sum below half of that the
        # sheet is taken as upside-down (180), otherwise rotated 90 —
        # NOTE(review): heuristic, confirm against callers.
        if sum(all_char_list) < 45 // 2:
            direction = 180
        else:
            direction = 90
    return all_coordinates, direction
def exam_number(left, top, image, xml_path):
    """Detect individual exam-number cells, record them into the XML file,
    and return their numbered regions in full-page coordinates.

    Args:
        left: x offset of ``image`` within the full sheet.
        top: y offset of ``image`` within the full sheet.
        image: cropped exam-number region (BGR numpy array).
        xml_path: path of the annotation XML to update (written in place).

    Returns:
        List of ``{'number': 'NN_row', 'region': [xmin, ymin, xmax, ymax]}``
        dicts, with region coordinates shifted by ``(left, top)``.
    """
    img = preprocess(image, 3, 3)
    box_list, _ = box_coordinates(img)
    exam_bbox_list = []
    tree = ET.parse(xml_path)  # xml tree
    for exam_bbox in box_list:  # unused enumerate index removed
        row_number = exam_bbox['row']
        coordinates = exam_bbox['coordinates']
        ii = 0  # per-row counter: only boxes that pass the area filter are numbered
        for coordinate in coordinates:  # unused enumerate index removed
            area = (coordinate[2] - coordinate[0]) * (coordinate[3] - coordinate[1])
            if area > 400:  # drop tiny noise boxes
                number = '{:02d}_{}'.format(ii, row_number)
                # Shift back to full-page coordinates (computed once, reused
                # for both the XML node and the returned region).
                region = [coordinate[0] + left, coordinate[1] + top,
                          coordinate[2] + left, coordinate[3] + top]
                tree = utils.create_xml(number, tree,
                                        region[0], region[1], region[2], region[3])
                exam_bbox_list.append({'number': number, 'region': region})
                ii = ii + 1
    tree.write(xml_path)
    return exam_bbox_list
def exam_number_column(left, top, image, xml_path):
    """Group detected exam-number cells into vertical columns.

    For each column index, stacks the i-th cell of every row, takes the
    bounding envelope, and emits a column descriptor (assumes 10 choice
    rows, digits 0-9, per column).

    Args:
        left: x offset of ``image`` within the full sheet.
        top: y offset of ``image`` within the full sheet.
        image: cropped exam-number region (BGR numpy array).
        xml_path: path of the annotation XML to update (written in place).

    Returns:
        List of column dicts with 'number', 'location', 'single_height',
        'single_width', 'choice_option', 'row', 'column'.
    """
    img = preprocess(image, 3, 3)
    box_list, _ = box_coordinates(img)
    column_number = len(box_list[0]['coordinates'])
    tree = ET.parse(xml_path)  # xml tree
    column_list = []
    for i in range(column_number):
        # Stack the i-th box of every row into one (rows, 4) matrix.
        combine = np.vstack([np.array(coord['coordinates'][i]) for coord in box_list])
        min_temp = np.min(combine, axis=0)
        max_temp = np.max(combine, axis=0)
        # int() casts keep the dict JSON-friendly, consistent with
        # exam_number_whole (which already casts).
        column_coordinate = {'xmin': int(min_temp[0] + left), 'ymin': int(min_temp[1] + top),
                             'xmax': int(max_temp[2] + left), 'ymax': int(max_temp[3] + top)}
        single_height = np.mean(combine[:, 3] - combine[:, 1])
        single_width = np.mean(combine[:, 2] - combine[:, 0])
        column_dict = {'number': i, 'location': column_coordinate,
                       'single_height': int(single_height),
                       'single_width': int(single_width),
                       "choice_option": "0,1,2,3,4,5,6,7,8,9",
                       'row': 10, 'column': 1}
        column_list.append(column_dict)
        tree = utils.create_xml(str(i), tree,
                                column_coordinate['xmin'], column_coordinate['ymin'],
                                column_coordinate['xmax'], column_coordinate['ymax'])
    # BUG FIX: the annotated tree was built but never persisted — both
    # sibling functions (exam_number, exam_number_whole) write the tree
    # back; without this the XML edits were silently discarded.
    tree.write(xml_path)
    return column_list
def exam_number_whole(left, top, image, xml_path):
    """Merge all detected exam-number cells into one region descriptor.

    Computes the bounding envelope of every cell, the mean cell size, and
    the orientation guess, records an 'exam_number' node into the XML file
    at *xml_path*, and returns the combined descriptor.

    Args:
        left: x offset of ``image`` within the full sheet.
        top: y offset of ``image`` within the full sheet.
        image: cropped exam-number region (BGR numpy array).
        xml_path: path of the annotation XML to update (written in place).

    Returns:
        Dict with 'location', 'single_height', 'single_width',
        'choice_option', 'row', 'column', 'direction'.
    """
    binary = preprocess(image, 3, 3)
    box_list, direction = box_coordinates(binary)
    rows = len(box_list)
    cols = len(box_list[0]['coordinates'])
    # Flatten every row's cell boxes into an (rows*cols, 4) array.
    cells = np.asarray([entry['coordinates'] for entry in box_list]).reshape(cols * rows, 4)
    lo = np.min(cells, axis=0)
    hi = np.max(cells, axis=0)
    # Envelope of all cells, shifted to full-page coordinates.
    column_coordinate = {'xmin': int(lo[0] + left), 'ymin': int(lo[1] + top),
                         'xmax': int(hi[2] + left), 'ymax': int(hi[3] + top)}
    mean_height = np.mean(cells[:, 3] - cells[:, 1])
    mean_width = np.mean(cells[:, 2] - cells[:, 0])
    column_dict = {'location': column_coordinate,
                   'single_height': int(mean_height),
                   'single_width': int(mean_width),
                   "choice_option": "0,1,2,3,4,5,6,7,8,9",
                   'row': rows, 'column': cols,
                   'direction': direction}
    tree = ET.parse(xml_path)  # xml tree
    tree = utils.create_xml('exam_number', tree,
                            column_coordinate['xmin'], column_coordinate['ymin'],
                            column_coordinate['xmax'], column_coordinate['ymax'])
    tree.write(xml_path)
    return column_dict
|