import cv2 import matplotlib.pylab as plt import numpy as np def read_single_img(img_path): try: im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1) except FileNotFoundError as e: raise e return im def pre_process(image, blank_top=20, blank_bottom=-20, blur_size=5, sigma=5, debug=0): # 返回二值逆图 blank_size = 20 if image.ndim == 3: gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) elif image.ndim == 2: gray = image else: raise Exception('The dimension of the image should be 2 or 3!') # 裁边 gray[0:blank_top, :] = 255 gray[blank_bottom:, :] = 255 gray[:, 0:blank_size] = 255 gray[:, -blank_size:] = 255 pre = 255 - gray pre = cv2.GaussianBlur(pre, (blur_size, blur_size), sigma) binary = cv2.threshold(pre, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] if debug == 1: # 显示灰度图和二值图 plt.figure(figsize=(15, 10)) plt.subplot(211) plt.title('gray') plt.imshow(gray, cmap='gray') plt.subplot(212) plt.title('binary') plt.imshow(255 - binary, cmap='gray') plt.show() return binary def pre_process_for_anchors(image, blank_top=20, blank_bottom=-20, h_ratio=(0.1, 0.9), blur_size=3, sigma=5, blank_size=20, debug=0): # 去掉中间内容,返回上下定位点的二值逆图 # h_ration=0 则不裁去中间部分 # assert(image.ndim == 2 or image.ndim == 3) if image.ndim == 3: gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) elif image.ndim == 2: gray = image.copy() else: raise Exception('The dimension of the image should be 2 or 3!') # 裁边 gray[0:blank_top, :] = 255 gray[blank_bottom:, :] = 255 gray[:, 0:blank_size] = 255 gray[:, -blank_size:] = 255 # 去掉中间内容 if h_ratio != 0: h0 = int(image.shape[0] * h_ratio[0]) h1 = int(image.shape[0] * h_ratio[1]) gray[h0:h1, :] = 255 pre = 255 - gray pre = cv2.GaussianBlur(pre, (blur_size, blur_size), sigma) binary = cv2.threshold(pre, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1] if debug == 1: # 显示灰度图和二值图 plt.figure(figsize=(15, 10)) plt.subplot(211) plt.title('gray') plt.imshow(gray, cmap='gray') plt.subplot(212) plt.title('binary') plt.imshow(255 - binary, cmap='gray') plt.show() return binary def extract_feature(binary, method=4, ker_size1=2, ker_size2=10, debug=0): # 对二值图进一步处理 close_size = 3 kernel_height = 5 kernel_width = 1 close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (close_size, close_size)) kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_height, kernel_width)) horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ker_size1, ker_size2)) vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ker_size2, ker_size1)) if method == 1: # ret = cv2.dilate(binary, kernel) ret = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel, iterations=1) ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, vertical_kernel) elif method == 2: ret = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel) ret = cv2.morphologyEx(ret, cv2.MORPH_CLOSE, close_kernel) elif method == 3: ret = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel) ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, vertical_kernel) elif method == 4: ret = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, close_kernel) ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, horizontal_kernel) ret = cv2.morphologyEx(ret, cv2.MORPH_OPEN, vertical_kernel) else: ret = binary if debug == 1: # 显示特征提取其后的图像 plt.figure(figsize=(15, 10)) plt.subplot(211) plt.title('before feature extraction') plt.imshow(255 - binary, cmap='gray') # plt.show() # plt.figure(figsize=(15, 10)) plt.subplot(212) plt.title('after feature extraction') plt.imshow(255 - ret, cmap='gray') plt.show() return ret def draw_contour(binary): (major, minor, _) = cv2.__version__.split(".") # check cv version boxes = [] contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) contours = contours[1] if major == '3' else contours[0] for i in range(0, len(contours)): xmin, ymin, w, h = cv2.boundingRect(contours[i]) xmax = xmin + w ymax = ymin + h centroid = [xmin + w // 2, ymin + h // 2] boxes.append([xmin, ymin, xmax, ymax, centroid, w*h]) return boxes def draw_connected_component(binary): connectivity = 8 boxes = [] num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(binary, connectivity=connectivity) for l in range(1, num_labels): xmin = stats[l, cv2.CC_STAT_LEFT] ymin = stats[l, cv2.CC_STAT_TOP] xmax = stats[l, cv2.CC_STAT_WIDTH] + xmin ymax = stats[l, cv2.CC_STAT_HEIGHT] + ymin area = stats[l, cv2.CC_STAT_AREA] boxes.append([xmin, ymin, xmax, ymax, [int(centroids[l][0]), int(centroids[l][1])], area]) return boxes def find_boxes(binary, method='connected', debug=0): # 寻找轮廓 if method == 'contour': boxes = draw_contour(binary) elif method == 'connected': boxes = draw_connected_component(binary) else: raise Exception('Wrong method for finding boxes!') if debug == 1: # 显示boxes信息 boxes.sort(key=lambda x: x[4][1]) print('The features of the boxes after feature extractions:') for box in boxes: width = box[2] - box[0] height = box[3] - box[1] w_to_h = width / height area = box[-1] centroid = box[-2] area_ratio = area / (width * height) print('width:{}, height:{}, centroid:{}, w_to_h:{}, area:{}, area ratio:{}'. format(width, height, centroid, w_to_h, area, area_ratio)) return boxes def find_marker_by_shape(boxes, shape_para={'height': (80, 10), 'w2h': (3, 0.6), 'area': (6000, 500), 'area_ratio': 0.5}, debug=0): # 通过形状参数寻找定位点 area_ratio_threshold = 0.96 max_height, min_height = shape_para['height'] max_w2h, min_w2h = shape_para['w2h'] max_area, min_area = shape_para['area'] min_area_ratio = shape_para['area_ratio'] markers = [] for box in boxes: w = box[2] - box[0] h = box[3] - box[1] if box[-1] >= area_ratio_threshold*w*h and min_area <= box[-1] <= max_area: markers.append(box) elif min_height <= h <= max_height and min_w2h <= w/h <= max_w2h \ and min_area <= box[-1] <= max_area and box[-1] >= min_area_ratio*w*h: markers.append(box) if debug == 1: # 显示通过形状参数找到的定位点信息 markers.sort(reverse=True, key=lambda x: x[-1]) print('The features of the markers picking up by shapes:') for box in markers: width = box[2] - box[0] height = box[3] - box[1] w_to_h = width / height area = box[-1] centroid = box[-2] area_ratio = area / (width * height) print('width:{}, height:{}, centroid:{}, w_to_h:{}, area:{}, area ratio:{}'. format(width, height, centroid, w_to_h, area, area_ratio)) elif debug == 2: # 显示所有定位点信息 print('The features of the markers without picking up by shapes:') for box in boxes: markers.append(box) for box in markers: width = box[2] - box[0] height = box[3] - box[1] w_to_h = width / height area = box[-1] centroid = box[-2] area_ratio = area / (width * height) print('width:{}, height:{}, centroid:{}, w_to_h:{}, area:{}, area ratio:{}'. format(width, height, centroid, w_to_h, area, area_ratio)) return markers def find_box_list_by_position(box, box_list, method='h', shift_threshold=30, slope_threshold=0.2, area_threshold=0.28): # 根据相近原则将box加入box_list中 if len(box_list) > 0: if method == 'h': # 水平分布 index_flag, distance = -1, shift_threshold for index, bl in enumerate(box_list): d = abs(box[4][1] - bl[-1][4][1]) if d < distance: distance = d index_flag = index if index_flag >= 0: box_list[index_flag].append(box) else: box_list.append([box]) elif method == 'v': # 垂直分布 index_flag, distance = -1, shift_threshold for index, bl in enumerate(box_list): d = abs(box[4][0] - bl[-1][4][0]) if d < distance and d < abs(box[4][1] - bl[-1][4][1]) * slope_threshold: distance = d index_flag = index if index_flag >= 0: box_list[index_flag].append(box) else: box_list.append([box]) elif method == 's': # 面积相近分布 index_flag, area_diff = -1, area_threshold for index, bl in enumerate(box_list): d = abs((box[-1] - bl[-1][-1]) / bl[-1][-1]) if d < area_diff: area_diff = d index_flag = index if index_flag >= 0: box_list[index_flag].append(box) else: box_list.append([box]) else: box_list.append([box]) return box_list def collect_markers_by_position(boxes, method='h', shift_threshold=30, slope_threshold=0.2, area_threshold=0.28, debug=0): # 按照相近位置排列定位点 box_list = [] if method == 'h': # 按水平位置相近排列 boxes.sort(key=lambda x: x[4][0]) for b in boxes: box_list = find_box_list_by_position(b, box_list, method=method, shift_threshold=shift_threshold, slope_threshold=slope_threshold) box_list.sort(key=lambda x: x[0][4][1]) elif method == 'v': # 按垂直位置相近排列 boxes.sort(key=lambda x: x[4][1]) for b in boxes: box_list = find_box_list_by_position(b, box_list, method=method, shift_threshold=shift_threshold, slope_threshold=slope_threshold) box_list.sort(key=lambda x: x[0][4][0]) elif method == 's': # 按面积大小相近排列 boxes.sort(reverse=True, key=lambda x: x[-1]) for b in boxes: box_list = find_box_list_by_position(b, box_list, method=method, shift_threshold=shift_threshold, slope_threshold=slope_threshold, area_threshold=area_threshold) box_list.sort(reverse=True, key=lambda x: x[0][-1]) # if method == 'h': # 按水平位置相近排列 # boxes.sort(key=lambda x: x[4][1]) # for b in boxes: # index_flag, distance = -1, shift_threshold # for index, single_list in enumerate(box_list): # if abs(b[4][1] - single_list[-1][4][1]) < distance: # distance = abs(b[4][1] - single_list[-1][4][1]) # index_flag = index # if index_flag >= 0: # box_list[index_flag].append(b) # else: # box_list.append([b]) # # elif method == 'v': # 按垂直位置相近排列 # boxes.sort(key=lambda x: x[4][0]) # for b in boxes: # index_flag, distance = -1, shift_threshold # for index, single_list in enumerate(box_list): # if abs(b[4][0] - single_list[-1][4][0]) < distance: # distance = abs(b[4][0] - single_list[-1][4][0]) # index_flag = index # if index_flag >= 0: # box_list[index_flag].append(b) # else: # box_list.append([b]) if debug == 1: print('box list slope') if method == 'h': for box in box_list: if len(box) >= 2: for i in range(len(box)-1): slope = (box[i+1][4][1] - box[i][4][1])/(box[i+1][4][0] - box[i][4][0]) print(slope) elif method == 'v': for box in box_list: if len(box) >= 2: for i in range(len(box) - 1): slope = (box[i + 1][4][0] - box[i][4][0]) / (box[i + 1][4][1] - box[i][4][1]) print(slope) return box_list def collect_markers_by_area(boxes, area_threshold=2): # 去除面积相差过大的定位点 boxes.sort(reverse=True, key=lambda x: x[-1]) # print(boxes[0]) min_area = boxes[0][-1] / area_threshold boxes = [b for b in boxes if b[-1] > min_area] return boxes def check_with_anchor(problem_markers, top_anchors, page_width, column_num): # 根据top_anchors位置去除异常markers min_shift = 100 column_pos = [] if len(top_anchors) == column_num + 1: column_pos.append(top_anchors[0][4][0]) if top_anchors[1][4][0] - top_anchors[0][4][0] < top_anchors[-1][4][0] - top_anchors[-2][4][0]: for i in range(2, column_num+1): column_pos.append(top_anchors[i][4][0] - page_width) else: for i in range(1, column_num): column_pos.append(top_anchors[i][4][0]) for index, markers in enumerate(problem_markers): remove_list = [] for i in range(len(markers)//2): if abs(markers[2*i][4][0]-column_pos[index]) > min_shift or \ abs(markers[2*i+1][4][0]-column_pos[index]-page_width) > min_shift: remove_list.extend([2*i, 2*i+1]) problem_markers[index] = [problem_markers[index][i] for i in range(len(problem_markers[index])) if i not in remove_list] return problem_markers def remove_abnormal_marker(markers, page_width, debug=0): # 从markers中剔除异常点 error = 10 max_std = 3 min_std = 0.1 min_area_ratio = 0.9 min_distance = 60 min_slope = 0.2 distance_list = [] remove_list = [] for i in range(len(markers)//2-1): min_flag = i distance_flag = abs(markers[2*i+1][4][0] - markers[2*i][4][0] - page_width) + abs(markers[2*i+1][4][1] - markers[2*i][4][1]) for j in range(i+1, len(markers)//2): if abs(markers[2*i+1][4][0] - markers[2*j+1][4][0]) + abs(markers[2*i+1][4][1] - markers[2*j+1][4][1]) \ < error: if distance_flag < abs(markers[2*j+1][4][0] - markers[2*j][4][0] - page_width) + \ abs(markers[2*j+1][4][1] - markers[2*j][4][1]): remove_list.extend([2*j, 2*j+1]) else: distance_flag = abs(markers[2*j+1][4][0] - markers[2*j][4][0] - page_width) + \ abs(markers[2*j+1][4][1] - markers[2*j][4][1]) remove_list.extend([2*min_flag, 2*min_flag+1]) min_flag = j markers = [markers[i] for i in range(len(markers)) if i not in remove_list] remove_list = [] if len(markers) >= 6: left_slope_list = np.asarray([abs((markers[2 * i][4][0] - markers[2 * i + 2][4][0]) / (markers[2 * i][4][1] - markers[2 * i + 2][4][1])) for i in range(len(markers)//2-1)]) right_slope_list = np.asarray([abs((markers[2 * i + 1][4][0] - markers[2 * i + 3][4][0]) / (markers[2 * i + 1][4][1] - markers[2 * i + 3][4][1])) for i in range(len(markers) // 2 - 1)]) left_slope_list = left_slope_list > min_slope right_slope_list = right_slope_list > min_slope for i in range(len(left_slope_list)): if left_slope_list[i]: if i == len(left_slope_list) - 1: if not left_slope_list[i-1]: remove_list.extend([2*(i+1), 2*(i+1)+1]) elif left_slope_list[i+1]: remove_list.extend([2*(i+1), 2*(i+1)+1]) elif not left_slope_list[i+1]: remove_list.extend([2*i, 2*i+1]) for i in range(len(right_slope_list)): if right_slope_list[i]: if i == len(right_slope_list) - 1: if not right_slope_list[i-1]: remove_list.extend([2*(i+1), 2*(i+1)+1]) elif right_slope_list[i+1]: remove_list.extend([2*(i+1), 2*(i+1)+1]) elif not right_slope_list[i+1]: remove_list.extend([2*i, 2*i+1]) markers = [markers[i] for i in range(len(markers)) if i not in set(remove_list)] remove_list = [] if len(markers) >= 2: left_x_list = np.asarray([markers[2*i][4][0] for i in range(len(markers)//2)]) left_y_list = np.asarray([markers[2*i][4][1] for i in range(len(markers)//2)]) right_x_list = np.asarray([markers[2*i+1][4][0] for i in range(len(markers)//2)]) right_y_list = np.asarray([markers[2*i+1][4][1] for i in range(len(markers)//2)]) distance_list = right_x_list - left_x_list shift_list = right_y_list - left_y_list left_x_mean = left_x_list.mean() distance_mean = distance_list.mean() shift_mean = shift_list.mean() left_x_std = left_x_list.std() distance_std = distance_list.std() shift_std = shift_list.std() if len(markers) >= 4: for i in range(len(markers)//2): if left_x_std > min_std and abs(left_x_list[i] - left_x_mean) / left_x_std > max_std: remove_list.extend([2*i, 2*i+1]) elif shift_std > min_std and abs(shift_list[i] - shift_mean) / shift_std > max_std: remove_list.extend([2 * i, 2 * i + 1]) elif distance_std > min_std and abs(distance_list[i] - distance_mean) / distance_std > max_std: remove_list.extend([2 * i, 2 * i + 1]) elif len(markers) == 2: # area_ratio_list = np.asarray([m[-1]/((m[2]-m[0])*(m[3]-m[1])) for m in markers]) if abs(distance_list-page_width) + abs(shift_list) > min_distance: remove_list.extend([0, 1]) markers = [markers[i] for i in range(len(markers)) if i not in remove_list] if len(markers) >= 2: new_page_width = markers[1][4][0] - markers[0][4][0] else: new_page_width = page_width if debug == 1: print(len(markers)) if len(markers) >= 4: print('left', left_x_mean, left_x_std) print(left_x_list) for i, x in enumerate(left_x_list): delta = abs(x - left_x_mean) / left_x_std print(delta, left_y_list[i]) print('shift', shift_mean, shift_std) print(shift_list) for i, shift in enumerate(shift_list): delta = abs(shift - shift_mean) / shift_std print(delta, left_y_list[i]) print('distance', distance_mean, distance_std) print(distance_list) for i, distance in enumerate(distance_list): delta = abs(distance - distance_mean) / distance_std print(delta, left_y_list[i]) print('total') for i in range(len(left_x_list)): delta = abs(left_x_list[i] - left_x_mean) / left_x_std + abs(shift_list[i] - shift_mean) / shift_std \ + abs(distance_list[i] - distance_mean) / distance_std d = abs(left_x_list[i] - left_x_mean) + abs(shift_list[i] - shift_mean) + \ abs(distance_list[i] - distance_mean) print(delta, d, left_y_list[i]) if debug == 2: if len(markers) >= 2: print('page width', page_width, 'new page width:', new_page_width, 'distant difference:', abs(new_page_width - page_width) + abs(markers[1][4][1]-markers[0][4][1])) return markers, new_page_width def draw_box(image, boxes, color=(0, 255, 0), debug=0): # 生成定位点标注框图 for box in boxes: if len(box) > 0: cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), color, 5) if debug == 1: # 显示定位点信息 for box in boxes: if len(box) == 4: width = box[2] - box[0] height = box[3] - box[1] w_to_h = width / height area = width * height # centroid = (box[0]+box[1])/2, (box[1]+box[3])/2 print('width:{}, height:{}, w_to_h:{}, area:{}, top_left:{}, bottom_right:{},'. format(width, height, w_to_h, area, box[0:2], box[2:4])) elif len(box) > 4: width = box[2] - box[0] height = box[3] - box[1] w_to_h = width / height position_ratio = box[4][0] / image.shape[1] area = box[-1] centroid = box[4] area_ratio = area / (width * height) print('width:{}, height:{}, centroid:{}, position ratio:{}, w_to_h:{}, area:{}, area ratio:{}'. format(width, height, centroid, position_ratio, w_to_h, area, area_ratio)) def find_pair(marker, boxes, page_width, threshold=100): # 若page_width为正,在boxes中找到marker的右配对, 若page_width为负, 在在boxes中找到marker的左配对 distance = threshold pair_index = -1 for i in range(len(boxes)): if abs(marker[4][1] - boxes[i][4][1]) + abs(boxes[i][4][0] - marker[4][0] - page_width) <= threshold: if abs(marker[4][1] - boxes[i][4][1]) + abs(boxes[i][4][0] - marker[4][0] - page_width) < distance: distance = abs(marker[4][1] - boxes[i][4][1]) + abs(boxes[i][4][0] - marker[4][0] - page_width) pair_index = i if pair_index >= 0: return boxes[pair_index], pair_index, distance else: return [], pair_index, distance def find_pair_list(marker_list, all_list, page_width, horizontal_threshold=100, debug=0): # all_list 中找到与marker_list最接近的配对list max_count = 0 index_flag = -1 min_distance = horizontal_threshold for index, l in enumerate(all_list): count = 0 distance = 0 for m in marker_list: if find_pair(m, l, page_width, horizontal_threshold)[1] >= 0: count += 1 distance += find_pair(m, l, page_width, horizontal_threshold)[2] if count > max_count: max_count = count index_flag = index min_distance = distance / count elif count == max_count and count > 0: distance /= count if distance < min_distance: min_distance = distance index_flag = index if debug == 1: if index_flag >= 0: print('page width:', abs(marker_list[0][4][0] - all_list[index_flag][0][4][0]), 'anchor width:', page_width) return all_list[index_flag], index_flag, min_distance def find_column(anchors, width, column_num=2, debug=0): # 确定栏数,单栏宽度及第一栏和最后一栏的定位 double_page_width_ratio = 0.42 # 默认双栏宽度比例 three_page_width_ratio = 0.29 # 默认三栏宽度比例 double_page_separation = 250 # 默认双栏栏间间距 three_page_separation = 100 # 默认三栏栏间间距 horizontal_threshold = 80 # 单栏宽度比例阈值 top_anchors, bottom_anchors = anchors[:2] page_width = width if len(top_anchors) >= 2: for i in range(len(top_anchors)-1): page_width_0 = top_anchors[i+1][4][0] - top_anchors[i][4][0] page_width_1 = (top_anchors[i + 1][4][0] - top_anchors[i][4][0]) // 2 page_width_2 = (top_anchors[i + 1][4][0] - top_anchors[i][4][0]) // 3 if abs(page_width_0 - width * double_page_width_ratio) < horizontal_threshold: column_num = 2 if page_width_0 < page_width: page_width = page_width_0 elif abs(page_width_0 - width * three_page_width_ratio) < horizontal_threshold: column_num = 3 if page_width_0 < page_width: page_width = page_width_0 elif abs(page_width_1 - width * double_page_width_ratio) < horizontal_threshold: column_num = 2 if page_width_1 < page_width: page_width = page_width_1 elif abs(page_width_1 - width * three_page_width_ratio) < horizontal_threshold: column_num = 3 if page_width_1 < page_width: page_width = page_width_1 elif abs(page_width_2 - width * double_page_width_ratio) < horizontal_threshold: column_num = 2 if page_width_2 < page_width: page_width = page_width_2 elif abs(page_width_2 - width * three_page_width_ratio) < horizontal_threshold: column_num = 3 if page_width_2 < page_width: page_width = page_width_2 if page_width == width: if column_num == 2: page_width = int(width * double_page_width_ratio) # 如果没有找到合适的大定位点,使用默认的双栏宽度 elif column_num == 3: page_width = int(width * three_page_width_ratio) # 如果没有找到合适的大定位点,使用默认的三栏宽度 # 寻找第一栏和最后一栏的定位 column_pos = [] if len(top_anchors) >= 1: for i in range(4): if top_anchors[0][4][0] - (i + 1) * page_width < 0: column_pos.append(top_anchors[0][4][0] - i * page_width) break for i in range(4): if top_anchors[-1][4][0] + (i + 1) * page_width > width: column_pos.append(top_anchors[-1][4][0] + (i - 1) * page_width) break elif len(bottom_anchors) == 2: column_pos = [bottom_anchors[0][4][0], bottom_anchors[-1][4][0] - page_width] elif column_num == 2: column_pos = [(width - double_page_separation) // 2 - page_width, (width + double_page_separation) // 2] elif column_num == 3: column_pos = [width // 2 - three_page_separation - page_width * 3 // 2, width // 2 + three_page_separation + page_width // 2] if debug == 1: print('top anchors') for t in top_anchors: print(t[4]) print('bottom anchors') for b in bottom_anchors: print(b[4]) print('page width:', page_width, 'column number:', column_num, 'column position:', column_pos) return page_width, column_num, column_pos