# @Author : lightXu
# @File : pre_segment.py
import time
import numpy as np
import cv2
from numpy import asarray
import base64
import scipy.signal

from segment.image_operation import utils


def hough_rotate_cv(image):
    """Deskew with Hough lines: not very time-consuming, but weak generalization,
    limited accuracy and quite a few hyper-parameters to tune."""
    img_np = utils.resize_by_percent(asarray(image), 1)
    if len(img_np.shape) == 3:
        img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    canny_image = cv2.Canny(img_np, 0, 255, apertureSize=3)
    # cv2.imshow('canny', canny_image)
    # cv2.waitKey(10)
    lines = cv2.HoughLinesP(canny_image, 1, np.pi / 180, 160, minLineLength=500, maxLineGap=65)
    # lines = cv2.HoughLines(canny_image, 1, np.pi / 180, 160, max_theta=30, min_theta=0)
    # find the longest detected line
    distance = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        dis = np.sqrt(pow((x2 - x1), 2) + pow((y2 - y1), 2))
        distance.append(dis)
    max_dis_index = distance.index(max(distance))
    max_line = lines[max_dis_index]
    x1, y1, x2, y2 = max_line[0]
    # rotation angle of that line
    angle = cv2.fastAtan2((y2 - y1), (x2 - x1))
    print(angle)
    if 0.5 <= angle <= 7:  # rotation threshold chosen empirically to absorb detection error
        centerpoint = (image.shape[1] / 2, image.shape[0] / 2)
        rotate_mat = cv2.getRotationMatrix2D(centerpoint, angle, 1.0)  # rotation matrix
        correct_image = cv2.warpAffine(image, rotate_mat, (image.shape[1], image.shape[0]),
                                       borderValue=(255, 255, 255))
        # cv2.imshow('test', resize_by_percent(correct_image, 0.1))
        # cv2.waitKey(10)
        return correct_image
    else:
        return image
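

# Illustrative usage sketch: 'page.jpg' and _demo_hough_rotate are placeholders
# added for demonstration, not part of the segmentation pipeline itself.
def _demo_hough_rotate(img_path='page.jpg'):
    page = utils.read_img(img_path)
    # Returns a deskewed copy when the dominant line is tilted by 0.5-7 degrees,
    # otherwise the input image comes back unchanged.
    return hough_rotate_cv(page)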


def array_latter_subtracts_precious(nparray):
    """Subtract each element from its successor (first-order difference, like np.diff)."""
    array1 = nparray[:-1]
    array2 = nparray[1:]
    return array2 - array1
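

# Illustrative check: _demo_latter_subtracts_previous and its sample values are
# hypothetical, showing that the helper above matches np.diff on a 1-D array.
def _demo_latter_subtracts_previous():
    sample = np.array([3, 7, 2, 9])
    assert np.array_equal(array_latter_subtracts_precious(sample), np.diff(sample))
    return np.diff(sample)  # -> array([ 4, -5,  7])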


def split_by_index(im_raw, index):
    y_raw, x_raw, _ = im_raw.shape
    img_left = im_raw[1:y_raw, 1:index]
    img_right = im_raw[1:y_raw, index + 1:x_raw]
    return img_left, img_right


def split_img_at_middle_by_y_axis(img_path, radio=0.10, thresh_std=5000):
    im_raw = utils.read_img(img_path)
    im_resize = utils.resize_by_percent(im_raw, radio)
    ry, rx, _ = im_resize.shape
    img_mtx0 = np.asarray(utils.rgb2binary(im_resize))
    y_sum_array0 = img_mtx0.sum(axis=0)
    tmp = array_latter_subtracts_precious(y_sum_array0 / ry)
    std0 = np.std(tmp)  # standard deviation of the column-sum differences
    # # plt.bar(range(len(y_sum_array0)), y_sum_array0)
    # # plt.show()
    # plt.plot(range(len(y_sum_array0)-1), tmp)
    # plt.show()

    y, x, _z = im_resize.shape
    x_bias = int(x * 0.15)
    y_bias = int(y * 0.30)
    middle_x = int(x / 2)
    middle_area_img = im_resize[y_bias:y, middle_x - x_bias:middle_x + x_bias]
    img_mtx = np.asarray(utils.rgb2binary(middle_area_img))
    y_sum_array = img_mtx.sum(axis=0)
    std = np.std(y_sum_array)  # standard deviation of the central column sums
    y_sum_list = list(y_sum_array)
    if std <= thresh_std:
        index = y_sum_list.index(max(y_sum_list))
    else:
        index = y_sum_list.index(min(y_sum_list))
    split_index = middle_x + index - int(len(y_sum_list) / 2)
    split_index = int(split_index / radio)

    y_raw, x_raw, _ = im_raw.shape
    img_left = im_raw[1:y_raw, 1:split_index]
    img_right = im_raw[1:y_raw, split_index + 1:x_raw]
    left_path = img_path.replace('.jpg', '_left.jpg')
    right_path = img_path.replace('.jpg', '_right.jpg')
    cv2.imencode('.jpg', img_left)[1].tofile(left_path)
    cv2.imencode('.jpg', img_right)[1].tofile(right_path)
    print(left_path)
    print(right_path)


def smart_split_img_at_middle_by_x_axis(img_path, resize_radio=0.1):
    im_raw = utils.read_img(img_path)
    im_resize = utils.resize_by_percent(im_raw, resize_radio)
    bin_img = utils.rgb2binary(im_resize)
    ry, rx = bin_img.shape
    img_mtx0 = np.asarray(bin_img)
    y_sum_array0 = img_mtx0.sum(axis=0)  # column-wise sums (projection onto the x axis)
    subtracts_arr = np.abs(array_latter_subtracts_precious(y_sum_array0 / ry))  # one element shorter
    subtracts_arr_index = np.argsort(subtracts_arr, kind='quicksort', order=None)
    subtracts_arr_index = subtracts_arr_index[-10:]
    index_middle_distance_list = list(np.abs(subtracts_arr_index - int(rx / 2)))
    split_index = subtracts_arr_index[index_middle_distance_list.index(min(index_middle_distance_list))] + 1
    split_index = int(split_index / resize_radio)

    img_left, img_right = split_by_index(im_raw, split_index)
    left_path = img_path.replace('.jpg', '_left.jpg')
    right_path = img_path.replace('.jpg', '_right.jpg')
    cv2.imencode('.jpg', img_left)[1].tofile(left_path)
    cv2.imencode('.jpg', img_right)[1].tofile(right_path)
    print(left_path)
    print(right_path)
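

# Illustrative usage sketch: 'double_page.jpg' and _demo_split_double_page are
# placeholders. Both splitters write '_left.jpg' and '_right.jpg' files next to
# the input; the "smart" variant picks the split column from the largest jump
# in the column-sum profile closest to the centre.
def _demo_split_double_page(img_path='double_page.jpg'):
    smart_split_img_at_middle_by_x_axis(img_path, resize_radio=0.1)
    # fixed central-window variant:
    # split_img_at_middle_by_y_axis(img_path, radio=0.10)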


def segment2parts_by_pix(crop_img):
    p_image = utils.preprocess(crop_img)
    height, width = p_image.shape
    sum_x_axis = p_image.sum(axis=0) / (height * 255)
    # sum_x_axis = (sum_x_axis / (255*height)).astype(float)
    kernel = np.array([-2, 0, 2])
    sobel_filter = scipy.signal.convolve(sum_x_axis, kernel)  # 1-D convolution
    temp = np.abs(sobel_filter[1:-1]) / np.max(np.abs(sobel_filter[1:-1]))
    temp[temp < 0.6] = 0
    temp[temp != 0] = 1
    index = np.where(temp == 1)[0]

    width1 = width // 9
    intervals = [(0, width1), (4 * width1, 5 * width1), (8 * width1, width)]  # left-open, right-closed
    index_list = []
    for i, interval in enumerate(intervals):
        index_sec_list = []
        for ele in index:
            if interval[0] < ele <= interval[1]:
                index_sec_list.append(ele)
        index_list.append(index_sec_list)

    left_x_point, middle_x_point, right_x_point = 9999, 9999, 9999
    left_del_part = (0, left_x_point)
    middle_part = (left_x_point, middle_x_point)
    right_part = (middle_x_point, right_x_point)
    right_del_part = (right_x_point, width)
    # left
    if index_list[0]:
        left_x_point = index_list[0][-1]
        left_del_part = (0, left_x_point)
    # middle
    if index_list[1]:
        value_list = [abs(sobel_filter[index]) for index in index_list[1]]
        middle_x_point = index_list[1][value_list.index(max(value_list))]
        middle_part = (left_x_point, middle_x_point)
    # right
    if index_list[2]:
        right_x_point = index_list[2][0]
        right_part = (middle_x_point, right_x_point)
        right_del_part = (right_x_point, width)

    split_point = sorted(list(set(sorted(list(left_del_part + middle_part + right_part + right_del_part))) - {9999}))
    split_pairs = []
    if len(split_point) > 2:
        a = split_point[:-1]
        b = split_point[1:]
        for i, ele in enumerate(a):
            if b[i] - ele > width1:
                split_pairs.append((ele, b[i]))
    return split_pairs
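

# Illustrative sketch of the idea behind segment2parts_by_pix: convolving a
# normalized column-sum profile with a [-2, 0, 2] kernel highlights abrupt
# changes in ink density, which mark candidate split columns. The function name
# and profile values below are made up for demonstration.
def _demo_projection_derivative():
    profile = np.array([0.1, 0.1, 0.9, 0.9, 0.1, 0.1])  # low ink, dense block, low ink
    response = scipy.signal.convolve(profile, np.array([-2, 0, 2]))
    inner = np.abs(response[1:-1])  # same trimming as in segment2parts_by_pix
    return np.where(inner / inner.max() >= 0.6)[0]  # columns near the density transitions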


def segment2parts(im_raw, save_path):
    img_parts_dict_list = []
    # randon_img = radon_rotate_ski(im_raw)
    # the top of the exam sheet may carry a black border, so crop 3% of the height
    yy, xx = im_raw.shape[0], im_raw.shape[1]
    y_crop_pix = int(yy * 0.03)
    # x_crop_pix = int(xx*0.03)
    x_crop_pix = 0
    im_crop = im_raw[y_crop_pix:yy - y_crop_pix, x_crop_pix:xx - x_crop_pix]

    split_pairs = segment2parts_by_pix(im_crop)
    if len(split_pairs) >= 2:
        for index, ele in enumerate(split_pairs):
            dst = im_raw[:, ele[0]:ele[1]]
            save_path_final = save_path.replace('.jpg', '') + '_{}_{}_{}.jpg'.format(ele[0], 0, index)
            cv2.imencode('.jpg', dst)[1].tofile(save_path_final)
            image = cv2.imencode('.jpg', dst)[1]
            base64_data = str(base64.b64encode(image))[2:-1]
            part_dict = {'img_part': base64_data,
                         'x_bias': ele[0] + x_crop_pix,
                         'y_bias': 0}
            img_parts_dict_list.append(part_dict)
    else:
        # use the single detected pair, or fall back to the full cropped width
        # when segment2parts_by_pix found no split pair at all
        first_pair = split_pairs[0] if split_pairs else (0, im_crop.shape[1])
        img = im_crop[:, first_pair[0]:first_pair[1]]
        resize_ratio = 0.3
        im_resize = utils.resize_by_percent(img, resize_ratio)
        # grayscale
        if len(im_resize.shape) >= 3:
            gray_img = cv2.cvtColor(im_resize, cv2.COLOR_BGR2GRAY)
        else:
            gray_img = im_resize
        ry, rx = gray_img.shape
        # Gaussian blur
        glur_img = cv2.GaussianBlur(gray_img, (5, 5), 0)
        # Otsu threshold
        _ret, threshed_img = cv2.threshold(glur_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        x_kernel = int(10 * resize_ratio)  # horizontal kernel width scales with the resize ratio
        kernel = np.ones((glur_img.shape[0], x_kernel), np.uint8)  # height, width
        dilation = cv2.dilate(threshed_img, kernel, iterations=1)
        # cv2.imshow(' ', dilation)
        # if cv2.waitKey(0) == 27:
        #     cv2.destroyAllWindows()
        # _, cnts, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        (major, minor, _) = cv2.__version__.split(".")
        contours = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = contours[0] if int(major) > 3 else contours[1]
        box_list = [cv2.boundingRect(cnt) for cnt in cnts]
        box_array = np.asarray(box_list)
        box_array[:, 2] = box_array[:, 0] + box_array[:, 2]
        box_array[:, 3] = box_array[:, 1] + box_array[:, 3]

        middle_x = rx // 2
        left_box = np.asarray([0, 0, 0, 0])
        right_box = np.asarray([0, 0, 0, 0])
        for box in box_array:
            x, y, xmax, ymax = box
            if x + (xmax - x) // 2 <= middle_x:
                left_box = np.vstack([left_box, box])
            else:
                right_box = np.vstack([right_box, box])

        left_box_list = []
        right_box_list = []
        try:
            left_box_list = left_box[1:, :][:, :2].min(axis=0).tolist() + left_box[1:, :][:, 2:].max(axis=0).tolist()
        except Exception:
            pass  # only one side present
        try:
            right_box_list = right_box[1:, :][:, :2].min(axis=0).tolist() + right_box[1:, :][:, 2:].max(axis=0).tolist()
        except Exception:
            pass

        box_list = [left_box_list, right_box_list]
        bias = int(70 * resize_ratio)
        for index, box in enumerate(box_list):
            if len(box) > 0:
                xmin, ymin, xmax, ymax = box
                if xmin - bias > 0:
                    xmin = xmin - bias
                else:
                    xmin = 0
                dst = im_crop[int(ymin / resize_ratio):int(ymax / resize_ratio),
                              int(xmin / resize_ratio):int(xmax / resize_ratio)]
                save_path_final = save_path.replace('.jpg', '') + '_{}_{}_{}.jpg'.format(xmin, ymin, index)
                cv2.imencode('.jpg', dst)[1].tofile(save_path_final)
                image = cv2.imencode('.jpg', dst)[1]
                base64_data = str(base64.b64encode(image))[2:-1]
                part_dict = {'img_part': base64_data,
                             'x_bias': int(xmin / resize_ratio) + x_crop_pix + first_pair[0],
                             'y_bias': int(ymin / resize_ratio) + y_crop_pix + 0}
                if (xmax - xmin) / resize_ratio > 100:  # drop narrow vertical strips
                    img_parts_dict_list.append(part_dict)

    return img_parts_dict_list
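

# Illustrative usage sketch: 'sheet.jpg', 'sheet_parts.jpg' and
# _demo_segment2parts are placeholders. segment2parts writes the cropped parts
# next to save_path and returns a list of dicts holding each part as base64
# together with its approximate x/y offset in the original image.
def _demo_segment2parts(img_path='sheet.jpg', save_path='sheet_parts.jpg'):
    raw = utils.read_img(img_path)
    parts = segment2parts(raw, save_path)
    for part in parts:
        print(part['x_bias'], part['y_bias'], len(part['img_part']))
    return parts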