# @Author  : lightXu
# @File    : split_lines.py
import os

import cv2
import numpy as np
from segment.image_operation import utils
from django.conf import settings


def find_contours(resized_img, ex_x, ex_y):
    """Binarize an image, dilate it and locate its external contours.

    The image is converted with ``utils.rgb2binary`` and dilated once with
    an ``ex_x`` x ``ex_y`` rectangular kernel; external contours are then
    extracted from the dilated image.  The binarized (not dilated) image is
    cropped to the bounding box of the contour with the largest area.

    NOTE(review): presumably ``utils.rgb2binary`` yields a single-channel
    image whose foreground (ink) is non-zero -- confirm in ``utils``.

    Args:
        resized_img: Image array accepted by ``utils.rgb2binary``.
        ex_x: Dilation kernel width (pixels).
        ex_y: Dilation kernel height (pixels).

    Returns:
        A tuple ``(dst, (y0, y1, x0, x1), cnts)``: the cropped binary image,
        the crop bounds in ``resized_img`` coordinates, and all external
        contours found.  When no contour exists, ``dst`` is an empty array,
        the bounds are ``(0, 0, 0, 0)`` and ``cnts`` is empty.
    """
    threshed = utils.rgb2binary(resized_img)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ex_x, ex_y))  # dilation kernel size
    morphed = cv2.dilate(threshed, kernel, iterations=1)

    # findContours returns (contours, hierarchy) on OpenCV 2.x/4.x and
    # (image, contours, hierarchy) on 3.x; the contours are always the
    # second-to-last element.
    cnts = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(cnts) == 0:
        # Nothing to crop (e.g. a blank image): report an empty result
        # instead of failing on an empty sequence.
        return threshed[0:0, 0:0], (0, 0, 0, 0), cnts

    largest = sorted(cnts, key=cv2.contourArea)[-1]
    x, y, w, h = cv2.boundingRect(largest)

    # Move the left edge right by half the kernel width; the right edge
    # (x + w) stays where the dilated bounding box put it.
    half_ex_x = int(ex_x * 0.5)
    x = x + half_ex_x
    w = w - half_ex_x

    dst = threshed[y:y + h, x:x + w]
    return dst, (y, y + h, x, x + w), cnts


def save_lines_by_index_without_white_line(path, split_img, split_index, resize_radio):
    """Cut text lines out of ``split_img`` and save each one as a JPEG.

    ``split_index`` holds alternating row indices measured on the resized
    image: for every even ``i >= 2``, ``split_index[i - 1]`` is taken as the
    top of a line and ``split_index[i]`` as its bottom.  Indices are mapped
    back to ``split_img`` rows by dividing by ``resize_radio``.  Each line
    band is passed to ``find_contours`` and every contour whose bounding box
    area exceeds 100 pixels is written to ``path``.

    Args:
        path: Directory the JPEG files are written to.
        split_img: Full-resolution image to cut the lines from.
        split_index: Alternating row indices as described above.
        resize_radio: Scale factor between ``split_img`` and the image the
            indices were computed on.

    Returns:
        List of absolute paths of the files written.
    """
    img_y = split_img.shape[0]
    img_x = split_img.shape[1]
    lines_list = []

    for i in range(2, len(split_index), 2):
        # Indices alternate between the two row states; pair each one with
        # its predecessor and keep 2 extra rows of margin above and below
        # the line.
        start0 = int((split_index[i - 1] - 2) / resize_radio)
        end0 = int((split_index[i] - 1 + 2) / resize_radio)
        start = max(start0, 0)
        end = min(end0, img_y)

        line = split_img[start:end, 1:img_x]
        if line.size == 0:
            # No rows or no columns left after clamping; nothing to save.
            continue

        # Dilate mostly along x to strip the white on each line; the second
        # argument is the kernel width, the third the kernel height.
        _, _, cnts = find_contours(line, 500, 70)
        for cnt_id, cnt in enumerate(reversed(cnts)):
            x, y, w, h = cv2.boundingRect(cnt)
            if w * h <= 100:
                continue  # too small to be worth saving
            cj_out = line[y:y + h, x:x + w]
            save_path = os.path.join(
                path,
                '{:04d}_{:04d}_{:04d}_{:04d}_{}.jpg'.format(start, end, x, x + w, cnt_id))
            # imencode + tofile rather than imwrite, as in the original.
            cv2.imencode('.jpg', cj_out)[1].tofile(save_path)
            lines_list.append(os.path.abspath(save_path))

    return lines_list


def line_split(path, save_path, tolerance_pix_number):
    """Split the image at ``path`` into text lines and save them.

    The image is resized by ``settings.RESIZE_RADIO``, cropped to its
    largest dilated contour, and each row of the crop is classified by
    comparing its mean value with ``255 * tolerance_pix_number / width``.
    Row indices where the classification flips are collected and handed to
    ``save_lines_by_index_without_white_line`` together with the matching
    full-resolution crop.

    Args:
        path: Path of the source image (read via ``utils.read_img``).
        save_path: Directory the line images are written to.
        tolerance_pix_number: Number of full-intensity (255) pixels a row
            must exceed, in total value, to be classified as 1.

    Returns:
        A tuple ``(bbox, lines_list)``: the crop bounds
        ``[y0, y1, x0, x1]`` scaled back to the original image, and the
        absolute paths of the saved line images.  ``lines_list`` is empty
        when no content is found.
    """
    resize_radio = settings.RESIZE_RADIO
    images = utils.read_img(path)
    resize_img = utils.resize_by_percent(images, resize_radio)

    # Dilate mostly along y to drop the surrounding white as a whole and
    # the black border lines left by scanning.
    resize_crop_imgs, max_bbox, _ = find_contours(resize_img, 10, 200)
    bbox = [int(ele / resize_radio) for ele in max_bbox]

    img_arr = np.asarray(resize_crop_imgs)
    if img_arr.size == 0:
        # Empty crop: avoid dividing by a zero width below.
        return bbox, []

    width = img_arr.shape[1]
    row_mean = img_arr.sum(axis=1) / width
    # Classify with a boolean mask rather than relabelling in place, so a
    # row whose mean happens to equal 1.0 is not mistaken for a flagged row.
    row_flags = (row_mean > 255 * tolerance_pix_number / width).astype(int).tolist()

    # Record the first row flagged 0, then the next flagged 1, then the
    # next flagged 0, and so on.
    split_index0 = []
    expected = 0
    for i, flag in enumerate(row_flags):
        if flag == expected:
            split_index0.append(i)
            expected = 1 - expected

    split_img0 = images[bbox[0]:bbox[1], bbox[2]:bbox[3]]
    lines_list = save_lines_by_index_without_white_line(
        save_path, split_img0, split_index0, resize_radio)

    return bbox, lines_list