12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- # @Author : lightXu
- # @File : split_lines.py
- import os
- import cv2
- import numpy as np
- from segment.image_operation import utils
- from django.conf import settings
def find_contours(resized_img, ex_x, ex_y):
    """Binarise an image, dilate it, and crop to the largest external contour.

    Args:
        resized_img: Image accepted by ``utils.rgb2binary``.
        ex_x: Width of the rectangular dilation kernel.
        ex_y: Height of the rectangular dilation kernel.

    Returns:
        A 3-tuple ``(dst, (top, bottom, left, right), cnts)``:
        ``dst`` is the binarised image cropped to the bounding box of the
        largest contour, the 4-tuple holds that box's row/column bounds, and
        ``cnts`` is every external contour found in the dilated image.
        When no contour is found, the uncropped binarised image, its full
        bounds and the empty contour collection are returned.
    """
    threshed = utils.rgb2binary(resized_img)
    # Dilation size: merges nearby foreground blobs before contour detection.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ex_x, ex_y))
    morphed = cv2.dilate(threshed, kernel, iterations=1)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x return
    # (contours, hierarchy); the contours are always the second-to-last item.
    cnts = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if not cnts:
        # Nothing to crop to (e.g. a blank image): hand back the whole frame
        # instead of failing with an IndexError on the empty contour list.
        height, width = threshed.shape[:2]
        return threshed, (0, height, 0, width), cnts

    cnt = sorted(cnts, key=cv2.contourArea)[-1]
    x, y, w, h = cv2.boundingRect(cnt)
    # Move the left edge in by half of the horizontal dilation while keeping
    # the right edge (x + w) where it was.
    x = x + int(ex_x * 0.5)
    w = w - int(ex_x * 0.5)
    dst = threshed[y:y + h, x:x + w]
    return dst, (y, y + h, x, x + w), cnts
def save_lines_by_index_without_white_line(path, split_img, split_index, resize_radio):
    """Cut horizontal strips out of an image and save each contour in them as a JPEG.

    Args:
        path: Directory the JPEG files are written into.
        split_img: Image array to cut the strips from.
        split_index: Row indices measured on the resized image; entries at
            positions ``k - 1`` and ``k`` (for even ``k >= 2``) bound one strip.
        resize_radio: Scale factor of the resized image, used to map the
            indices back onto ``split_img``.

    Returns:
        List of absolute paths of the files that were written.
    """
    total_rows, total_cols = split_img.shape[0], split_img.shape[1]
    saved_files = []

    # Only even positions are used: each one pairs with the entry before it.
    for k in range(2, len(split_index), 2):
        # Pad the strip by two (resized) rows on both sides so a little extra
        # margin is kept above and below, then clamp to the image height.
        top = max(int((split_index[k - 1] - 2) / resize_radio), 0)
        bottom = min(int((split_index[k] - 1 + 2) / resize_radio), total_rows)

        strip = split_img[top:bottom, 1:total_cols]
        if len(strip) < 1:
            continue

        # Wide horizontal dilation (500 along x, 70 along y) so the pieces of
        # a strip merge into a few contours and the surrounding margin is
        # trimmed off.
        cnts = find_contours(strip, 500, 70)[2]
        for cnt_id, cnt in enumerate(reversed(cnts)):
            x, y, w, h = cv2.boundingRect(cnt)
            if w * h <= 100:
                # Too small to be worth saving.
                continue

            piece = strip[y:y + h, x:x + w]
            file_name = '{:04d}_{:04d}_{:04d}_{:04d}_{}.jpg'.format(top, bottom, x, x + w, cnt_id)
            save_path = os.path.join(path, file_name)
            # imencode + tofile rather than cv2.imwrite, as in the original.
            cv2.imencode('.jpg', piece)[1].tofile(save_path)
            saved_files.append(os.path.abspath(save_path))

    return saved_files
def _find_row_transitions(binary_img, tolerance_pix_number):
    """Return the row indices where rows alternate between below- and above-threshold."""
    img_arr = np.asarray(binary_img)
    width = img_arr.shape[1]
    row_mean = img_arr.sum(axis=1) / width

    # A row counts as "above threshold" when its mean exceeds what
    # tolerance_pix_number pixels of value 255 would contribute.
    # The flags live in their own boolean mask: writing 1/0 back into the
    # means and then testing "!= 1" misclassifies a row whose mean is exactly
    # 1.0 but not above the threshold.
    above = row_mean > 255 * tolerance_pix_number / width

    transitions = []
    wanted = 0  # look for a below-threshold row first, then alternate
    for row, flag in enumerate(above):
        if int(flag) == wanted:
            transitions.append(row)
            wanted = 1 - wanted
    return transitions


def line_split(path, save_path, tolerance_pix_number):
    """Split the image at ``path`` into strips and save the pieces under ``save_path``.

    Args:
        path: Location of the source image, passed to ``utils.read_img``.
        save_path: Directory the cropped pieces are written into.
        tolerance_pix_number: Number of full-intensity (255) pixels a row may
            contain before it is classified as above-threshold.

    Returns:
        A 2-tuple ``(bbox, lines_list)``: ``bbox`` is
        ``[top, bottom, left, right]`` of the main content region in the
        coordinates of the original image, and ``lines_list`` holds the
        absolute paths of the saved pieces.
    """
    resize_radio = settings.RESIZE_RADIO
    images = utils.read_img(path)
    resize_img = utils.resize_by_percent(images, resize_radio)

    # Strong vertical dilation (10 along x, 200 along y): crop to the main
    # content region as a whole, dropping the outer margin and the dark
    # border lines left by scanning.
    resize_crop_imgs, max_bbox, _ = find_contours(resize_img, 10, 200)
    # Map the bounding box from the resized image back to the original one.
    bbox = [int(ele / resize_radio) for ele in max_bbox]

    split_index0 = _find_row_transitions(resize_crop_imgs, tolerance_pix_number)

    split_img0 = images[bbox[0]:bbox[1], bbox[2]:bbox[3]]
    lines_list = save_lines_by_index_without_white_line(save_path, split_img0, split_index0, resize_radio)
    return bbox, lines_list
|