# lightxu/exam-segment-django
# @Author  : lightXu
# @File    : split_lines.py
import os

import cv2
import numpy as np

from segment.image_operation import utils

from django.conf import settings


def find_contours(resized_img, ex_x, ex_y):
    """Binarize an image, dilate it, and crop to the largest external contour.

    Args:
        resized_img: Image handed to ``utils.rgb2binary``.
        ex_x: Width (x extent) of the rectangular dilation kernel.
        ex_y: Height (y extent) of the rectangular dilation kernel.

    Returns:
        A 3-tuple ``(dst, (y0, y1, x0, x1), cnts)``:

        * ``dst`` -- the binarized (not dilated) image cropped to the
          bounding box of the largest contour, with the left edge moved
          right by ``int(ex_x * 0.5)``.
        * ``(y0, y1, x0, x1)`` -- the crop bounds used for ``dst``.
        * ``cnts`` -- every external contour found on the dilated image.

        When no contour is found, the whole binarized image, its full
        bounds and the empty contour sequence are returned.
    """
    threshed = utils.rgb2binary(resized_img)

    # Rectangular kernel; ksize is (width, height).
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ex_x, ex_y))
    morphed = cv2.dilate(threshed, kernel, iterations=1)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x and 4.x
    # return (contours, hierarchy); the contours are always second to last.
    cnts = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if not cnts:
        # Nothing to crop to: fall back to the full image instead of
        # failing with an IndexError on an empty contour list.
        height, width = threshed.shape[:2]
        return threshed, (0, height, 0, width), cnts

    # Largest contour by area. Iterating in reverse makes ``max`` pick the
    # last of several equally large contours, as a stable ascending sort
    # followed by ``[-1]`` would.
    cnt = max(reversed(cnts), key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(cnt)

    # Shift the left edge right by half the kernel width; the right edge
    # (x + w) is left unchanged.
    x = x + int(ex_x * 0.5)
    w = w - int(ex_x * 0.5)
    dst = threshed[y:y + h, x:x + w]
    return dst, (y, y + h, x, x + w), cnts


def save_lines_by_index_without_white_line(path, split_img, split_index, resize_radio):
    """Cut horizontal strips out of an image and save their contour crops.

    Strips are delimited by ``split_index[i - 1]`` and ``split_index[i]``
    for every even ``i >= 2``. Each index is divided by ``resize_radio``
    before use (presumably mapping resized-image rows back to the rows of
    ``split_img`` -- confirm against the caller).

    Args:
        path: Directory the JPEG crops are written into.
        split_img: Image array the strips are sliced from.
        split_index: Sequence of row indices delimiting the strips.
        resize_radio: Divisor applied to the row indices.

    Returns:
        List of absolute paths of the files that were written.
    """
    img_height, img_width = split_img.shape[0], split_img.shape[1]
    saved_files = []

    # Visit even positions only, pairing each with its predecessor.
    for pos in range(2, len(split_index), 2):
        # Pad each strip by two (pre-division) rows on either side, clamped
        # to the image.
        top = max(int((split_index[pos - 1] - 2) / resize_radio), 0)
        bottom = min(int((split_index[pos] - 1 + 2) / resize_radio), img_height)

        # Column 0 is skipped by the slice.
        strip = split_img[top:bottom, 1:img_width]
        if len(strip) < 1:
            continue

        # Wide (500 x 70) dilation kernel; only the contour list is used.
        contours = find_contours(strip, 500, 70)[2]
        for cnt_id, contour in enumerate(reversed(contours)):
            x, y, w, h = cv2.boundingRect(contour)
            if w * h <= 100:
                # Bounding boxes of at most 100 pixels are not saved.
                continue

            piece = strip[y:y + h, x:x + w]
            file_name = '{:04d}_{:04d}_{:04d}_{:04d}_{}.jpg'.format(top, bottom, x, x + w, cnt_id)
            target = os.path.join(path, file_name)
            # Encode in memory, then dump the buffer to disk.
            cv2.imencode('.jpg', piece)[1].tofile(target)
            saved_files.append(os.path.abspath(target))

    return saved_files


def line_split(path, save_path, tolerance_pix_number):
    """Split the image at ``path`` into row strips and save their crops.

    The image is resized by ``settings.RESIZE_RADIO``, cropped to its
    largest contour, and every row of the crop is classified by whether its
    pixel sum exceeds ``255 * tolerance_pix_number``. The rows where that
    classification alternates become the split indices passed to
    ``save_lines_by_index_without_white_line``.

    Args:
        path: Location of the image, read with ``utils.read_img``.
        save_path: Directory the crops are written into.
        tolerance_pix_number: Row tolerance; a row counts as "above" when
            its pixel sum is greater than ``255 * tolerance_pix_number``
            (i.e. more than that many 255-valued pixels, assuming the
            binarized image holds only 0 and 255 -- confirm against
            ``utils.rgb2binary``).

    Returns:
        ``(bbox, lines_list)`` where ``bbox`` is ``[y0, y1, x0, x1]`` of the
        largest-contour crop divided by the resize ratio, and ``lines_list``
        holds the absolute paths of the saved crops.
    """
    resize_radio = settings.RESIZE_RADIO
    images = utils.read_img(path)

    resize_img = utils.resize_by_percent(images, resize_radio)
    # Tall (10 x 200) kernel: dilates mostly along y. Per the original note
    # this trims the surrounding blank area and the dark border lines left
    # by scanning.
    resize_crop_imgs, max_bbox, _ = find_contours(resize_img, 10, 200)

    # Crop bounds expressed in the coordinates of the unresized image.
    bbox = [int(ele / resize_radio) for ele in max_bbox]

    img_arr = np.asarray(resize_crop_imgs)

    # Classify rows with a boolean mask on the raw row sums. Comparing sums
    # (rather than means) avoids dividing by a zero width, and a separate
    # mask avoids confusing a row mean of exactly 1.0 with the "above"
    # marker value.
    row_above = (img_arr.sum(axis=1) > 255 * tolerance_pix_number).tolist()

    # Record the first row that is not above the tolerance, then every row
    # at which the classification flips relative to the last recorded one.
    split_index0 = []
    expected = False
    for i, above in enumerate(row_above):
        if above == expected:
            split_index0.append(i)
            expected = not expected

    split_img0 = images[bbox[0]:bbox[1], bbox[2]:bbox[3]]
    lines_list = save_lines_by_index_without_white_line(save_path, split_img0, split_index0, resize_radio)
    return bbox, lines_list