# meimeiking / myOCR
import math
import os
import shutil

import cv2
import numpy as np
import pandas as pd

from TextModel import text_model


def get_range(bboxs):
    """Convert flat coordinate sequences into axis-aligned bounding boxes.

    Each entry of ``bboxs`` is read as interleaved coordinates
    ``x0, y0, x1, y1, ...``: even positions are x values, odd positions
    are y values.

    Returns a list with one ``[x_min, y_min, x_max, y_max]`` per entry,
    in input order.
    """
    def _extent(coords):
        # Split the interleaved sequence into its x and y components.
        x_vals, y_vals = coords[::2], coords[1::2]
        return [min(x_vals), min(y_vals), max(x_vals), max(y_vals)]

    return [_extent(coords) for coords in bboxs]


def check_range(pixel_points, ranges):
    """Tell whether a point falls inside any of the given boxes.

    Only ``pixel_points[0]`` is read; it is treated as an ``(x, y)`` pair.
    Each entry of ``ranges`` is ``[x_min, y_min, x_max, y_max]`` and its
    bounds are inclusive.

    Returns 1 if at least one box contains the point, otherwise 0.
    """
    point = pixel_points[0]
    inside_any = any(
        box[0] <= point[0] <= box[2] and box[1] <= point[1] <= box[3]
        for box in ranges
    )
    return 1 if inside_any else 0


def is_text(bboxs, contours):
    """Collect bounding boxes of contours that mostly lie outside the text boxes.

    ``bboxs`` is converted to ``[x_min, y_min, x_max, y_max]`` ranges with
    ``get_range``. For every contour, the share of its points that
    ``check_range`` places inside one of those ranges is computed; a contour
    is kept only when that share is below 0.2 and its bounding box has an
    area greater than 30.

    Each contour entry is indexed as ``entry[0][0]`` (x) and ``entry[0][1]``
    (y) -- presumably the (N, 1, 2) layout produced by ``cv2.findContours``;
    confirm against the caller.

    Returns a list of ``[x_min, y_min, x_max, y_max]`` boxes.
    """
    text_ranges = get_range(bboxs)
    kept_boxes = []
    for contour in contours:
        hits = sum(check_range(entry, text_ranges) for entry in contour)
        if hits / len(contour) >= 0.2:
            # Enough of the outline sits inside text boxes: skip it.
            continue

        xs = [entry[0][0] for entry in contour]
        ys = [entry[0][1] for entry in contour]
        # The inf / -1 seeds mirror the sentinels of a running min/max scan,
        # so the maxima never drop below -1.
        bounds = [
            min(math.inf, *xs),
            min(math.inf, *ys),
            max(-1, *xs),
            max(-1, *ys),
        ]

        width = bounds[2] - bounds[0]
        height = bounds[3] - bounds[1]
        if width * height > 30:
            kept_boxes.append(bounds)
            # Debug aid: to visualise a kept box, build its four corners as an
            # int32 array, reshape to (-1, 1, 2) as cv2.polylines expects, and
            # draw it on the image.
    return kept_boxes


def processed(text_box, i_box):
    """Drop the last candidate box and every box nested strictly inside another.

    Args:
        text_box: Unused; kept so existing callers keep working.
        i_box: Sequence of ``[x_min, y_min, x_max, y_max]`` boxes. May be empty.

    Returns:
        A list of NumPy rows (one per surviving box), in reversed input
        order. Empty when ``i_box`` has fewer than two entries.
    """
    boxes = np.array(i_box)
    # Reverse the order and discard what was the LAST input box.
    # NOTE(review): presumably that entry is the page-sized outer contour --
    # confirm against the contour ordering of the caller.
    # (The np.sort calls that used to sit here returned copies that were
    # thrown away, and the axis=1 call raised on an empty input.)
    boxes = boxes[::-1][1:]
    if len(boxes) == 0:
        return []

    def _strictly_inside(inner, outer):
        # All four edges of `inner` must lie strictly within `outer`,
        # so a box is never considered inside itself.
        return (inner[0] > outer[0] and inner[1] > outer[1]
                and inner[2] < outer[2] and inner[3] < outer[3])

    return [
        box for box in boxes
        if not any(_strictly_inside(box, other) for other in boxes)
    ]


def run_cut(path):
    """Cut the image at ``path`` into illustration crops and text crops.

    Steps:
      1. Empty and recreate ``./result/image`` and ``./result/text_img``.
      2. Get text boxes from ``text_model(path)``.
      3. Binarise the grayscale image (threshold 220) and extract contours.
      4. Keep contours that ``is_text`` / ``processed`` retain and write each
         one with area > 30 to ``./result/image/<y0>-<y1>-<x0>-<x1>.png``.
      5. Write every text box to ``./result/text_img/<y0>-<y1>-<x0>-<x1>.png``.

    Args:
        path: Path of the image file to process.

    Raises:
        OSError: If ``cv2.imread`` cannot read ``path``.
    """
    # Clean the same directories the crops are written to below. Previously a
    # hard-coded absolute Windows path was cleaned via the `rd` shell command,
    # which only matched the relative output paths from one specific working
    # directory and did nothing useful elsewhere.
    for out_dir in ('./result/image', './result/text_img'):
        # Best effort, like the shell command it replaces: a locked file must
        # not abort the run.
        shutil.rmtree(out_dir, ignore_errors=True)
        os.makedirs(out_dir, exist_ok=True)

    text_bbx = text_model(path)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError('cannot read image: %s' % path)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)

    # [-2] selects the contour list under both the 3-value (OpenCV 3.x) and
    # the 2-value (OpenCV 4.x) return signature.
    contours = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_TC89_L1)[-2]

    i_box = is_text(text_bbx, contours)
    i_box = processed(1, i_box)
    for x_min, y_min, x_max, y_max in i_box:
        if (x_max - x_min) * (y_max - y_min) > 30:
            cv2.imwrite('./result/image/%d-%d-%d-%d.png' % (y_min, y_max, x_min, x_max), img[y_min:y_max, x_min:x_max])

    for x_min, y_min, x_max, y_max in get_range(text_bbx):
        crop = img[y_min:y_max, x_min:x_max]
        if crop.size == 0:
            # cv2.imwrite raises on an empty image; skip degenerate boxes.
            continue
        cv2.imwrite('./result/text_img/%d-%d-%d-%d.png' % (y_min, y_max, x_min, x_max), crop)

    # Debug aid: to inspect the result visually, draw the boxes with
    # cv2.polylines (corner array reshaped to (-1, 1, 2)) or all contours with
    # cv2.drawContours, then show the image with cv2.imshow / cv2.waitKey or
    # save it with cv2.imwrite.


# Script entry point: when executed directly, run the cutter on './img/5.png'.
if __name__ == '__main__':
    run_cut('./img/5.png')