123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132 |
- import cv2
- import math
- import pandas as pd
- import numpy as np
- import os
- from TextModel import text_model
def get_range(bboxs):
    """Turn flat polygon coordinate lists into axis-aligned bounding boxes.

    Each entry of ``bboxs`` is read as a flat sequence in which the values at
    even positions are x coordinates and the values at odd positions are y
    coordinates.

    Returns:
        A list with one ``[x_min, y_min, x_max, y_max]`` list per entry.
    """
    def _enclosing_box(flat):
        # De-interleave the flat coordinate list into its x and y parts.
        x_coords, y_coords = flat[::2], flat[1::2]
        return [min(x_coords), min(y_coords), max(x_coords), max(y_coords)]

    return [_enclosing_box(flat) for flat in bboxs]
def check_range(pixel_points, ranges):
    """Tell whether a wrapped point lies inside any of the given boxes.

    Args:
        pixel_points: A sequence whose first element is the ``(x, y)`` point
            to test; any further elements are ignored.
        ranges: Boxes given as ``[x_min, y_min, x_max, y_max]``.

    Returns:
        1 if the point is inside (borders included) at least one box,
        otherwise 0.
    """
    point = pixel_points[0]
    inside_any = any(
        box[2] >= point[0] >= box[0] and box[3] >= point[1] >= box[1]
        for box in ranges
    )
    return 1 if inside_any else 0
def is_text(bboxs, contours):
    """Collect bounding boxes of contours that lie mostly outside text boxes.

    For every contour, the share of its points that fall inside any text
    bounding box (derived from ``bboxs`` via ``get_range``) is computed.
    Contours with a share below 0.2 are kept as illustration candidates,
    provided their bounding box covers an area greater than 30.

    Args:
        bboxs: Flat polygon coordinate lists of the detected text regions.
        contours: Iterable of contours; each contour entry is a one-element
            wrapper around an ``(x, y)`` point.

    Returns:
        A list of ``[x_min, y_min, x_max, y_max]`` boxes.
    """
    text_ranges = get_range(bboxs)
    candidates = []
    for contour in contours:
        hits = [check_range(entry, text_ranges) for entry in contour]
        if sum(hits) / len(hits) >= 0.2:
            # Enough of the outline sits inside text boxes: skip it.
            continue

        xs = [entry[0][0] for entry in contour]
        ys = [entry[0][1] for entry in contour]
        # The sentinels (inf for minima, -1 for maxima) seed the extremes.
        box = [
            min(math.inf, min(xs)),
            min(math.inf, min(ys)),
            max(-1, max(xs)),
            max(-1, max(ys)),
        ]

        width = box[2] - box[0]
        height = box[3] - box[1]
        if width * height > 30:
            candidates.append(box)
    return candidates
def processed(text_box, i_box):
    """Drop the last input box and every box strictly nested in another one.

    The boxes are visited in reverse input order and the first box of that
    reversed sequence (i.e. the last input box) is always discarded.
    NOTE(review): presumably that entry is the contour of the whole page --
    confirm against the ordering produced by the caller.

    A box is removed when all four of its edges lie strictly inside those of
    some other remaining candidate; boxes that merely touch an edge are kept.

    The original implementation also called ``np.sort`` twice and discarded
    the results. Those calls had no effect on the output, but raised
    ``AxisError`` for an empty ``i_box``; they are removed here.

    Args:
        text_box: Unused; kept so existing callers keep working.
        i_box: Sequence of ``[x_min, y_min, x_max, y_max]`` boxes.

    Returns:
        A list of 1-D numpy arrays (one per surviving box) in reverse input
        order. Empty when ``i_box`` holds fewer than two boxes.
    """
    boxes = np.array(i_box)
    if boxes.size == 0:
        return []

    candidates = boxes[::-1][1:]
    top_left = candidates[:, :2]
    bottom_right = candidates[:, 2:4]

    kept = []
    for box in candidates:
        # Strict inequalities: a box never counts as nested in itself.
        encloses = (
            np.all(top_left < box[:2], axis=1)
            & np.all(bottom_right > box[2:4], axis=1)
        )
        if not encloses.any():
            kept.append(box)
    return kept
def _reset_dir(directory):
    """Delete ``directory`` with its contents (if present) and recreate it."""
    # Local import so this block does not depend on the file's import header.
    import shutil

    # Portable replacement for the Windows-only ``rd /s/q`` shell command;
    # like the shell call, a failed removal is not treated as fatal.
    shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory, exist_ok=True)


def _save_crop(img, box, directory):
    """Write the ``box`` region of ``img`` into ``directory`` as a PNG.

    The file is named ``<y_min>-<y_max>-<x_min>-<x_max>.png``.
    """
    # Slice bounds must be integers; the '%d' file name truncates the same way.
    x_min, y_min, x_max, y_max = (int(value) for value in box)
    crop = img[y_min:y_max, x_min:x_max]
    if crop.size == 0:
        # Degenerate box: there is nothing to write.
        return
    file_name = '%d-%d-%d-%d.png' % (y_min, y_max, x_min, x_max)
    cv2.imwrite(os.path.join(directory, file_name), crop)


def run_cut(path, output_dir='./result'):
    """Cut illustration and text regions out of an image and save them.

    Illustration crops go to ``<output_dir>/image`` and text crops go to
    ``<output_dir>/text_img``. Both folders are emptied first.

    The original code cleared and created the folders under a hard-coded
    absolute path but wrote the crops below the relative ``./result``, so the
    two only agreed for one specific working directory. Both now use
    ``output_dir``.
    NOTE(review): the default is kept relative on purpose -- cv2.imwrite is
    reported to fail on non-ASCII paths on Windows; confirm before passing an
    absolute path that contains such characters.

    Args:
        path: Path of the image to process.
        output_dir: Root folder for the two result folders.

    Raises:
        IOError: If ``cv2.imread`` cannot load ``path``.
    """
    image_dir = os.path.join(output_dir, 'image')
    text_dir = os.path.join(output_dir, 'text_img')
    for directory in (image_dir, text_dir):
        _reset_dir(directory)

    text_bbx = text_model(path)

    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('cv2.imread could not load %r' % (path,))

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
    # findContours returns 3 values on OpenCV 3.x and 2 on 4.x; the contour
    # list is the second-to-last element in both.
    contours = cv2.findContours(
        binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_TC89_L1)[-2]

    illustration_boxes = processed(1, is_text(text_bbx, contours))
    for box in illustration_boxes:
        x_min, y_min, x_max, y_max = box
        if (x_max - x_min) * (y_max - y_min) > 30:
            _save_crop(img, box, image_dir)

    for box in get_range(text_bbx):
        _save_crop(img, box, text_dir)
if __name__ == "__main__":
    # Script entry point: process the hard-coded sample image path.
    run_cut('./img/5.png')
|