# -*- coding:utf-8 -*-
- import math
- import numpy as np
- # from neighbor import neighbor_change
- from Near import neighbor_change
- import numba as nb
def get_range(bboxs):
    """Convert flat polygon boxes into axis-aligned ``[x_min, y_min, x_max, y_max]`` boxes.

    Each input box is read as a flat sequence of interleaved coordinates
    ``[x0, y0, x1, y1, ...]``: even positions are x values, odd positions
    are y values.

    :param bboxs: iterable of flat coordinate sequences, one per box
    :return: list with one ``[x_min, y_min, x_max, y_max]`` entry per input box
    :raises ValueError: if a box has no x or no y coordinates (``min`` of an
        empty sequence)
    """
    return [
        [min(box[::2]), min(box[1::2]), max(box[::2]), max(box[1::2])]
        for box in bboxs
    ]
# NOTE: the numba decorator below was already commented out in the original.
# @nb.jit
def check_range(pixel_points, ranges):
    """Report whether a point lies inside any of the given boxes.

    Only the first element of ``pixel_points`` is examined; it is read as an
    ``(x, y)`` pair. Box edges are inclusive.

    :param pixel_points: sequence whose first element is an ``(x, y)`` point
    :param ranges: boxes given as ``[x_min, y_min, x_max, y_max]``
    :return: 1 if the point is inside (or on the edge of) at least one box,
        otherwise 0
    """
    point = pixel_points[0]
    x, y = point[0], point[1]
    return int(any(
        box[0] <= x <= box[2] and box[1] <= y <= box[3]
        for box in ranges
    ))
def is_text(bboxs, contours, max_text_ratio=0.2, min_area=30):
    """Return bounding boxes of the regions that are not covered by text boxes.

    For every region, the fraction of its points that fall inside any text
    box is computed (via ``check_range``). Note this is a point-containment
    ratio, not a true intersection-over-union. Regions whose ratio is below
    ``max_text_ratio`` are treated as illustrations and reduced to their
    axis-aligned bounding box; boxes whose area is not larger than
    ``min_area`` are discarded.

    :param bboxs: text boxes as ``[x_min, y_min, x_max, y_max]``
    :param contours: iterable of regions; each region is a sequence of points
        where ``point[0]`` is an ``(x, y)`` pair (presumably OpenCV contours
        -- confirm against the caller)
    :param max_text_ratio: a region is kept only if strictly less than this
        fraction of its points lie inside text boxes
    :param min_area: a bounding box is kept only if its area is strictly
        greater than this value
    :return: list of ``[x_min, y_min, x_max, y_max]`` illustration boxes
    """
    illustration_box = []
    for region in contours:
        point_count = len(region)
        if point_count == 0:
            # An empty region has no points to classify; skipping it avoids
            # dividing by zero below.
            continue

        points_in_text = sum(check_range(point, bboxs) for point in region)
        if points_in_text / point_count >= max_text_ratio:
            continue

        # Collapse the region to its axis-aligned bounding box.
        xs = [point[0][0] for point in region]
        ys = [point[0][1] for point in region]
        box = [min(xs), min(ys), max(xs), max(ys)]

        if (box[2] - box[0]) * (box[3] - box[1]) > min_area:
            illustration_box.append(box)
    return illustration_box
def _strictly_inside(inner, outer):
    """Return True when ``inner`` lies strictly within ``outer`` on all four sides."""
    return (
        inner[0] > outer[0]
        and inner[1] > outer[1]
        and inner[2] < outer[2]
        and inner[3] < outer[3]
    )


def processed(text_boxes, i_box):
    """Drop oversized and nested illustration boxes, then run ``neighbor_change``.

    Steps:

    1. If ``i_box`` is non-empty, discard every box whose width is not
       smaller than a threshold derived from the last box in the list.
    2. Discard every remaining box that lies strictly inside another
       remaining box (removes small pictures nested in a larger picture).
    3. Pass the text boxes and surviving illustration boxes through
       ``neighbor_change`` (imported from ``Near``; its behavior is not
       visible here).

    :param text_boxes: text boxes, forwarded to ``neighbor_change``
    :param i_box: illustration boxes as ``[x_min, y_min, x_max, y_max]``
    :return: tuple ``(illustration_boxes, text_boxes)`` as returned by
        ``neighbor_change`` (note the swapped order relative to its output)
    """
    if len(i_box):
        last = i_box[-1]
        # NOTE(review): by operator precedence this is x_max - (x_min * 0.9),
        # not (x_max - x_min) * 0.9. It looks like "90% of the last box's
        # width" may have been intended -- confirm before changing; the
        # original arithmetic is preserved here.
        width_limit = last[2] - last[0] * 0.9
        i_box = [box for box in i_box if box[2] - box[0] < width_limit]

    # Keep only boxes that are not strictly contained in some other box.
    # A box is never strictly inside itself, so comparing against the whole
    # list (including the box itself) is safe.
    i_box_c = [
        box
        for box in i_box
        if not any(_strictly_inside(box, other) for other in i_box)
    ]

    text_boxes, i_box_c = neighbor_change(text_boxes, i_box_c)
    return i_box_c, text_boxes
|