123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- # @Author : lightXu
- # @File : utils.py
- import os
- import cv2
- import numpy as np
- import xml.etree.cElementTree as ET
- from PIL import Image
def read_img(img_path):
    """Load an image from disk and decode it with OpenCV.

    The file is first read as a raw ``uint8`` buffer with ``np.fromfile`` and
    then decoded in memory with ``cv2.imdecode`` using flag ``-1``.

    :param img_path: path of the image file to load
    :return: whatever ``cv2.imdecode`` returns for the file's bytes
    :raises FileNotFoundError: propagated unchanged when the file is missing
    """
    raw_bytes = np.fromfile(img_path, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, -1)
def write_img(img_to_wwite, save_path):
    """Encode an image as JPEG in memory and write the bytes to ``save_path``.

    The encoding is always ``.jpg``, whatever extension ``save_path`` carries.

    :param img_to_wwite: image array accepted by ``cv2.imencode``
    :param save_path: destination file path
    :raises FileNotFoundError: propagated unchanged, e.g. when the target
        directory does not exist
    """
    _ok, encoded = cv2.imencode('.jpg', img_to_wwite)
    encoded.tofile(save_path)
def crop_region_direct(im, bbox):
    """Cut the rectangle described by ``bbox`` out of ``im``.

    :param im: 2-D (or higher) array indexed as ``im[row, column]``
    :param bbox: sequence laid out as ``(xmin, ymin, xmax, ymax)``; only the
        first four entries are read
    :return: the slice ``im[ymin:ymax, xmin:xmax]``
    """
    # x selects columns, y selects rows.
    col_span = slice(bbox[0], bbox[2])
    row_span = slice(bbox[1], bbox[3])
    return im[row_span, col_span]
def resize_by_percent(im, percent):
    """Scale an image uniformly by the factor ``percent``.

    :param im: image array; ``shape[0]`` is the height, ``shape[1]`` the width
    :param percent: scale factor applied to both axes (e.g. ``0.5`` halves
        each side); the new sizes are truncated with ``int``
    :return: the resized image

    Interpolation methods offered by OpenCV (five in total):
        1) INTER_NEAREST  - nearest-neighbour interpolation
        2) INTER_LINEAR   - bilinear interpolation (the default)
        3) INTER_AREA     - resampling using pixel area relation. It may be
           the better choice for image decimation, but when enlarging an
           image it behaves much like nearest-neighbour.
        4) INTER_CUBIC    - bicubic interpolation over a 4x4 neighbourhood
        5) INTER_LANCZOS4 - Lanczos interpolation over an 8x8 neighbourhood
    This function uses INTER_AREA.
    """
    rows, cols = im.shape[0], im.shape[1]
    # cv2.resize takes the target size as (width, height).
    target_size = (int(cols * percent), int(rows * percent))
    return cv2.resize(im, target_size, interpolation=cv2.INTER_AREA)
def resize_by_fixed_size(im, new_x, new_y):
    """Resize an image to an explicit pixel size.

    :param im: image array to resize
    :param new_x: target size along the x axis, in pixels
    :param new_y: target size along the y axis, in pixels
    :return: the resized image

    Interpolation methods offered by OpenCV (five in total):
        1) INTER_NEAREST  - nearest-neighbour interpolation
        2) INTER_LINEAR   - bilinear interpolation (the default)
        3) INTER_AREA     - resampling using pixel area relation. It may be
           the better choice for image decimation, but when enlarging an
           image it behaves much like nearest-neighbour.
        4) INTER_CUBIC    - bicubic interpolation over a 4x4 neighbourhood
        5) INTER_LANCZOS4 - Lanczos interpolation over an 8x8 neighbourhood
    This function uses INTER_AREA.
    """
    target_size = (new_x, new_y)
    return cv2.resize(im, target_size, interpolation=cv2.INTER_AREA)
def resize_by_radio(im, shorter=500, longer=750):
    """Shrink an image, keeping its aspect ratio, to fit size limits.

    Nothing happens unless the image's shorter side exceeds ``shorter``; in
    that case the image is scaled so the shorter side becomes ``shorter``.
    If that scale would leave the longer side above ``longer``, the scale is
    reduced so the longer side becomes ``longer`` instead.

    :param im: image array; only ``shape[0:2]`` is inspected
    :param shorter: target length for the shorter side (default 500)
    :param longer: upper bound for the longer side (default 750)
    :return: ``im`` itself when no resize is needed, otherwise the resized
        image (INTER_AREA interpolation)

    Interpolation methods offered by OpenCV (five in total):
        1) INTER_NEAREST  - nearest-neighbour interpolation
        2) INTER_LINEAR   - bilinear interpolation (the default)
        3) INTER_AREA     - resampling using pixel area relation. It may be
           the better choice for image decimation, but when enlarging an
           image it behaves much like nearest-neighbour.
        4) INTER_CUBIC    - bicubic interpolation over a 4x4 neighbourhood
        5) INTER_LANCZOS4 - Lanczos interpolation over an 8x8 neighbourhood
    """
    side_lengths = im.shape[0:2]
    short_side = np.min(side_lengths)
    if short_side <= shorter:
        # Already small enough: hand back the very same object.
        return im

    long_side = np.max(side_lengths)
    scale = float(shorter) / float(short_side)
    # Prevent the longer axis from exceeding ``longer``.
    if np.round(scale * long_side) > longer:
        scale = float(longer) / float(long_side)
    return cv2.resize(im, None, None, fx=scale, fy=scale,
                      interpolation=cv2.INTER_AREA)
def rgb2binary(im):
    """Binarise a colour image with an inverted Otsu threshold.

    The image is converted to grayscale with ``cv2.COLOR_BGR2GRAY`` (so,
    despite the function name, the input is treated as BGR) and then
    thresholded with ``THRESH_BINARY_INV + THRESH_OTSU``.

    :param im: colour image accepted by ``cv2.cvtColor``
    :return: the thresholded single-channel image
    """
    threshold_mode = cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    return cv2.threshold(gray, 0, 255, threshold_mode)[1]
def create_xml(obj_name, tree, xmin, ymin, xmax, ymax):
    """Append one ``<object>`` annotation element to the root of ``tree``.

    The new element holds ``name``, ``pose`` ('Unspecified'), ``truncated``
    ('0'), ``difficult`` ('0') and a ``bndbox`` with the four coordinates
    written as strings.

    :param obj_name: text stored in the ``<name>`` child
    :param tree: ElementTree whose root receives the new ``<object>``
    :param xmin: left coordinate of the bounding box
    :param ymin: top coordinate of the bounding box
    :param xmax: right coordinate of the bounding box
    :param ymax: bottom coordinate of the bounding box
    :return: the same ``tree`` object, modified in place
    """
    object_node = ET.SubElement(tree.getroot(), 'object', {})

    fixed_fields = (
        ('name', obj_name),
        ('pose', 'Unspecified'),
        ('truncated', '0'),
        ('difficult', '0'),
    )
    for tag, text in fixed_fields:
        ET.SubElement(object_node, tag).text = text

    # Bounding box: coordinates are stored via str().
    box_node = ET.SubElement(object_node, 'bndbox')
    corners = (('xmin', xmin), ('ymin', ymin), ('xmax', xmax), ('ymax', ymax))
    for tag, value in corners:
        ET.SubElement(box_node, tag).text = str(value)

    return tree
def preprocess(img, binary_inv=True, dilate=1, blur=1):
    """Convert an image to grayscale, denoise it and binarise it with Otsu.

    Steps, in order:
      1. images with three or more dimensions are converted with
         ``cv2.COLOR_BGR2GRAY``; others are used as they are;
      2. if ``dilate`` is non-zero, a dilation followed by an erosion with a
         ``dilate`` x ``dilate`` kernel of ones is applied to remove noise;
      3. if ``blur`` is non-zero, a Gaussian blur with a ``blur`` x ``blur``
         kernel smooths the edges;
      4. an Otsu threshold produces the final black-and-white image.

    :param img: input image array
    :param binary_inv: when true use ``THRESH_BINARY_INV``, otherwise
        ``THRESH_BINARY`` (both combined with ``THRESH_OTSU``)
    :param dilate: kernel side for the dilate/erode step; ``0`` skips it
        (default 1, matching the previous hard-coded value)
    :param blur: kernel side for the Gaussian blur; ``0`` skips it
        (default 1, matching the previous hard-coded value).
        NOTE(review): OpenCV expects an odd kernel size here - confirm
        before passing even values.
    :return: the thresholded image
    """
    if len(img.shape) >= 3:
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    else:
        gray_img = img

    # Always continue from the grayscale image, so that skipping the
    # dilate/erode step does not feed the original colour image onward.
    result = gray_img

    # Apply dilation and erosion to remove some noise.
    if dilate != 0:
        kernel = np.ones((dilate, dilate), np.uint8)
        result = cv2.dilate(result, kernel, iterations=1)
        result = cv2.erode(result, kernel, iterations=1)

    # Apply blur to smooth out the edges.
    if blur != 0:
        result = cv2.GaussianBlur(result, (blur, blur), 0)

    # Apply threshold to get an image with only black and white.
    base_mode = cv2.THRESH_BINARY_INV if binary_inv else cv2.THRESH_BINARY
    return cv2.threshold(result, 0, 255, base_mode + cv2.THRESH_OTSU)[1]
def write_single_img(dst, save_path):
    """JPEG-encode ``dst`` in memory and dump the bytes to ``save_path``.

    The ``.jpg`` encoder is used regardless of the extension in
    ``save_path``.

    :param dst: image array accepted by ``cv2.imencode``
    :param save_path: destination file path
    :raises FileNotFoundError: propagated unchanged, e.g. when the target
        directory does not exist
    """
    jpg_buffer = cv2.imencode('.jpg', dst)[1]
    jpg_buffer.tofile(save_path)
def png2jpg(png_path):
    """Flatten a PNG onto a white background and save it as a JPEG.

    The JPEG is written next to the source file, with the file extension
    replaced by ``.jpg``.

    :param png_path: path of the source image
    :return: path of the written JPEG, or ``None`` when the conversion
        fails (the error is printed, not raised)
    """
    try:
        # Swap only the real extension: str.replace would also rewrite
        # '.png' inside directory names and would miss '.PNG'.
        jpg_path = os.path.splitext(png_path)[0] + '.jpg'

        # Converting to RGBA guarantees an alpha band to use as the paste
        # mask, so PNGs without transparency no longer fail.
        with Image.open(png_path) as im:
            rgba = im.convert("RGBA")

        bg = Image.new("RGB", rgba.size, (255, 255, 255))
        bg.paste(rgba, mask=rgba)
        bg.save(jpg_path)
        return jpg_path
    except Exception as e:
        # Best-effort by design: report the problem and return None.
        print("PNG转换JPG 错误", e)
def png_read(img_file):
    """Open an image with Pillow and return its pixels as a NumPy array.

    Images with more than three bands (e.g. RGBA) are converted to three-band
    RGB first, so the alpha band is dropped; other images are returned in
    their native mode.

    :param img_file: path or file object accepted by ``Image.open``
        (the uploaded web image)
    :return: ``np.ndarray`` of the pixel data, in Pillow's channel order;
        no swap to OpenCV's BGR order is performed here
    """
    raw_img = Image.open(img_file)
    if len(raw_img.getbands()) > 3:
        # convert("RGB") keeps R, G, B in order. Merging bands 1..3 instead
        # (G, B, A for RGBA) would scramble the colours.
        raw_img = raw_img.convert("RGB")
    return np.array(raw_img)
|