# @Author : lightXu # @File : utils.py import os import cv2 import numpy as np import xml.etree.cElementTree as ET from PIL import Image def read_img(img_path): try: im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1) except FileNotFoundError as e: raise e return im def write_img(img_to_wwite, save_path): try: cv2.imencode('.jpg', img_to_wwite)[1].tofile(save_path) except FileNotFoundError as e: raise e def crop_region_direct(im, bbox): xmin = bbox[0] ymin = bbox[1] xmax = bbox[2] ymax = bbox[3] region = im[ymin:ymax, xmin:xmax] return region def resize_by_percent(im, percent): """ :param im: :param percent: :return: resize_img interpolation - 插值方法。共有5种: 1)INTER_NEAREST - 最近邻插值法 2)INTER_LINEAR - 双线性插值法(默认) 3)INTER_AREA - 基于局部像素的重采样(resampling using pixel area relation)。 对于图像抽取(image decimation)来说,这可能是一个更好的方法。但如果是放大图像时,它和最近邻法的效果类似。 4)INTER_CUBIC - 基于4x4像素邻域的3次插值法 5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值 """ height = im.shape[0] width = im.shape[1] new_x = int(width * percent) new_y = int(height * percent) res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA) return res def resize_by_fixed_size(im, new_x, new_y): """ :param new_y: y轴像素 :param new_x: x轴像素 :param im: :return: resize_img interpolation - 插值方法。共有5种: 1)INTER_NEAREST - 最近邻插值法 2)INTER_LINEAR - 双线性插值法(默认) 3)INTER_AREA - 基于局部像素的重采样(resampling using pixel area relation)。 对于图像抽取(image decimation)来说,这可能是一个更好的方法。但如果是放大图像时,它和最近邻法的效果类似。 4)INTER_CUBIC - 基于4x4像素邻域的3次插值法 5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值 """ res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA) return res def resize_by_radio(im): """ :param im: :return: resize_img interpolation - 插值方法。共有5种: 1)INTER_NEAREST - 最近邻插值法 2)INTER_LINEAR - 双线性插值法(默认) 3)INTER_AREA - 基于局部像素的重采样(resampling using pixel area relation)。 对于图像抽取(image decimation)来说,这可能是一个更好的方法。但如果是放大图像时,它和最近邻法的效果类似。 4)INTER_CUBIC - 基于4x4像素邻域的3次插值法 5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值 """ # res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA) longer = 750 shorter = 500 im_shape = im.shape im_size_min = np.min(im_shape[0:2]) res = im if im_size_min > 500: im_size_max = np.max(im_shape[0:2]) im_scale = float(shorter) / float(im_size_min) # Prevent the biggest axis from being more than MAX_SIZE if np.round(im_scale * im_size_max) > longer: im_scale = float(longer) / float(im_size_max) res = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_AREA) return res def rgb2binary(im): gray_img = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) _ret, thresh_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU) return thresh_img def create_xml(obj_name, tree, xmin, ymin, xmax, ymax): root = tree.getroot() pobject = ET.SubElement(root, 'object', {}) pname = ET.SubElement(pobject, 'name') pname.text = obj_name ppose = ET.SubElement(pobject, 'pose') ppose.text = 'Unspecified' ptruncated = ET.SubElement(pobject, 'truncated') ptruncated.text = '0' pdifficult = ET.SubElement(pobject, 'difficult') pdifficult.text = '0' # add bndbox pbndbox = ET.SubElement(pobject, 'bndbox') pxmin = ET.SubElement(pbndbox, 'xmin') pxmin.text = str(xmin) pymin = ET.SubElement(pbndbox, 'ymin') pymin.text = str(ymin) pxmax = ET.SubElement(pbndbox, 'xmax') pxmax.text = str(xmax) pymax = ET.SubElement(pbndbox, 'ymax') pymax.text = str(ymax) return tree def preprocess(img, binary_inv=True): dilate = 1 blur = 1 if len(img.shape) >= 3: gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) else: gray_img = img # # Apply dilation and erosion to remove some noise if dilate != 0: kernel = np.ones((dilate, dilate), np.uint8) img = cv2.dilate(gray_img, kernel, iterations=1) img = cv2.erode(img, kernel, iterations=1) # Apply blur to smooth out the edges if blur != 0: img = cv2.GaussianBlur(img, (blur, blur), 0) # Apply threshold to get image with only b&w (binarization) if binary_inv: img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] else: img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1] return img def write_single_img(dst, save_path): try: cv2.imencode('.jpg', dst)[1].tofile(save_path) except FileNotFoundError as e: raise e def png2jpg(png_path): try: im = Image.open(png_path) jpg_path = png_path.replace('.png', '.jpg') bg = Image.new("RGB", im.size, (255, 255, 255)) bg.paste(im, im) bg.save(jpg_path) return jpg_path except Exception as e: print("PNG转换JPG 错误", e) def png_read(img_file): raw_img = Image.open(img_file) # 读取上传的网络图像 channels = raw_img.split() if len(channels) > 3: img = Image.merge("RGB", (channels[1], channels[2], channels[3])) open_cv_image = np.array(img) else: img = raw_img open_cv_image = np.array(img) return open_cv_image