utils.py 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. # @Author : lightXu
  2. # @File : utils.py
  3. import os
  4. import cv2
  5. import numpy as np
  6. import xml.etree.cElementTree as ET
  7. from PIL import Image
  8. def read_img(img_path):
  9. try:
  10. im = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
  11. except FileNotFoundError as e:
  12. raise e
  13. return im
  14. def write_img(img_to_wwite, save_path):
  15. try:
  16. cv2.imencode('.jpg', img_to_wwite)[1].tofile(save_path)
  17. except FileNotFoundError as e:
  18. raise e
  19. def crop_region_direct(im, bbox):
  20. xmin = bbox[0]
  21. ymin = bbox[1]
  22. xmax = bbox[2]
  23. ymax = bbox[3]
  24. region = im[ymin:ymax, xmin:xmax]
  25. return region
  26. def resize_by_percent(im, percent):
  27. """
  28. :param im:
  29. :param percent:
  30. :return: resize_img
  31. interpolation - 插值方法。共有5种:
  32. 1)INTER_NEAREST - 最近邻插值法
  33. 2)INTER_LINEAR - 双线性插值法(默认)
  34. 3)INTER_AREA - 基于局部像素的重采样(resampling using pixel area relation)。
  35. 对于图像抽取(image decimation)来说,这可能是一个更好的方法。但如果是放大图像时,它和最近邻法的效果类似。
  36. 4)INTER_CUBIC - 基于4x4像素邻域的3次插值法
  37. 5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值
  38. """
  39. height = im.shape[0]
  40. width = im.shape[1]
  41. new_x = int(width * percent)
  42. new_y = int(height * percent)
  43. res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA)
  44. return res
  45. def resize_by_fixed_size(im, new_x, new_y):
  46. """
  47. :param new_y: y轴像素
  48. :param new_x: x轴像素
  49. :param im:
  50. :return: resize_img
  51. interpolation - 插值方法。共有5种:
  52. 1)INTER_NEAREST - 最近邻插值法
  53. 2)INTER_LINEAR - 双线性插值法(默认)
  54. 3)INTER_AREA - 基于局部像素的重采样(resampling using pixel area relation)。
  55. 对于图像抽取(image decimation)来说,这可能是一个更好的方法。但如果是放大图像时,它和最近邻法的效果类似。
  56. 4)INTER_CUBIC - 基于4x4像素邻域的3次插值法
  57. 5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值
  58. """
  59. res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA)
  60. return res
  61. def resize_by_radio(im):
  62. """
  63. :param im:
  64. :return: resize_img
  65. interpolation - 插值方法。共有5种:
  66. 1)INTER_NEAREST - 最近邻插值法
  67. 2)INTER_LINEAR - 双线性插值法(默认)
  68. 3)INTER_AREA - 基于局部像素的重采样(resampling using pixel area relation)。
  69. 对于图像抽取(image decimation)来说,这可能是一个更好的方法。但如果是放大图像时,它和最近邻法的效果类似。
  70. 4)INTER_CUBIC - 基于4x4像素邻域的3次插值法
  71. 5)INTER_LANCZOS4 - 基于8x8像素邻域的Lanczos插值
  72. """
  73. # res = cv2.resize(im, (new_x, new_y), interpolation=cv2.INTER_AREA)
  74. longer = 750
  75. shorter = 500
  76. im_shape = im.shape
  77. im_size_min = np.min(im_shape[0:2])
  78. res = im
  79. if im_size_min > 500:
  80. im_size_max = np.max(im_shape[0:2])
  81. im_scale = float(shorter) / float(im_size_min)
  82. # Prevent the biggest axis from being more than MAX_SIZE
  83. if np.round(im_scale * im_size_max) > longer:
  84. im_scale = float(longer) / float(im_size_max)
  85. res = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
  86. interpolation=cv2.INTER_AREA)
  87. return res
  88. def rgb2binary(im):
  89. gray_img = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
  90. _ret, thresh_img = cv2.threshold(gray_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
  91. return thresh_img
  92. def create_xml(obj_name, tree, xmin, ymin, xmax, ymax):
  93. root = tree.getroot()
  94. pobject = ET.SubElement(root, 'object', {})
  95. pname = ET.SubElement(pobject, 'name')
  96. pname.text = obj_name
  97. ppose = ET.SubElement(pobject, 'pose')
  98. ppose.text = 'Unspecified'
  99. ptruncated = ET.SubElement(pobject, 'truncated')
  100. ptruncated.text = '0'
  101. pdifficult = ET.SubElement(pobject, 'difficult')
  102. pdifficult.text = '0'
  103. # add bndbox
  104. pbndbox = ET.SubElement(pobject, 'bndbox')
  105. pxmin = ET.SubElement(pbndbox, 'xmin')
  106. pxmin.text = str(xmin)
  107. pymin = ET.SubElement(pbndbox, 'ymin')
  108. pymin.text = str(ymin)
  109. pxmax = ET.SubElement(pbndbox, 'xmax')
  110. pxmax.text = str(xmax)
  111. pymax = ET.SubElement(pbndbox, 'ymax')
  112. pymax.text = str(ymax)
  113. return tree
  114. def preprocess(img, binary_inv=True):
  115. dilate = 1
  116. blur = 1
  117. if len(img.shape) >= 3:
  118. gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  119. else:
  120. gray_img = img
  121. # # Apply dilation and erosion to remove some noise
  122. if dilate != 0:
  123. kernel = np.ones((dilate, dilate), np.uint8)
  124. img = cv2.dilate(gray_img, kernel, iterations=1)
  125. img = cv2.erode(img, kernel, iterations=1)
  126. # Apply blur to smooth out the edges
  127. if blur != 0:
  128. img = cv2.GaussianBlur(img, (blur, blur), 0)
  129. # Apply threshold to get image with only b&w (binarization)
  130. if binary_inv:
  131. img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
  132. else:
  133. img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
  134. return img
  135. def write_single_img(dst, save_path):
  136. try:
  137. cv2.imencode('.jpg', dst)[1].tofile(save_path)
  138. except FileNotFoundError as e:
  139. raise e
  140. def png2jpg(png_path):
  141. try:
  142. im = Image.open(png_path)
  143. jpg_path = png_path.replace('.png', '.jpg')
  144. bg = Image.new("RGB", im.size, (255, 255, 255))
  145. bg.paste(im, im)
  146. bg.save(jpg_path)
  147. return jpg_path
  148. except Exception as e:
  149. print("PNG转换JPG 错误", e)
  150. def png_read(img_file):
  151. raw_img = Image.open(img_file) # 读取上传的网络图像
  152. channels = raw_img.split()
  153. if len(channels) > 3:
  154. img = Image.merge("RGB", (channels[1], channels[2], channels[3]))
  155. open_cv_image = np.array(img)
  156. else:
  157. img = raw_img
  158. open_cv_image = np.array(img)
  159. return open_cv_image