pre_segment.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. # @Author : lightXu
  2. # @File : pre_segment.py
  3. import time
  4. import numpy as np
  5. import cv2
  6. from numpy import asarray
  7. import base64
  8. import scipy.signal
  9. from segment.image_operation import utils
  10. def hough_rotate_cv(image):
  11. """ not Long time consuming, not Strong generalization ability, not high accuracy, more super parameters"""
  12. img_np = utils.resize_by_percent(asarray(image), 1)
  13. if len(img_np.shape) == 3:
  14. img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
  15. canny_image = cv2.Canny(img_np, 0, 255, apertureSize=3)
  16. # cv2.imshow('canny', canny_image)
  17. # cv2.waitKey(10)
  18. lines = cv2.HoughLinesP(canny_image, 1, np.pi / 180, 160, minLineLength=500, maxLineGap=65)
  19. # lines = cv2.HoughLines(canny_image, 1, np.pi / 180, 160, max_theta=30, min_theta=0)
  20. # 寻找长度最长的线
  21. distance = []
  22. for line in lines:
  23. x1, y1, x2, y2 = line[0]
  24. dis = np.sqrt(pow((x2 - x1), 2) + pow((y2 - y1), 2))
  25. distance.append(dis)
  26. max_dis_index = distance.index(max(distance))
  27. max_line = lines[max_dis_index]
  28. x1, y1, x2, y2 = max_line[0]
  29. # 获取旋转角度
  30. angle = cv2.fastAtan2((y2 - y1), (x2 - x1))
  31. print(angle)
  32. if 0.5 <= angle <= 7: # 因为识别误差问题,根据实际情况设置旋转阈值
  33. centerpoint = (image.shape[1] / 2, image.shape[0] / 2)
  34. rotate_mat = cv2.getRotationMatrix2D(centerpoint, angle, 1.0) # 获取旋转矩阵
  35. correct_image = cv2.warpAffine(image, rotate_mat, (image.shape[1], image.shape[0]),
  36. borderValue=(255, 255, 255))
  37. # cv2.imshow('test', resize_by_percent(correct_image, 0.1))
  38. # cv2.waitKey(10)
  39. return correct_image
  40. else:
  41. return image
  42. def array_latter_subtracts_precious(nparray):
  43. array1 = nparray[:-1]
  44. array2 = nparray[1:]
  45. return array2 - array1
  46. def split_by_index(im_raw, index):
  47. y_raw, x_raw, _ = im_raw.shape
  48. img_left = im_raw[1:y_raw, 1:index]
  49. img_right = im_raw[1:y_raw, index + 1:x_raw]
  50. return img_left, img_right
  51. def split_img_at_middle_by_y_axis(img_path, radio=0.10, thresh_std=5000):
  52. im_raw = utils.read_img(img_path)
  53. im_resize = utils.resize_by_percent(im_raw, radio)
  54. ry, rx, _ = im_resize.shape
  55. img_mtx0 = np.asarray(utils.rgb2binary(im_resize))
  56. y_sum_array0 = img_mtx0.sum(axis=0)
  57. tmp = array_latter_subtracts_precious(y_sum_array0 / ry)
  58. std0 = np.std(tmp) # 计算标准差
  59. # # plt.bar(range(len(y_sum_array0)), y_sum_array0)
  60. # # plt.show()
  61. # plt.plot(range(len(y_sum_array0)-1), tmp)
  62. # plt.show()
  63. y, x, _z = im_resize.shape
  64. x_bias = int(x * 0.15)
  65. y_bias = int(y * 0.30)
  66. middle_x = int(x / 2)
  67. middle_area_img = im_resize[y_bias:y, middle_x - x_bias:middle_x + x_bias]
  68. img_mtx = np.asarray(utils.rgb2binary(middle_area_img))
  69. y_sum_array = img_mtx.sum(axis=0)
  70. std = np.std(y_sum_array) # 计算标准差
  71. y_sum_list = list(y_sum_array)
  72. if std <= thresh_std:
  73. index = y_sum_list.index(max(y_sum_list))
  74. else:
  75. index = y_sum_list.index(min(y_sum_list))
  76. split_index = middle_x + index - int(len(y_sum_list) / 2)
  77. split_index = int(split_index / radio)
  78. y_raw, x_raw, _ = im_raw.shape
  79. img_left = im_raw[1:y_raw, 1:split_index]
  80. img_right = im_raw[1:y_raw, split_index + 1:x_raw]
  81. left_path = img_path.replace('.jpg', '_left.jpg')
  82. right_path = img_path.replace('.jpg', '_right.jpg')
  83. cv2.imencode('.jpg', img_left)[1].tofile(left_path)
  84. cv2.imencode('.jpg', img_right)[1].tofile(right_path)
  85. print(left_path)
  86. print(right_path)
  87. def smart_split_img_at_middle_by_x_axis(img_path, resize_radio=0.1):
  88. im_raw = utils.read_img(img_path)
  89. im_resize = utils.resize_by_percent(im_raw, resize_radio)
  90. bin_img = utils.rgb2binary(im_resize)
  91. ry, rx = bin_img.shape
  92. img_mtx0 = np.asarray(bin_img)
  93. y_sum_array0 = img_mtx0.sum(axis=0) # y轴求和
  94. subtracts_arr = np.abs(array_latter_subtracts_precious(y_sum_array0 / ry)) # 长度减1
  95. subtracts_arr_index = np.argsort(subtracts_arr, kind='quicksort', order=None)
  96. subtracts_arr_index = subtracts_arr_index[-10:]
  97. index_middle_distance_list = list(np.abs(subtracts_arr_index - int(rx / 2)))
  98. split_index = subtracts_arr_index[index_middle_distance_list.index(min(index_middle_distance_list))] + 1
  99. split_index = int(split_index / resize_radio)
  100. img_left, img_right = split_by_index(im_raw, split_index)
  101. left_path = img_path.replace('.jpg', '_left.jpg')
  102. right_path = img_path.replace('.jpg', '_right.jpg')
  103. cv2.imencode('.jpg', img_left)[1].tofile(left_path)
  104. cv2.imencode('.jpg', img_right)[1].tofile(right_path)
  105. print(left_path)
  106. print(right_path)
  107. def segment2parts_by_pix(crop_img):
  108. p_image = utils.preprocess(crop_img)
  109. height, width = p_image.shape
  110. sum_x_axis = p_image.sum(axis=0) / (height*255)
  111. # sum_x_axis = (sum_x_axis / (255*height)).astype(float)
  112. kernel = np.array([-2, 0, 2])
  113. sobel_filter = scipy.signal.convolve(sum_x_axis, kernel) # 一维卷积运算
  114. temp = np.abs(sobel_filter[1:-1])/np.max(np.abs(sobel_filter[1:-1]))
  115. temp[temp < 0.6] = 0
  116. temp[temp != 0] = 1
  117. index = np.where(temp == 1)[0]
  118. width1 = width // 9
  119. intervals = [(0, width1), (4 * width1, 5 * width1), (8 * width1, width)] # 左开右闭
  120. index_list = []
  121. for i, interval in enumerate(intervals):
  122. index_sec_list = []
  123. for ele in index:
  124. if interval[0] < ele <= interval[1]:
  125. index_sec_list.append(ele)
  126. index_list.append(index_sec_list)
  127. left_x_point, middle_x_point, right_x_point = 9999, 9999, 9999
  128. left_del_part = (0, left_x_point)
  129. middle_part = (left_x_point, middle_x_point)
  130. right_part = (middle_x_point, right_x_point)
  131. right_del_part = (right_x_point, width)
  132. # left
  133. if index_list[0]:
  134. left_x_point = index_list[0][-1]
  135. left_del_part = (0, left_x_point)
  136. # middle
  137. if index_list[1]:
  138. value_list = [abs(sobel_filter[index]) for index in index_list[1]]
  139. middle_x_point = index_list[1][value_list.index(max(value_list))]
  140. middle_part = (left_x_point, middle_x_point)
  141. # right
  142. if index_list[2]:
  143. right_x_point = index_list[2][0]
  144. right_part = (middle_x_point, right_x_point)
  145. right_del_part = (right_x_point, width)
  146. split_point = sorted(list(set(sorted(list(left_del_part + middle_part + right_part + right_del_part))) - {9999}))
  147. split_pairs = []
  148. if len(split_point) > 2:
  149. a = split_point[:-1]
  150. b = split_point[1:]
  151. for i, ele in enumerate(a):
  152. if b[i] - ele > width1:
  153. split_pairs.append((ele, b[i]))
  154. return split_pairs
  155. def segment2parts(im_raw, save_path):
  156. img_parts_dict_list = []
  157. # randon_img = radon_rotate_ski(im_raw)
  158. # 试卷顶部可能有黑边,切去3%
  159. yy, xx = im_raw.shape[0], im_raw.shape[1]
  160. y_crop_pix = int(yy*0.03)
  161. # x_crop_pix = int(xx*0.03)
  162. x_crop_pix = 0
  163. im_crop = im_raw[y_crop_pix:yy-y_crop_pix, x_crop_pix:xx-x_crop_pix]
  164. split_pairs = segment2parts_by_pix(im_crop)
  165. if len(split_pairs) >= 2:
  166. for index, ele in enumerate(split_pairs):
  167. dst = im_raw[:, ele[0]:ele[1]]
  168. save_path_final = save_path.replace('.jpg', '') + '_{}_{}_{}.jpg'.format(ele[0], 0, index)
  169. cv2.imencode('.jpg', dst)[1].tofile(save_path_final)
  170. image = cv2.imencode('.jpg', dst)[1]
  171. base64_data = str(base64.b64encode(image))[2:-1]
  172. part_dict = {'img_part': base64_data,
  173. 'x_bias': ele[0] + x_crop_pix,
  174. 'y_bias': 0}
  175. img_parts_dict_list.append(part_dict)
  176. else:
  177. img = im_crop[:, split_pairs[0][0]:split_pairs[0][1]]
  178. resize_ratio = 0.3
  179. im_resize = utils.resize_by_percent(img, resize_ratio)
  180. # gray
  181. if len(im_resize.shape) >= 3:
  182. gray_img = cv2.cvtColor(im_resize, cv2.COLOR_BGR2GRAY)
  183. else:
  184. gray_img = im_resize
  185. ry, rx = gray_img.shape
  186. # 高斯
  187. glur_img = cv2.GaussianBlur(gray_img, (5, 5), 0)
  188. # otsu
  189. _ret, threshed_img = cv2.threshold(glur_img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
  190. if ry < rx:
  191. x_kernel = int(10*resize_ratio)
  192. else:
  193. x_kernel = int(10 * resize_ratio)
  194. kernel = np.ones((glur_img.shape[0], x_kernel), np.uint8) # height, width
  195. dilation = cv2.dilate(threshed_img, kernel, iterations=1)
  196. # cv2.imshow(' ', dilation)
  197. # if cv2.waitKey(0) == 27:
  198. # cv2.destroyAllWindows()
  199. # _, cnts, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  200. (major, minor, _) = cv2.__version__.split(".")
  201. contours = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  202. cnts = contours[0] if int(major) > 3 else contours[1]
  203. box_list = [cv2.boundingRect(cnt) for cnt in cnts]
  204. box_array = np.asarray(box_list)
  205. box_array[:, 2] = box_array[:, 0] + box_array[:, 2]
  206. box_array[:, 3] = box_array[:, 1] + box_array[:, 3]
  207. middle_x = rx // 2
  208. left_box = np.asarray([0, 0, 0, 0])
  209. right_box = np.asarray([0, 0, 0, 0])
  210. for box in box_array:
  211. x, y, xmax, ymax = box
  212. if x + (xmax-x)//2 <= middle_x:
  213. left_box = np.vstack([left_box, box])
  214. else:
  215. right_box = np.vstack([right_box, box])
  216. left_box_list = []
  217. right_box_list = []
  218. try:
  219. left_box_list = left_box[1:, :][:, :2].min(axis=0).tolist() + left_box[1:, :][:, 2:].max(axis=0).tolist()
  220. except Exception:
  221. pass # 单面的情况
  222. try:
  223. right_box_list = right_box[1:, :][:, :2].min(axis=0).tolist() + right_box[1:, :][:, 2:].max(axis=0).tolist()
  224. except Exception:
  225. pass
  226. box_list = [left_box_list, right_box_list]
  227. bias = int(70 * resize_ratio)
  228. for index, box in enumerate(box_list):
  229. if len(box) > 0:
  230. xmin, ymin, xmax, ymax = box
  231. if xmin - bias > 0:
  232. xmin = xmin - bias
  233. else:
  234. xmin = 0
  235. dst = im_crop[int(ymin / resize_ratio):int(ymax / resize_ratio),
  236. int(xmin / resize_ratio):int(xmax / resize_ratio)]
  237. save_path_final = save_path.replace('.jpg', '') + '_{}_{}_{}.jpg'.format(xmin, ymin, index)
  238. cv2.imencode('.jpg', dst)[1].tofile(save_path_final)
  239. image = cv2.imencode('.jpg', dst)[1]
  240. base64_data = str(base64.b64encode(image))[2:-1]
  241. part_dict = {'img_part': base64_data,
  242. 'x_bias': int(xmin/resize_ratio) + x_crop_pix + split_pairs[0][0],
  243. 'y_bias': int(ymin/resize_ratio) + y_crop_pix + 0}
  244. if (xmax - xmin)/resize_ratio > 100: # 去掉竖长条
  245. img_parts_dict_list.append(part_dict)
  246. return img_parts_dict_list