split_lines.py 3.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. # @Author : lightXu
  2. # @File : split_lines.py
  3. import os
  4. import cv2
  5. import numpy as np
  6. from segment.image_operation import utils
  7. from django.conf import settings
  8. def find_contours(resized_img, ex_x, ex_y):
  9. threshed = utils.rgb2binary(resized_img)
  10. kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (ex_x, ex_y)) # 膨胀系数
  11. # morphed = cv2.morphologyEx(threshed, cv2.MORPH_CLOSE, kernel)
  12. morphed = cv2.dilate(threshed, kernel, iterations=1)
  13. _, cnts, hierarchy = cv2.findContours(morphed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
  14. cnt = sorted(cnts, key=cv2.contourArea)[-1]
  15. x, y, w, h = cv2.boundingRect(cnt)
  16. x = x + int(ex_x * 0.5)
  17. w = w - int(ex_x * 0.5)
  18. dst = threshed[y:y + h, x:x + w]
  19. return dst, (y, y + h, x, x + w), cnts
  20. def save_lines_by_index_without_white_line(path, split_img, split_index, resize_radio):
  21. img_y = split_img.shape[0]
  22. img_x = split_img.shape[1]
  23. lines_list = []
  24. for i in range(1, len(split_index)):
  25. if i % 2 != 1:
  26. start0 = int((split_index[i - 1] - 2) / resize_radio) # 0,1间隔, 交替相减a2-a1, 每行上下的白多一点
  27. end0 = int((split_index[i] - 1 + 2) / resize_radio) # 前一个索引
  28. start = start0 if (start0 >= 0) else 0
  29. end = end0 if (end0 <= img_y) else img_y
  30. line = split_img[start:end, 1:img_x]
  31. if len(line) < 1:
  32. continue
  33. _, _, cnts = find_contours(line, 500, 70) # x轴膨胀,去掉每行的白色, 第二个参数按行膨胀,第三个参数按列膨胀
  34. for cnt_id, cnt in enumerate(reversed(cnts)):
  35. x, y, w, h = cv2.boundingRect(cnt)
  36. # print(x, y, w, h)
  37. if w * h > 100:
  38. cj_out = line[y:y + h, x:x + w]
  39. # line_list.append(cj_out)
  40. save_path = os.path.join(path,
  41. '{:04d}_{:04d}_{:04d}_{:04d}_{}.jpg'.format(start, end, x, x+w, cnt_id))
  42. cv2.imencode('.jpg', cj_out)[1].tofile(save_path)
  43. # print(save_path)
  44. filename = os.path.abspath(save_path)
  45. lines_list.append(filename)
  46. return lines_list
  47. def line_split(path, save_path, tolerance_pix_number):
  48. resize_radio = settings.RESIZE_RADIO
  49. images = utils.read_img(path)
  50. # raw_y = images.shape[0]
  51. # raw_x = images.shape[1]
  52. # images = images[:raw_y, int(raw_x * 0.05):raw_x - int(raw_x * 0.05)]
  53. resize_img = utils.resize_by_percent(images, resize_radio)
  54. resize_crop_imgs, max_bbox, _ = find_contours(resize_img, 10, 200) # y轴膨胀,整体去掉白色,去掉扫描后图像边界的黑色线条
  55. bbox = [int(ele / resize_radio) for ele in max_bbox]
  56. img_arr = np.asarray(resize_crop_imgs)
  57. img_size = img_arr.shape
  58. width = img_size[1]
  59. sum_x_axis = img_arr.sum(axis=1) / width
  60. # hei[hei <= 254] = 0 # black
  61. sum_x_axis[sum_x_axis > 255 * tolerance_pix_number / width] = 1 # white
  62. sum_x_axis[sum_x_axis != 1] = 0
  63. sum_x_axis_list = list(sum_x_axis)
  64. split_index0 = []
  65. num = 0
  66. for i, ele in enumerate(sum_x_axis_list):
  67. num = num % 2
  68. if ele == num:
  69. # print(i)
  70. num = num + 1
  71. split_index0.append(i)
  72. split_img0 = images[bbox[0]:bbox[1], bbox[2]:bbox[3]]
  73. lines_list = save_lines_by_index_without_white_line(save_path, split_img0, split_index0, resize_radio)
  74. return bbox, lines_list