craft_utils.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. """
  2. Copyright (c) 2019-present NAVER Corp.
  3. MIT License
  4. """
  5. # -*- coding: utf-8 -*-
  6. import numpy as np
  7. import cv2
  8. import math
  9. """ auxiliary functions """
  10. # unwarp coordinates
  11. def warpCoord(Minv, pt):
  12. out = np.matmul(Minv, (pt[0], pt[1], 1))
  13. return np.array([out[0] / out[2], out[1] / out[2]])
  14. """ end of auxiliary functions """
  15. def getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text):
  16. # prepare data
  17. linkmap = linkmap.copy()
  18. textmap = textmap.copy()
  19. img_h, img_w = textmap.shape
  20. """ labeling method """
  21. ret, text_score = cv2.threshold(textmap, low_text, 1, 0)
  22. ret, link_score = cv2.threshold(linkmap, link_threshold, 1, 0)
  23. text_score_comb = np.clip(text_score + link_score, 0, 1)
  24. nLabels, labels, stats, centroids = cv2.connectedComponentsWithStats(text_score_comb.astype(np.uint8),
  25. connectivity=4)
  26. det = []
  27. mapper = []
  28. for k in range(1, nLabels):
  29. # size filtering
  30. size = stats[k, cv2.CC_STAT_AREA]
  31. if size < 10: continue
  32. # thresholding
  33. if np.max(textmap[labels == k]) < text_threshold: continue
  34. # make segmentation map
  35. segmap = np.zeros(textmap.shape, dtype=np.uint8)
  36. segmap[labels == k] = 255
  37. segmap[np.logical_and(link_score == 1, text_score == 0)] = 0 # remove link area
  38. x, y = stats[k, cv2.CC_STAT_LEFT], stats[k, cv2.CC_STAT_TOP]
  39. w, h = stats[k, cv2.CC_STAT_WIDTH], stats[k, cv2.CC_STAT_HEIGHT]
  40. niter = int(math.sqrt(size * min(w, h) / (w * h)) * 2)
  41. sx, ex, sy, ey = x - niter, x + w + niter + 1, y - niter, y + h + niter + 1
  42. # boundary check
  43. if sx < 0: sx = 0
  44. if sy < 0: sy = 0
  45. if ex >= img_w: ex = img_w
  46. if ey >= img_h: ey = img_h
  47. kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1 + niter, 1 + niter))
  48. segmap[sy:ey, sx:ex] = cv2.dilate(segmap[sy:ey, sx:ex], kernel)
  49. # make box
  50. np_contours = np.roll(np.array(np.where(segmap != 0)), 1, axis=0).transpose().reshape(-1, 2)
  51. rectangle = cv2.minAreaRect(np_contours)
  52. box = cv2.boxPoints(rectangle)
  53. # align diamond-shape
  54. w, h = np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[1] - box[2])
  55. box_ratio = max(w, h) / (min(w, h) + 1e-5)
  56. if abs(1 - box_ratio) <= 0.1:
  57. l, r = min(np_contours[:, 0]), max(np_contours[:, 0])
  58. t, b = min(np_contours[:, 1]), max(np_contours[:, 1])
  59. box = np.array([[l, t], [r, t], [r, b], [l, b]], dtype=np.float32)
  60. # make clock-wise order
  61. startidx = box.sum(axis=1).argmin()
  62. box = np.roll(box, 4 - startidx, 0)
  63. box = np.array(box)
  64. det.append(box)
  65. mapper.append(k)
  66. return det, labels, mapper
  67. def getPoly_core(boxes, labels, mapper, linkmap):
  68. # configs
  69. num_cp = 5
  70. max_len_ratio = 0.7
  71. expand_ratio = 1.45
  72. max_r = 2.0
  73. step_r = 0.2
  74. polys = []
  75. for k, box in enumerate(boxes):
  76. # size filter for small instance
  77. w, h = int(np.linalg.norm(box[0] - box[1]) + 1), int(np.linalg.norm(box[1] - box[2]) + 1)
  78. if w < 10 or h < 10:
  79. polys.append(None);
  80. continue
  81. # warp image
  82. tar = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
  83. M = cv2.getPerspectiveTransform(box, tar)
  84. word_label = cv2.warpPerspective(labels, M, (w, h), flags=cv2.INTER_NEAREST)
  85. try:
  86. Minv = np.linalg.inv(M)
  87. except:
  88. polys.append(None);
  89. continue
  90. # binarization for selected label
  91. cur_label = mapper[k]
  92. word_label[word_label != cur_label] = 0
  93. word_label[word_label > 0] = 1
  94. """ Polygon generation """
  95. # find top/bottom contours
  96. cp = []
  97. max_len = -1
  98. for i in range(w):
  99. region = np.where(word_label[:, i] != 0)[0]
  100. if len(region) < 2: continue
  101. cp.append((i, region[0], region[-1]))
  102. length = region[-1] - region[0] + 1
  103. if length > max_len: max_len = length
  104. # pass if max_len is similar to h
  105. if h * max_len_ratio < max_len:
  106. polys.append(None);
  107. continue
  108. # get pivot points with fixed length
  109. tot_seg = num_cp * 2 + 1
  110. seg_w = w / tot_seg # segment width
  111. pp = [None] * num_cp # init pivot points
  112. cp_section = [[0, 0]] * tot_seg
  113. seg_height = [0] * num_cp
  114. seg_num = 0
  115. num_sec = 0
  116. prev_h = -1
  117. for i in range(0, len(cp)):
  118. (x, sy, ey) = cp[i]
  119. if (seg_num + 1) * seg_w <= x and seg_num <= tot_seg:
  120. # average previous segment
  121. if num_sec == 0: break
  122. cp_section[seg_num] = [cp_section[seg_num][0] / num_sec, cp_section[seg_num][1] / num_sec]
  123. num_sec = 0
  124. # reset variables
  125. seg_num += 1
  126. prev_h = -1
  127. # accumulate center points
  128. cy = (sy + ey) * 0.5
  129. cur_h = ey - sy + 1
  130. cp_section[seg_num] = [cp_section[seg_num][0] + x, cp_section[seg_num][1] + cy]
  131. num_sec += 1
  132. if seg_num % 2 == 0: continue # No polygon area
  133. if prev_h < cur_h:
  134. pp[int((seg_num - 1) / 2)] = (x, cy)
  135. seg_height[int((seg_num - 1) / 2)] = cur_h
  136. prev_h = cur_h
  137. # processing last segment
  138. if num_sec != 0:
  139. cp_section[-1] = [cp_section[-1][0] / num_sec, cp_section[-1][1] / num_sec]
  140. # pass if num of pivots is not sufficient or segment widh is smaller than character height
  141. if None in pp or seg_w < np.max(seg_height) * 0.25:
  142. polys.append(None);
  143. continue
  144. # calc median maximum of pivot points
  145. half_char_h = np.median(seg_height) * expand_ratio / 2
  146. # calc gradiant and apply to make horizontal pivots
  147. new_pp = []
  148. for i, (x, cy) in enumerate(pp):
  149. dx = cp_section[i * 2 + 2][0] - cp_section[i * 2][0]
  150. dy = cp_section[i * 2 + 2][1] - cp_section[i * 2][1]
  151. if dx == 0: # gradient if zero
  152. new_pp.append([x, cy - half_char_h, x, cy + half_char_h])
  153. continue
  154. rad = - math.atan2(dy, dx)
  155. c, s = half_char_h * math.cos(rad), half_char_h * math.sin(rad)
  156. new_pp.append([x - s, cy - c, x + s, cy + c])
  157. # get edge points to cover character heatmaps
  158. isSppFound, isEppFound = False, False
  159. grad_s = (pp[1][1] - pp[0][1]) / (pp[1][0] - pp[0][0]) + (pp[2][1] - pp[1][1]) / (pp[2][0] - pp[1][0])
  160. grad_e = (pp[-2][1] - pp[-1][1]) / (pp[-2][0] - pp[-1][0]) + (pp[-3][1] - pp[-2][1]) / (pp[-3][0] - pp[-2][0])
  161. for r in np.arange(0.5, max_r, step_r):
  162. dx = 2 * half_char_h * r
  163. if not isSppFound:
  164. line_img = np.zeros(word_label.shape, dtype=np.uint8)
  165. dy = grad_s * dx
  166. p = np.array(new_pp[0]) - np.array([dx, dy, dx, dy])
  167. cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
  168. if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
  169. spp = p
  170. isSppFound = True
  171. if not isEppFound:
  172. line_img = np.zeros(word_label.shape, dtype=np.uint8)
  173. dy = grad_e * dx
  174. p = np.array(new_pp[-1]) + np.array([dx, dy, dx, dy])
  175. cv2.line(line_img, (int(p[0]), int(p[1])), (int(p[2]), int(p[3])), 1, thickness=1)
  176. if np.sum(np.logical_and(word_label, line_img)) == 0 or r + 2 * step_r >= max_r:
  177. epp = p
  178. isEppFound = True
  179. if isSppFound and isEppFound:
  180. break
  181. # pass if boundary of polygon is not found
  182. if not (isSppFound and isEppFound):
  183. polys.append(None);
  184. continue
  185. # make final polygon
  186. poly = []
  187. poly.append(warpCoord(Minv, (spp[0], spp[1])))
  188. for p in new_pp:
  189. poly.append(warpCoord(Minv, (p[0], p[1])))
  190. poly.append(warpCoord(Minv, (epp[0], epp[1])))
  191. poly.append(warpCoord(Minv, (epp[2], epp[3])))
  192. for p in reversed(new_pp):
  193. poly.append(warpCoord(Minv, (p[2], p[3])))
  194. poly.append(warpCoord(Minv, (spp[2], spp[3])))
  195. # add to final result
  196. polys.append(np.array(poly))
  197. return polys
  198. def getDetBoxes(textmap, linkmap, text_threshold, link_threshold, low_text, poly=False):
  199. boxes, labels, mapper = getDetBoxes_core(textmap, linkmap, text_threshold, link_threshold, low_text)
  200. if poly:
  201. polys = getPoly_core(boxes, labels, mapper, linkmap)
  202. else:
  203. polys = [None] * len(boxes)
  204. return boxes, polys
  205. def adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=2):
  206. if len(polys) > 0:
  207. polys = np.array(polys)
  208. for k in range(len(polys)):
  209. if polys[k] is not None:
  210. polys[k] *= (ratio_w * ratio_net, ratio_h * ratio_net)
  211. return polys