proposal_target_layer.py

# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick, Sean Bell and Xinlei Chen
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import numpy.random as npr

from segment.sheet_resolve.lib.model.config import cfg
from segment.sheet_resolve.lib.model.bbox_transform import bbox_transform
from segment.sheet_resolve.lib.utils.py_bbox import bbox_overlaps

def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    """
    # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
    # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
    all_rois = rpn_rois
    all_scores = rpn_scores

    # Include ground-truth boxes in the set of candidate rois
    if cfg.TRAIN.USE_GT:
        zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack(
            (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
        )
        # Not sure this append is wise, but the appended scores are not used downstream anyway
        all_scores = np.vstack((all_scores, zeros))

    num_images = 1
    rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images
    fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image)

    # Sample rois with classification labels and bounding box regression
    # targets
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = _sample_rois(
        all_rois, all_scores, gt_boxes, fg_rois_per_image,
        rois_per_image, _num_classes)

    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, _num_classes * 4)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, _num_classes * 4)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)

    return rois, roi_scores, labels, bbox_targets, bbox_inside_weights, bbox_outside_weights
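
# Shape note (added, inferred from the code above): rpn_rois is (N, 5) with rows
# (batch_idx, x1, y1, x2, y2), rpn_scores is (N, 1), and gt_boxes is (G, 5) with
# rows (x1, y1, x2, y2, class). The layer typically returns cfg.TRAIN.BATCH_SIZE
# sampled rois, so rois is (R, 5), labels is (R, 1), and the three bbox blobs
# are (R, 4 * _num_classes).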

def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Bounding-box regression targets (bbox_target_data) are stored in a
    compact form N x (class, tx, ty, tw, th)

    This function expands those targets into the 4-of-4*K representation used
    by the network (i.e. only one class has non-zero targets).

    Returns:
        bbox_target (ndarray): N x 4K blob of regression targets
        bbox_inside_weights (ndarray): N x 4K blob of loss weights
    """
    clss = bbox_target_data[:, 0]
    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
    inds = np.where(clss > 0)[0]
    for ind in inds:
        cls = clss[ind]
        start = int(4 * cls)
        end = start + 4
        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
    return bbox_targets, bbox_inside_weights
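
# Worked example (added sketch): with num_classes = 3 and a single row
# bbox_target_data = [[2., tx, ty, tw, th]], the returned bbox_targets has
# shape (1, 12); only columns 8:12 (the class-2 slot) receive (tx, ty, tw, th),
# and the same columns of bbox_inside_weights are set to
# cfg.TRAIN.BBOX_INSIDE_WEIGHTS, so the regression loss is only active for the
# assigned class.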

def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image."""
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    targets = bbox_transform(ex_rois, gt_rois)
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
                   / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
    return np.hstack(
        (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
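
# Worked example (added sketch): for an ex_roi that coincides exactly with its
# gt_roi, bbox_transform yields the zero target (0, 0, 0, 0); with
# BBOX_NORMALIZE_TARGETS_PRECOMPUTED enabled that target is then shifted by
# BBOX_NORMALIZE_MEANS and divided by BBOX_NORMALIZE_STDS before the class
# label is prepended, giving one row of (class, tx, ty, tw, th).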

def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version: ensure a fixed number of
    # regions is sampled, repeating indices when too few candidates exist
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # No backgrounds available: fill the batch with (possibly repeated) foregrounds
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # No foregrounds available: fill the batch with (possibly repeated) backgrounds
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Neither foregrounds nor backgrounds matched the thresholds; drop into
        # the debugger so the degenerate batch can be inspected
        import pdb
        pdb.set_trace()

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
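

if __name__ == "__main__":
    # Minimal smoke test (added sketch, not part of the original module). It
    # assumes the repository's default cfg.TRAIN settings and fabricates tiny
    # example data: the first two proposals are exact copies of the ground-truth
    # boxes, so at least one foreground RoI always exists and the debugger
    # branch in _sample_rois is never reached.
    gt_boxes = np.array([[10., 10., 100., 120., 1.],
                         [150., 40., 260., 200., 2.]], dtype=np.float32)
    proposals = np.array([[10., 10., 100., 120.],
                          [150., 40., 260., 200.],
                          [300., 300., 360., 380.],
                          [12., 15., 90., 110.]], dtype=np.float32)
    rpn_rois = np.hstack((np.zeros((proposals.shape[0], 1), dtype=np.float32),
                          proposals))
    rpn_scores = npr.rand(proposals.shape[0], 1).astype(np.float32)

    outputs = proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes=3)
    names = ["rois", "roi_scores", "labels", "bbox_targets",
             "bbox_inside_weights", "bbox_outside_weights"]
    for name, blob in zip(names, outputs):
        print(name, blob.shape)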