123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- # --------------------------------------------------------
- # Faster R-CNN
- # Copyright (c) 2015 Microsoft
- # Licensed under The MIT License [see LICENSE for details]
- # Written by Ross Girshick, Sean Bell and Xinlei Chen
- # --------------------------------------------------------
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- import numpy as np
- import numpy.random as npr
- from segment.sheet_resolve.lib.model.config import cfg
- from segment.sheet_resolve.lib.model.bbox_transform import bbox_transform
- from segment.sheet_resolve.lib.utils.py_bbox import bbox_overlaps
def proposal_target_layer(rpn_rois, rpn_scores, gt_boxes, _num_classes):
    """Sample proposals and build their classification/regression targets.

    Args:
        rpn_rois: Candidate RoIs, one per row, laid out as
            (0, x1, y1, x2, y2). Typically produced by the RPN proposal
            layer, but any source with that layout works.
        rpn_scores: Scores for ``rpn_rois``. Presumably an N x 1 column,
            since it is stacked with a per-ground-truth zero column when
            ``cfg.TRAIN.USE_GT`` is enabled -- confirm against the caller.
        gt_boxes: Ground-truth rows. Every column except the last is copied
            into the candidate set when ``cfg.TRAIN.USE_GT`` is enabled.
        _num_classes: Number of classes K; regression targets are reshaped
            to ``4 * K`` columns.

    Returns:
        Tuple ``(rois, roi_scores, labels, bbox_targets,
        bbox_inside_weights, bbox_outside_weights)`` where ``rois`` has 5
        columns, ``roi_scores`` is flat, ``labels`` is a single column, and
        the three bbox arrays have ``4 * K`` columns.
        ``bbox_outside_weights`` is 1.0 wherever the inside weight is
        positive and 0.0 elsewhere.
    """
    candidate_rois, candidate_scores = rpn_rois, rpn_scores

    if cfg.TRAIN.USE_GT:
        # Let the ground-truth boxes compete as candidates: prepend a zero
        # column so they share the (0, x1, y1, x2, y2) layout.
        gt_pad = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        gt_as_rois = np.hstack((gt_pad, gt_boxes[:, :-1]))
        candidate_rois = np.vstack((candidate_rois, gt_as_rois))
        # Ground-truth boxes have no RPN score, so a zero placeholder is
        # appended for each of them.
        candidate_scores = np.vstack((candidate_scores, gt_pad))

    images_per_batch = 1
    batch_rois = cfg.TRAIN.BATCH_SIZE / images_per_batch
    fg_quota = np.round(cfg.TRAIN.FG_FRACTION * batch_rois)

    # Draw the fg/bg sample together with its labels and regression targets.
    sampled = _sample_rois(
        candidate_rois, candidate_scores, gt_boxes, fg_quota,
        batch_rois, _num_classes)
    labels, rois, roi_scores, bbox_targets, bbox_inside_weights = sampled

    target_width = _num_classes * 4
    rois = rois.reshape(-1, 5)
    roi_scores = roi_scores.reshape(-1)
    labels = labels.reshape(-1, 1)
    bbox_targets = bbox_targets.reshape(-1, target_width)
    bbox_inside_weights = bbox_inside_weights.reshape(-1, target_width)
    # Outside weights simply flag the entries that carry an inside weight.
    bbox_outside_weights = (bbox_inside_weights > 0).astype(np.float32)

    return (rois, roi_scores, labels, bbox_targets,
            bbox_inside_weights, bbox_outside_weights)
def _get_bbox_regression_labels(bbox_target_data, num_classes):
    """Expand compact regression targets into the per-class 4K layout.

    ``bbox_target_data`` holds one row per RoI in the compact form
    (class, tx, ty, tw, th). The network consumes a 4-of-4*K layout in
    which only the four columns belonging to the RoI's class are non-zero.

    Args:
        bbox_target_data (ndarray): N x 5 compact targets.
        num_classes (int): Number of classes K.

    Returns:
        bbox_targets (ndarray): N x 4K float32 blob of regression targets.
        bbox_inside_weights (ndarray): N x 4K float32 blob of loss weights,
            set to ``cfg.TRAIN.BBOX_INSIDE_WEIGHTS`` in the same four
            columns that receive targets.
    """
    class_ids = bbox_target_data[:, 0]
    out_shape = (class_ids.size, 4 * num_classes)
    bbox_targets = np.zeros(out_shape, dtype=np.float32)
    bbox_inside_weights = np.zeros(out_shape, dtype=np.float32)

    # Rows with class 0 are left all-zero; only positive classes get targets.
    positive_rows = np.where(class_ids > 0)[0]
    for row in positive_rows:
        first_col = int(4 * class_ids[row])
        cols = slice(first_col, first_col + 4)
        bbox_targets[row, cols] = bbox_target_data[row, 1:]
        bbox_inside_weights[row, cols] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS

    return bbox_targets, bbox_inside_weights
def _compute_targets(ex_rois, gt_rois, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        ex_rois: N x 4 example (sampled) boxes.
        gt_rois: N x 4 ground-truth boxes, row-aligned with ``ex_rois``.
        labels: Length-N class labels, prepended as the first output column.

    Returns:
        N x 5 float32 array of (label, targets...) rows, where the target
        columns come from ``bbox_transform`` and are optionally normalized
        by the configured means and standard deviations.
    """
    assert ex_rois.shape[0] == gt_rois.shape[0]
    assert ex_rois.shape[1] == 4
    assert gt_rois.shape[1] == 4

    deltas = bbox_transform(ex_rois, gt_rois)

    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optional normalization with precomputed statistics.
        means = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
        stds = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)
        deltas = (deltas - means) / stds

    label_column = labels[:, np.newaxis]
    stacked = np.hstack((label_column, deltas))
    return stacked.astype(np.float32, copy=False)
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: Candidate RoIs; columns 1..4 are the box coordinates.
        all_scores: Scores aligned row-for-row with ``all_rois``.
        gt_boxes: Ground-truth rows; the first four columns are the box
            coordinates and column 4 is the class label.
        fg_rois_per_image: Maximum number of foreground RoIs to sample.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Number of classes K used to expand regression targets.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground rows first
        and background rows (label clamped to 0) after them.

    Raises:
        ValueError: If no RoI qualifies as foreground or background, so
            nothing can be sampled.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 is used instead of the ``np.float`` alias, which newer
    # NumPy releases no longer provide; the resulting dtype is the same.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Guard against the case when an image has fewer than
        # fg_rois_per_image foreground RoIs.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when there are too few backgrounds.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # Foreground only: fill the whole batch from the foreground pool.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # Background only: fill the whole batch from the background pool.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Fail loudly instead of dropping into an interactive debugger,
        # which would stall or abort an unattended training run.
        raise ValueError(
            "No foreground or background RoIs available for sampling; "
            "check the RoI/ground-truth boxes and the FG/BG overlap "
            "thresholds.")

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
|