Source code for chainercv.links.model.faster_rcnn.utils.anchor_target_creator

import numpy as np

import chainer
from chainer.backends import cuda

from chainercv.links.model.faster_rcnn.utils.bbox2loc import bbox2loc
from chainercv.utils.bbox.bbox_iou import bbox_iou


class AnchorTargetCreator(object):
    """Assign the ground truth bounding boxes to anchors.

    Assigns the ground truth bounding boxes to anchors for training Region
    Proposal Networks introduced in Faster R-CNN [#]_.

    Offsets and scales to match anchors to the ground truth are
    calculated using the encoding scheme of
    :func:`~chainercv.links.model.faster_rcnn.bbox2loc`.

    .. [#] Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. \
    Faster R-CNN: Towards Real-Time Object Detection with \
    Region Proposal Networks. NIPS 2015.

    Args:
        n_sample (int): The number of regions to produce.
        pos_iou_thresh (float): Anchors with IoU above this
            threshold will be assigned as positive.
        neg_iou_thresh (float): Anchors with IoU below this
            threshold will be assigned as negative.
        pos_ratio (float): Ratio of positive regions in the
            sampled regions.

    """

    def __init__(self,
                 n_sample=256,
                 pos_iou_thresh=0.7, neg_iou_thresh=0.3,
                 pos_ratio=0.5):
        self.n_sample = n_sample
        self.pos_iou_thresh = pos_iou_thresh
        self.neg_iou_thresh = neg_iou_thresh
        self.pos_ratio = pos_ratio
    def __call__(self, bbox, anchor, img_size):
        """Assign ground truth supervision to a sampled subset of anchors.

        The types of the input arrays and the output arrays are the same.

        Here are the notations used.

        * :math:`S` is the number of anchors.
        * :math:`R` is the number of bounding boxes.

        Args:
            bbox (array): Coordinates of bounding boxes. Its shape is
                :math:`(R, 4)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(S, 4)`.
            img_size (tuple of ints): A tuple :obj:`H, W`, which is the
                height and width of the image.

        Returns:
            (array, array):

            * **loc**: Offsets and scales to match the anchors to \
                the ground truth bounding boxes. Its shape is :math:`(S, 4)`.
            * **label**: Labels of anchors with values \
                :obj:`(1=positive, 0=negative, -1=ignore)`. Its shape \
                is :math:`(S,)`.

        """
        xp = cuda.get_array_module(bbox)
        bbox = cuda.to_cpu(bbox)
        anchor = cuda.to_cpu(anchor)

        img_H, img_W = img_size

        n_anchor = len(anchor)
        inside_index = _get_inside_index(anchor, img_H, img_W)
        anchor = anchor[inside_index]
        argmax_ious, label = self._create_label(
            inside_index, anchor, bbox)

        # compute bounding box regression targets
        loc = bbox2loc(anchor, bbox[argmax_ious])

        # map up to original set of anchors
        label = _unmap(label, n_anchor, inside_index, fill=-1)
        loc = _unmap(loc, n_anchor, inside_index, fill=0)

        if xp != np:
            loc = chainer.backends.cuda.to_gpu(loc)
            label = chainer.backends.cuda.to_gpu(label)
        return loc, label
    def _create_label(self, inside_index, anchor, bbox):
        # label: 1 is positive, 0 is negative, -1 is don't care
        label = np.empty((len(inside_index),), dtype=np.int32)
        label.fill(-1)

        argmax_ious, max_ious, gt_argmax_ious = \
            self._calc_ious(anchor, bbox, inside_index)

        # assign negative labels first so that positive labels can clobber them
        label[max_ious < self.neg_iou_thresh] = 0

        # positive label: for each gt, the anchor with the highest IoU
        label[gt_argmax_ious] = 1

        # positive label: above threshold IoU
        label[max_ious >= self.pos_iou_thresh] = 1

        # subsample positive labels if we have too many
        n_pos = int(self.pos_ratio * self.n_sample)
        pos_index = np.where(label == 1)[0]
        if len(pos_index) > n_pos:
            disable_index = np.random.choice(
                pos_index, size=(len(pos_index) - n_pos), replace=False)
            label[disable_index] = -1

        # subsample negative labels if we have too many
        n_neg = self.n_sample - np.sum(label == 1)
        neg_index = np.where(label == 0)[0]
        if len(neg_index) > n_neg:
            disable_index = np.random.choice(
                neg_index, size=(len(neg_index) - n_neg), replace=False)
            label[disable_index] = -1

        return argmax_ious, label

    def _calc_ious(self, anchor, bbox, inside_index):
        # IoUs between the anchors and the gt boxes
        ious = bbox_iou(anchor, bbox)
        argmax_ious = ious.argmax(axis=1)
        max_ious = ious[np.arange(len(inside_index)), argmax_ious]
        gt_argmax_ious = ious.argmax(axis=0)
        gt_max_ious = ious[gt_argmax_ious, np.arange(ious.shape[1])]
        gt_argmax_ious = np.where(ious == gt_max_ious)[0]

        return argmax_ious, max_ious, gt_argmax_ious
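# --- Usage sketch (not part of the original module) --------------------------
# A minimal, hypothetical example of driving AnchorTargetCreator with toy
# NumPy data. The anchors, ground truth box, and image size below are made-up
# values chosen only to illustrate the input and output shapes; they are not
# produced by ChainerCV itself.
def _example_anchor_target_creator():
    anchor_target_creator = AnchorTargetCreator(n_sample=16)
    # Four anchors in (y_min, x_min, y_max, x_max) order; the last one sticks
    # out of the image and is dropped by _get_inside_index, so its final
    # label is -1 (ignore).
    anchor = np.array([
        [0, 0, 32, 32],
        [16, 16, 48, 48],
        [8, 8, 24, 24],
        [-8, -8, 100, 100],
    ], dtype=np.float32)
    # One ground truth bounding box.
    bbox = np.array([[14, 14, 50, 50]], dtype=np.float32)
    loc, label = anchor_target_creator(bbox, anchor, (64, 64))
    # loc has shape (4, 4) and label has shape (4,) with values in {1, 0, -1}.
    return loc, label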
def _unmap(data, count, index, fill=0):
    # Unmap a subset of items (data) back to the original set of items
    # (of size count).
    if len(data.shape) == 1:
        ret = np.empty((count,), dtype=data.dtype)
        ret.fill(fill)
        ret[index] = data
    else:
        ret = np.empty((count,) + data.shape[1:], dtype=data.dtype)
        ret.fill(fill)
        ret[index, :] = data
    return ret


def _get_inside_index(anchor, H, W):
    # Calc indices of anchors which are located completely inside of the image
    # whose size is specified.
    xp = cuda.get_array_module(anchor)

    index_inside = xp.where(
        (anchor[:, 0] >= 0) &
        (anchor[:, 1] >= 0) &
        (anchor[:, 2] <= H) &
        (anchor[:, 3] <= W)
    )[0]
    return index_inside
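# --- Helper sketch (not part of the original module) -------------------------
# An assumed illustration of how _get_inside_index and _unmap fit together:
# indices of fully-inside anchors are selected, per-anchor results are
# computed on that subset, and _unmap scatters them back to the full anchor
# set with a fill value for the discarded anchors.
def _example_inside_and_unmap():
    anchor = np.array([
        [0, 0, 16, 16],    # fully inside a 32x32 image
        [-4, 0, 16, 16],   # crosses the top edge -> dropped
        [8, 8, 40, 16],    # crosses the bottom edge -> dropped
    ], dtype=np.float32)
    inside_index = _get_inside_index(anchor, 32, 32)   # -> array([0])
    subset_label = np.zeros((len(inside_index),), dtype=np.int32)
    # Scatter the subset back to length 3, marking dropped anchors with -1.
    full_label = _unmap(subset_label, len(anchor), inside_index, fill=-1)
    return full_label    # -> array([0, -1, -1], dtype=int32)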