Source code for chainercv.links.model.faster_rcnn.region_proposal_network

import numpy as np

import chainer
from chainer.backends import cuda
import chainer.functions as F
import chainer.links as L

from chainercv.links.model.faster_rcnn.utils.generate_anchor_base import \
    generate_anchor_base
from chainercv.links.model.faster_rcnn.utils.proposal_creator import \
    ProposalCreator


[docs]class RegionProposalNetwork(chainer.Chain): """Region Proposal Network introduced in Faster R-CNN. This is Region Proposal Network introduced in Faster R-CNN [#]_. This takes features extracted from images and propose class agnostic bounding boxes around "objects". .. [#] Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. \ Faster R-CNN: Towards Real-Time Object Detection with \ Region Proposal Networks. NIPS 2015. Args: in_channels (int): The channel size of input. mid_channels (int): The channel size of the intermediate tensor. ratios (list of floats): This is ratios of width to height of the anchors. anchor_scales (list of numbers): This is areas of anchors. Those areas will be the product of the square of an element in :obj:`anchor_scales` and the original area of the reference window. feat_stride (int): Stride size after extracting features from an image. initialW (callable): Initial weight value. If :obj:`None` then this function uses Gaussian distribution scaled by 0.1 to initialize weight. May also be a callable that takes an array and edits its values. proposal_creator_params (dict): Key valued paramters for :class:`~chainercv.links.model.faster_rcnn.ProposalCreator`. .. seealso:: :class:`~chainercv.links.model.faster_rcnn.ProposalCreator` """ def __init__( self, in_channels=512, mid_channels=512, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32], feat_stride=16, initialW=None, proposal_creator_params={}, ): self.anchor_base = generate_anchor_base( anchor_scales=anchor_scales, ratios=ratios) self.feat_stride = feat_stride self.proposal_layer = ProposalCreator(**proposal_creator_params) n_anchor = self.anchor_base.shape[0] super(RegionProposalNetwork, self).__init__() with self.init_scope(): self.conv1 = L.Convolution2D( in_channels, mid_channels, 3, 1, 1, initialW=initialW) self.score = L.Convolution2D( mid_channels, n_anchor * 2, 1, 1, 0, initialW=initialW) self.loc = L.Convolution2D( mid_channels, n_anchor * 4, 1, 1, 0, initialW=initialW)
[docs] def __call__(self, x, img_size, scale=1.): """Forward Region Proposal Network. Here are notations. * :math:`N` is batch size. * :math:`C` channel size of the input. * :math:`H` and :math:`W` are height and witdh of the input feature. * :math:`A` is number of anchors assigned to each pixel. Args: x (~chainer.Variable): The Features extracted from images. Its shape is :math:`(N, C, H, W)`. img_size (tuple of ints): A tuple :obj:`height, width`, which contains image size after scaling. scale (float): The amount of scaling done to the input images after reading them from files. Returns: (~chainer.Variable, ~chainer.Variable, array, array, array): This is a tuple of five following values. * **rpn_locs**: Predicted bounding box offsets and scales for \ anchors. Its shape is :math:`(N, H W A, 4)`. * **rpn_scores**: Predicted foreground scores for \ anchors. Its shape is :math:`(N, H W A, 2)`. * **rois**: A bounding box array containing coordinates of \ proposal boxes. This is a concatenation of bounding box \ arrays from multiple images in the batch. \ Its shape is :math:`(R', 4)`. Given :math:`R_i` predicted \ bounding boxes from the :math:`i` th image, \ :math:`R' = \\sum _{i=1} ^ N R_i`. * **roi_indices**: An array containing indices of images to \ which RoIs correspond to. Its shape is :math:`(R',)`. * **anchor**: Coordinates of enumerated shifted anchors. \ Its shape is :math:`(H W A, 4)`. """ n, _, hh, ww = x.shape anchor = _enumerate_shifted_anchor( self.xp.array(self.anchor_base), self.feat_stride, hh, ww) n_anchor = anchor.shape[0] // (hh * ww) h = F.relu(self.conv1(x)) rpn_locs = self.loc(h) rpn_locs = rpn_locs.transpose((0, 2, 3, 1)).reshape((n, -1, 4)) rpn_scores = self.score(h) rpn_scores = rpn_scores.transpose((0, 2, 3, 1)) rpn_fg_scores =\ rpn_scores.reshape((n, hh, ww, n_anchor, 2))[:, :, :, :, 1] rpn_fg_scores = rpn_fg_scores.reshape((n, -1)) rpn_scores = rpn_scores.reshape((n, -1, 2)) rois = [] roi_indices = [] for i in range(n): roi = self.proposal_layer( rpn_locs[i].array, rpn_fg_scores[i].array, anchor, img_size, scale=scale) batch_index = i * self.xp.ones((len(roi),), dtype=np.int32) rois.append(roi) roi_indices.append(batch_index) rois = self.xp.concatenate(rois, axis=0) roi_indices = self.xp.concatenate(roi_indices, axis=0) return rpn_locs, rpn_scores, rois, roi_indices, anchor
def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width): # Enumerate all shifted anchors: # # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors xp = cuda.get_array_module(anchor_base) shift_y = xp.arange(0, height * feat_stride, feat_stride) shift_x = xp.arange(0, width * feat_stride, feat_stride) shift_x, shift_y = xp.meshgrid(shift_x, shift_y) shift = xp.stack((shift_y.ravel(), shift_x.ravel(), shift_y.ravel(), shift_x.ravel()), axis=1) A = anchor_base.shape[0] K = shift.shape[0] anchor = anchor_base.reshape((1, A, 4)) + \ shift.reshape((1, K, 4)).transpose((1, 0, 2)) anchor = anchor.reshape((K * A, 4)).astype(np.float32) return anchor