Source code for chainercv.links.model.faster_rcnn.faster_rcnn_vgg

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L

from chainercv.links.model.faster_rcnn.faster_rcnn import FasterRCNN
from chainercv.links.model.faster_rcnn.region_proposal_network import \
    RegionProposalNetwork
from chainercv.links.model.vgg.vgg16 import VGG16
from chainercv import utils


[docs]class FasterRCNNVGG16(FasterRCNN): """Faster R-CNN based on VGG-16. When you specify the path of a pre-trained chainer model serialized as a :obj:`.npz` file in the constructor, this chain model automatically initializes all the parameters with it. When a string in prespecified set is provided, a pretrained model is loaded from weights distributed on the Internet. The list of pretrained models supported are as follows: * :obj:`voc07`: Loads weights trained with the trainval split of \ PASCAL VOC2007 Detection Dataset. * :obj:`imagenet`: Loads weights trained with ImageNet Classfication \ task for the feature extractor and the head modules. \ Weights that do not have a corresponding layer in VGG-16 \ will be randomly initialized. For descriptions on the interface of this model, please refer to :class:`~chainercv.links.model.faster_rcnn.FasterRCNN`. :class:`~chainercv.links.model.faster_rcnn.FasterRCNNVGG16` supports finer control on random initializations of weights by arguments :obj:`vgg_initialW`, :obj:`rpn_initialW`, :obj:`loc_initialW` and :obj:`score_initialW`. It accepts a callable that takes an array and edits its values. If :obj:`None` is passed as an initializer, the default initializer is used. Args: n_fg_class (int): The number of classes excluding the background. pretrained_model (string): The destination of the pre-trained chainer model serialized as a :obj:`.npz` file. If this is one of the strings described above, it automatically loads weights stored under a directory :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/models/`, where :obj:`$CHAINER_DATASET_ROOT` is set as :obj:`$HOME/.chainer/dataset` unless you specify another value by modifying the environment variable. min_size (int): A preprocessing paramter for :meth:`prepare`. max_size (int): A preprocessing paramter for :meth:`prepare`. ratios (list of floats): This is ratios of width to height of the anchors. anchor_scales (list of numbers): This is areas of anchors. Those areas will be the product of the square of an element in :obj:`anchor_scales` and the original area of the reference window. vgg_initialW (callable): Initializer for the layers corresponding to the VGG-16 layers. rpn_initialW (callable): Initializer for Region Proposal Network layers. loc_initialW (callable): Initializer for the localization head. score_initialW (callable): Initializer for the score head. proposal_creator_params (dict): Key valued paramters for :class:`~chainercv.links.model.faster_rcnn.ProposalCreator`. """ _models = { 'voc07': { 'param': {'n_fg_class': 20}, 'url': 'https://chainercv-models.preferred.jp/' 'faster_rcnn_vgg16_voc07_trained_2018_06_01.npz', 'cv2': True }, 'voc0712': { 'param': {'n_fg_class': 20}, 'url': 'https://chainercv-models.preferred.jp/' 'faster_rcnn_vgg16_voc0712_trained_2017_07_21.npz', 'cv2': True }, } feat_stride = 16 def __init__(self, n_fg_class=None, pretrained_model=None, min_size=600, max_size=1000, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32], vgg_initialW=None, rpn_initialW=None, loc_initialW=None, score_initialW=None, proposal_creator_params={}): param, path = utils.prepare_pretrained_model( {'n_fg_class': n_fg_class}, pretrained_model, self._models) if loc_initialW is None: loc_initialW = chainer.initializers.Normal(0.001) if score_initialW is None: score_initialW = chainer.initializers.Normal(0.01) if rpn_initialW is None: rpn_initialW = chainer.initializers.Normal(0.01) if vgg_initialW is None and pretrained_model: vgg_initialW = chainer.initializers.constant.Zero() extractor = VGG16(initialW=vgg_initialW) extractor.pick = 'conv5_3' # Delete all layers after conv5_3. extractor.remove_unused() rpn = RegionProposalNetwork( 512, 512, ratios=ratios, anchor_scales=anchor_scales, feat_stride=self.feat_stride, initialW=rpn_initialW, proposal_creator_params=proposal_creator_params, ) head = VGG16RoIHead( param['n_fg_class'] + 1, roi_size=7, spatial_scale=1. / self.feat_stride, vgg_initialW=vgg_initialW, loc_initialW=loc_initialW, score_initialW=score_initialW ) super(FasterRCNNVGG16, self).__init__( extractor, rpn, head, mean=np.array([122.7717, 115.9465, 102.9801], dtype=np.float32)[:, None, None], min_size=min_size, max_size=max_size ) if path == 'imagenet': self._copy_imagenet_pretrained_vgg16() elif path: chainer.serializers.load_npz(path, self) def _copy_imagenet_pretrained_vgg16(self): pretrained_model = VGG16(pretrained_model='imagenet') self.extractor.conv1_1.copyparams(pretrained_model.conv1_1) self.extractor.conv1_2.copyparams(pretrained_model.conv1_2) self.extractor.conv2_1.copyparams(pretrained_model.conv2_1) self.extractor.conv2_2.copyparams(pretrained_model.conv2_2) self.extractor.conv3_1.copyparams(pretrained_model.conv3_1) self.extractor.conv3_2.copyparams(pretrained_model.conv3_2) self.extractor.conv3_3.copyparams(pretrained_model.conv3_3) self.extractor.conv4_1.copyparams(pretrained_model.conv4_1) self.extractor.conv4_2.copyparams(pretrained_model.conv4_2) self.extractor.conv4_3.copyparams(pretrained_model.conv4_3) self.extractor.conv5_1.copyparams(pretrained_model.conv5_1) self.extractor.conv5_2.copyparams(pretrained_model.conv5_2) self.extractor.conv5_3.copyparams(pretrained_model.conv5_3) self.head.fc6.copyparams(pretrained_model.fc6) self.head.fc7.copyparams(pretrained_model.fc7)
[docs]class VGG16RoIHead(chainer.Chain): """Faster R-CNN Head for VGG-16 based implementation. This class is used as a head for Faster R-CNN. This outputs class-wise localizations and classification based on feature maps in the given RoIs. Args: n_class (int): The number of classes possibly including the background. roi_size (int): Height and width of the feature maps after RoI-pooling. spatial_scale (float): Scale of the roi is resized. vgg_initialW (callable): Initializer for the layers corresponding to the VGG-16 layers. loc_initialW (callable): Initializer for the localization head. score_initialW (callable): Initializer for the score head. """ def __init__(self, n_class, roi_size, spatial_scale, vgg_initialW=None, loc_initialW=None, score_initialW=None): # n_class includes the background super(VGG16RoIHead, self).__init__() with self.init_scope(): self.fc6 = L.Linear(25088, 4096, initialW=vgg_initialW) self.fc7 = L.Linear(4096, 4096, initialW=vgg_initialW) self.cls_loc = L.Linear(4096, n_class * 4, initialW=loc_initialW) self.score = L.Linear(4096, n_class, initialW=score_initialW) self.n_class = n_class self.roi_size = roi_size self.spatial_scale = spatial_scale def __call__(self, x, rois, roi_indices): """Forward the chain. We assume that there are :math:`N` batches. Args: x (~chainer.Variable): 4D image variable. rois (array): A bounding box array containing coordinates of proposal boxes. This is a concatenation of bounding box arrays from multiple images in the batch. Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed RoIs from the :math:`i` th image, :math:`R' = \\sum _{i=1} ^ N R_i`. roi_indices (array): An array containing indices of images to which bounding boxes correspond to. Its shape is :math:`(R',)`. """ roi_indices = roi_indices.astype(np.float32) indices_and_rois = self.xp.concatenate( (roi_indices[:, None], rois), axis=1) pool = _roi_pooling_2d_yx( x, indices_and_rois, self.roi_size, self.roi_size, self.spatial_scale) fc6 = F.relu(self.fc6(pool)) fc7 = F.relu(self.fc7(fc6)) roi_cls_locs = self.cls_loc(fc7) roi_scores = self.score(fc7) return roi_cls_locs, roi_scores
def _roi_pooling_2d_yx(x, indices_and_rois, outh, outw, spatial_scale): xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]] pool = F.roi_pooling_2d( x, xy_indices_and_rois, outh, outw, spatial_scale) return pool