Source code for chainercv.links.model.faster_rcnn.faster_rcnn_train_chain

import numpy as np

import chainer
from chainer.backends import cuda
import chainer.functions as F

from chainercv.links.model.faster_rcnn.utils.anchor_target_creator import\
    AnchorTargetCreator
from chainercv.links.model.faster_rcnn.utils.proposal_target_creator import\
    ProposalTargetCreator


class FasterRCNNTrainChain(chainer.Chain):

    """Calculate losses for Faster R-CNN and report them.

    This is used to train Faster R-CNN in the joint training scheme
    [#FRCNN]_.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.

    .. [#FRCNN] Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. \
        Faster R-CNN: Towards Real-Time Object Detection with \
        Region Proposal Networks. NIPS 2015.

    Args:
        faster_rcnn (~chainercv.links.model.faster_rcnn.FasterRCNN):
            A Faster R-CNN model that is going to be trained.
        rpn_sigma (float): Sigma parameter for the localization loss
            of Region Proposal Network (RPN). The default value is 3,
            which is the value used in [#FRCNN]_.
        roi_sigma (float): Sigma parameter for the localization loss of
            the head. The default value is 1, which is the value used
            in [#FRCNN]_.
        anchor_target_creator: An instantiation of
            :class:`~chainercv.links.model.faster_rcnn.AnchorTargetCreator`.
        proposal_target_creator: An instantiation of
            :class:`~chainercv.links.model.faster_rcnn.ProposalTargetCreator`.

    """

    def __init__(self, faster_rcnn, rpn_sigma=3., roi_sigma=1.,
                 anchor_target_creator=AnchorTargetCreator(),
                 proposal_target_creator=ProposalTargetCreator()):
        super(FasterRCNNTrainChain, self).__init__()
        with self.init_scope():
            self.faster_rcnn = faster_rcnn
        self.rpn_sigma = rpn_sigma
        self.roi_sigma = roi_sigma

        self.anchor_target_creator = anchor_target_creator
        self.proposal_target_creator = proposal_target_creator

        self.loc_normalize_mean = faster_rcnn.loc_normalize_mean
        self.loc_normalize_std = faster_rcnn.loc_normalize_std
    def __call__(self, imgs, bboxes, labels, scale):
        """Forward Faster R-CNN and calculate losses.

        Here are the notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~chainer.Variable): A variable with a batch of images.
            bboxes (~chainer.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            labels (~chainer.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            scale (float or ~chainer.Variable): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            chainer.Variable:
            Scalar loss variable.
            This is the sum of losses for Region Proposal Network and
            the head module.

        """
        if isinstance(bboxes, chainer.Variable):
            bboxes = bboxes.array
        if isinstance(labels, chainer.Variable):
            labels = labels.array
        if isinstance(scale, chainer.Variable):
            scale = scale.array
        scale = cuda.to_cpu(scale).item()
        n = bboxes.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)

        features = self.faster_rcnn.extractor(imgs)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = \
            self.faster_rcnn.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        bbox = bboxes[0]
        label = labels[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, bbox, label,
            self.loc_normalize_mean, self.loc_normalize_std)
        sample_roi_index = self.xp.zeros((len(sample_roi),), dtype=np.int32)
        roi_cls_loc, roi_score = self.faster_rcnn.head(
            features, sample_roi, sample_roi_index)

        # RPN losses
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox, anchor, img_size)
        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc, gt_rpn_loc, gt_rpn_label, self.rpn_sigma)
        rpn_cls_loss = F.softmax_cross_entropy(rpn_score, gt_rpn_label)

        # Losses for outputs of the head.
        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.reshape((n_sample, -1, 4))
        roi_loc = roi_cls_loc[self.xp.arange(n_sample), gt_roi_label]
        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc, gt_roi_loc, gt_roi_label, self.roi_sigma)
        roi_cls_loss = F.softmax_cross_entropy(roi_score, gt_roi_label)

        loss = rpn_loc_loss + rpn_cls_loss + roi_loc_loss + roi_cls_loss
        chainer.reporter.report({'rpn_loc_loss': rpn_loc_loss,
                                 'rpn_cls_loss': rpn_cls_loss,
                                 'roi_loc_loss': roi_loc_loss,
                                 'roi_cls_loss': roi_cls_loss,
                                 'loss': loss},
                                self)
        return loss
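
# For reference, a minimal sketch (illustrative only, not part of the
# original module) of how this chain is typically set up and driven for a
# single step. FasterRCNNVGG16 and MomentumSGD are real chainercv/chainer
# APIs; the hyperparameters, dummy inputs, and the helper name
# _demo_train_step are assumptions made for this example.
def _demo_train_step():
    from chainercv.links import FasterRCNNVGG16

    faster_rcnn = FasterRCNNVGG16(n_fg_class=20, pretrained_model='imagenet')
    model = FasterRCNNTrainChain(faster_rcnn)
    optimizer = chainer.optimizers.MomentumSGD(lr=1e-3, momentum=0.9)
    optimizer.setup(model)

    # One step on dummy data; shapes follow the __call__ docstring and
    # bounding boxes use chainercv's (y_min, x_min, y_max, x_max) order.
    imgs = np.random.uniform(size=(1, 3, 600, 800)).astype(np.float32)
    bboxes = np.array([[[10., 20., 300., 400.]]], dtype=np.float32)  # (N, R, 4)
    labels = np.array([[5]], dtype=np.int32)                         # (N, R)
    scale = np.asarray(1., dtype=np.float32)

    loss = model(imgs, bboxes, labels, scale)
    model.cleargrads()
    loss.backward()
    optimizer.update()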
def _smooth_l1_loss(x, t, in_weight, sigma):
    # Smooth L1: quadratic for |diff| < 1 / sigma^2, linear beyond it.
    sigma2 = sigma ** 2
    diff = in_weight * (x - t)
    abs_diff = F.absolute(diff)
    flag = (abs_diff.array < (1. / sigma2)).astype(np.float32)

    y = (flag * (sigma2 / 2.) * F.square(diff) +
         (1 - flag) * (abs_diff - 0.5 / sigma2))

    return F.sum(y)


def _fast_rcnn_loc_loss(pred_loc, gt_loc, gt_label, sigma):
    xp = chainer.backends.cuda.get_array_module(pred_loc)

    in_weight = xp.zeros_like(gt_loc)
    # Localization loss is calculated only for positive rois.
    in_weight[gt_label > 0] = 1
    loc_loss = _smooth_l1_loss(pred_loc, gt_loc, in_weight, sigma)
    # Normalize by total number of negative and positive rois.
    loc_loss /= xp.sum(gt_label >= 0)
    return loc_loss
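
# Quick numeric check of _smooth_l1_loss (dummy values, illustrative only).
# With sigma = 1 the loss is 0.5 * diff**2 for |diff| < 1 and |diff| - 0.5
# otherwise, so diffs of 0.2 and 2.0 contribute 0.02 + 1.5 = 1.52 in total.
if __name__ == '__main__':
    x = chainer.Variable(np.array([0.2, 2.0], dtype=np.float32))
    t = np.zeros(2, dtype=np.float32)
    in_weight = np.ones(2, dtype=np.float32)
    print(_smooth_l1_loss(x, t, in_weight, 1.).array)  # ~1.52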