# Source code for chainercv.experimental.links.model.fcis.fcis_train_chain

from __future__ import division

import numpy as np

import chainer
from chainer.backends import cuda
import chainer.functions as F

from chainercv.experimental.links.model.fcis.utils.proposal_target_creator \
    import ProposalTargetCreator
from chainercv.links.model.faster_rcnn.faster_rcnn_train_chain \
    import _fast_rcnn_loc_loss
from chainercv.links.model.faster_rcnn.utils.anchor_target_creator \
    import AnchorTargetCreator


class FCISTrainChain(chainer.Chain):

    """Calculate losses for FCIS and report them.

    This is used to train FCIS in the joint training scheme [#FCISCVPR]_.

    The losses include:

    * :obj:`rpn_loc_loss`: The localization loss for \
        Region Proposal Network (RPN).
    * :obj:`rpn_cls_loss`: The classification loss for RPN.
    * :obj:`roi_loc_loss`: The localization loss for the head module.
    * :obj:`roi_cls_loss`: The classification loss for the head module.
    * :obj:`roi_mask_loss`: The mask loss for the head module.

    .. [#FCISCVPR] Yi Li, Haozhi Qi, Jifeng Dai, Xiangyang Ji, Yichen Wei. \
    Fully Convolutional Instance-aware Semantic Segmentation. CVPR 2017.

    Args:
        fcis (~chainercv.experimental.links.model.fcis.FCIS):
            A FCIS model for training.
        rpn_sigma (float): Sigma parameter for the localization loss
            of Region Proposal Network (RPN). The default value is 3,
            which is the value used in [#FCISCVPR]_.
        roi_sigma (float): Sigma parameter for the localization loss of
            the head. The default value is 1, which is the value used
            in [#FCISCVPR]_.
        anchor_target_creator: An instantiation of
            :class:`~chainercv.links.model.faster_rcnn.AnchorTargetCreator`.
            If :obj:`None`, a new instance built with its default
            arguments is used.
        proposal_target_creator: An instantiation of
            :class:`~chainercv.experimental.links.model.fcis.ProposalTargetCreator`.
            If :obj:`None`, a new instance built with its default
            arguments is used.

    """

    def __init__(
            self, fcis,
            rpn_sigma=3.0, roi_sigma=1.0,
            anchor_target_creator=None,
            proposal_target_creator=None
    ):

        super(FCISTrainChain, self).__init__()
        with self.init_scope():
            self.fcis = fcis
        self.rpn_sigma = rpn_sigma
        self.roi_sigma = roi_sigma
        self.mask_size = self.fcis.head.roi_size

        self.loc_normalize_mean = fcis.loc_normalize_mean
        self.loc_normalize_std = fcis.loc_normalize_std

        # Build the default creators per instance instead of in the
        # signature: a default evaluated at definition time would be one
        # object shared by every train chain.
        if anchor_target_creator is None:
            anchor_target_creator = AnchorTargetCreator()
        if proposal_target_creator is None:
            proposal_target_creator = ProposalTargetCreator()
        self.anchor_target_creator = anchor_target_creator
        self.proposal_target_creator = proposal_target_creator

    def __call__(self, imgs, masks, labels, bboxes, scale):
        """Forward FCIS and calculate losses.

        Here are notations used.

        * :math:`N` is the batch size.
        * :math:`R` is the number of bounding boxes per image.
        * :math:`H` is the image height.
        * :math:`W` is the image width.

        Currently, only :math:`N=1` is supported.

        Args:
            imgs (~chainer.Variable): A variable with a batch of images.
            masks (~chainer.Variable): A batch of masks.
                Its shape is :math:`(N, R, H, W)`.
            labels (~chainer.Variable): A batch of labels.
                Its shape is :math:`(N, R)`. The background is excluded from
                the definition, which means that the range of the value
                is :math:`[0, L - 1]`. :math:`L` is the number of foreground
                classes.
            bboxes (~chainer.Variable): A batch of bounding boxes.
                Its shape is :math:`(N, R, 4)`.
            scale (float or ~chainer.Variable): Amount of scaling applied to
                the raw image during preprocessing.

        Returns:
            chainer.Variable:
            Scalar loss variable.
            This is the sum of losses for Region Proposal Network and
            the head module.

        Raises:
            ValueError: If the batch size of :obj:`masks` is not 1.

        """
        # Work on raw arrays: the ground truth does not need gradients.
        if isinstance(masks, chainer.Variable):
            masks = masks.array
        if isinstance(labels, chainer.Variable):
            labels = labels.array
        if isinstance(bboxes, chainer.Variable):
            bboxes = bboxes.array
        if isinstance(scale, chainer.Variable):
            scale = scale.array
        # Convert to a Python scalar. ``np.asscalar`` has been removed from
        # NumPy; ``ndarray.item()`` is the direct replacement.
        scale = np.asarray(cuda.to_cpu(scale)).item()

        n = masks.shape[0]
        # batch size = 1
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = imgs.shape
        img_size = (H, W)
        # The masks must have the same spatial size as the images.
        assert img_size == masks.shape[2:]

        rpn_features, roi_features = self.fcis.extractor(imgs)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.fcis.rpn(
            rpn_features, img_size, scale)

        # batch size = 1
        mask = masks[0]
        label = labels[0]
        bbox = bboxes[0]
        rpn_score = rpn_scores[0]
        rpn_loc = rpn_locs[0]
        roi = rois

        # Sample RoIs and forward
        sample_roi, gt_roi_mask, gt_roi_label, gt_roi_loc = \
            self.proposal_target_creator(
                roi, mask, label, bbox, self.loc_normalize_mean,
                self.loc_normalize_std, self.mask_size)

        # Every sampled RoI belongs to image 0 because the batch size is 1.
        sample_roi_index = self.xp.zeros(
            (len(sample_roi),), dtype=np.int32)
        roi_ag_seg_score, roi_ag_loc, roi_cls_score, _, _ = self.fcis.head(
            roi_features, sample_roi, sample_roi_index, img_size, gt_roi_label)

        # RPN losses
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            bbox, anchor, img_size)

        # CPU -> GPU
        if cuda.get_array_module(rpn_loc.array) is not np:
            gt_rpn_loc = cuda.to_gpu(gt_rpn_loc)
            gt_rpn_label = cuda.to_gpu(gt_rpn_label)

        rpn_loc_loss = _fast_rcnn_loc_loss(
            rpn_loc, gt_rpn_loc, gt_rpn_label, self.rpn_sigma)
        rpn_cls_loss = F.softmax_cross_entropy(rpn_score, gt_rpn_label)

        # Losses for outputs of the head
        n_roi = roi_ag_loc.shape[0]
        # 1 where the RoI label is positive, 0 otherwise. ``np.int`` was an
        # alias of the builtin ``int`` and has been removed from NumPy.
        gt_roi_fg_label = (gt_roi_label > 0).astype(int)
        # For each RoI, pick the localization at index 1 when its label is
        # positive and at index 0 otherwise.
        roi_loc = roi_ag_loc[self.xp.arange(n_roi), gt_roi_fg_label]

        roi_loc_loss = _fast_rcnn_loc_loss(
            roi_loc, gt_roi_loc, gt_roi_label, self.roi_sigma)
        roi_cls_loss = F.softmax_cross_entropy(roi_cls_score, gt_roi_label)
        # normalize by every (valid and invalid) instances
        roi_mask_loss = F.softmax_cross_entropy(
            roi_ag_seg_score, gt_roi_mask, normalize=False) \
            * 10.0 / self.mask_size / self.mask_size

        loss = rpn_loc_loss + rpn_cls_loss \
            + roi_loc_loss + roi_cls_loss + roi_mask_loss
        chainer.reporter.report({
            'rpn_loc_loss': rpn_loc_loss,
            'rpn_cls_loss': rpn_cls_loss,
            'roi_loc_loss': roi_loc_loss,
            'roi_cls_loss': roi_cls_loss,
            'roi_mask_loss': roi_mask_loss,
            'loss': loss,
        }, self)

        return loss