from __future__ import division
import chainer
import chainer.functions as F
import chainer.links as L
import numpy as np
from chainercv.experimental.links.model.fcis import FCIS
from chainercv.functions import psroi_pooling_2d
from chainercv.links import Conv2DBNActiv
from chainercv.links.model.faster_rcnn.region_proposal_network import \
RegionProposalNetwork
from chainercv.links.model.faster_rcnn.utils.loc2bbox import loc2bbox
from chainercv.links.model.resnet.resblock import ResBlock
from chainercv.links import ResNet101
from chainercv import utils
class FCISResNet101(FCIS):

    """FCIS based on ResNet101.

    When you specify the path of a pre-trained chainer model serialized as
    a :obj:`.npz` file in the constructor, this chain model automatically
    initializes all the parameters with it.
    When a string in prespecified set is provided, a pretrained model is
    loaded from weights distributed on the Internet.
    The list of pretrained models supported are as follows:

    * :obj:`sbd`: Loads weights trained with the trainval split of Semantic \
        Boundaries Dataset.
    * :obj:`sbd_converted`: Loads the weights distributed as \
        :obj:`fcis_resnet101_sbd_converted_2018_07_02.npz`.

    In addition, when the path resolved from :obj:`pretrained_model` is the
    string :obj:`imagenet`, only the ResNet101 feature extractor is
    initialized, by copying the weights of
    :class:`~chainercv.links.ResNet101` pretrained on ImageNet.

    For descriptions on the interface of this model, please refer to
    :class:`~chainercv.experimental.links.model.fcis.FCIS`.

    :class:`~chainercv.experimental.links.model.fcis.FCISResNet101`
    supports finer control on random initializations of weights by arguments
    :obj:`resnet_initialW`, :obj:`rpn_initialW` and :obj:`head_initialW`.
    It accepts a callable that takes an array and edits its values.
    If :obj:`None` is passed as an initializer, the default initializer is
    used.

    Args:
        n_fg_class (int): The number of classes excluding the background.
        pretrained_model (str): The destination of the pre-trained
            chainer model serialized as a :obj:`.npz` file.
            If this is one of the strings described
            above, it automatically loads weights stored under a directory
            :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/models/`,
            where :obj:`$CHAINER_DATASET_ROOT` is set as
            :obj:`$HOME/.chainer/dataset` unless you specify another value
            by modifying the environment variable.
        min_size (int): A preprocessing parameter for :meth:`prepare`.
        max_size (int): A preprocessing parameter for :meth:`prepare`.
        roi_size (int): Height and width of the feature maps after
            Position Sensitive RoI pooling.
        group_size (int): Group height and width for Position Sensitive
            ROI pooling.
        ratios (list of floats): This is ratios of width to height of
            the anchors. If :obj:`None`, :obj:`[0.5, 1, 2]` is used.
        anchor_scales (list of numbers): This is areas of anchors.
            Those areas will be the product of the square of an element in
            :obj:`anchor_scales` and the original area of the reference
            window. If :obj:`None`, :obj:`[8, 16, 32]` is used.
        loc_normalize_mean (tuple of four floats): Mean values of
            localization estimates.
        loc_normalize_std (tuple of four floats): Standard deviation
            of localization estimates.
        iter2 (bool): if the value is set :obj:`True`, Position Sensitive
            ROI pooling is executed twice. In the second time, Position
            Sensitive ROI pooling uses improved ROIs by the localization
            parameters calculated in the first time.
        resnet_initialW (callable): Initializer for the layers corresponding
            to the ResNet101 layers.
        rpn_initialW (callable): Initializer for Region Proposal Network
            layers.
        head_initialW (callable): Initializer for the head layers.
        proposal_creator_params (dict): Key valued parameters for
            :class:`~chainercv.links.model.faster_rcnn.ProposalCreator`.
            If :obj:`None`, the following values are used:
            :obj:`nms_thresh=0.7`, :obj:`n_train_pre_nms=6000`,
            :obj:`n_train_post_nms=300`, :obj:`n_test_pre_nms=6000`,
            :obj:`n_test_post_nms=300`, :obj:`force_cpu_nms=False` and
            :obj:`min_size=16`.

    """

    # Registry of named pretrained weights handed to
    # ``utils.prepare_pretrained_model``.
    _models = {
        'sbd': {
            'param': {'n_fg_class': 20},
            'url': 'https://chainercv-models.preferred.jp/'
            'fcis_resnet101_sbd_trained_2018_06_22.npz',
            'cv2': True
        },
        'sbd_converted': {
            'param': {'n_fg_class': 20},
            'url': 'https://chainercv-models.preferred.jp/'
            'fcis_resnet101_sbd_converted_2018_07_02.npz',
            'cv2': True
        }
    }
    # Stride handed to the RPN; its reciprocal is the head's spatial scale.
    feat_stride = 16

    def __init__(
            self,
            n_fg_class=None,
            pretrained_model=None,
            min_size=600, max_size=1000,
            roi_size=21, group_size=7,
            ratios=None, anchor_scales=None,
            loc_normalize_mean=(0.0, 0.0, 0.0, 0.0),
            loc_normalize_std=(0.2, 0.2, 0.5, 0.5),
            iter2=True,
            resnet_initialW=None, rpn_initialW=None, head_initialW=None,
            proposal_creator_params=None):
        # Mutable defaults are created per call so that no list/dict object
        # is shared between independently constructed models.
        if ratios is None:
            ratios = [0.5, 1, 2]
        if anchor_scales is None:
            anchor_scales = [8, 16, 32]
        if proposal_creator_params is None:
            proposal_creator_params = {
                'nms_thresh': 0.7,
                'n_train_pre_nms': 6000,
                'n_train_post_nms': 300,
                'n_test_pre_nms': 6000,
                'n_test_post_nms': 300,
                'force_cpu_nms': False,
                'min_size': 16
            }

        param, path = utils.prepare_pretrained_model(
            {'n_fg_class': n_fg_class}, pretrained_model, self._models)

        if rpn_initialW is None:
            rpn_initialW = chainer.initializers.Normal(0.01)
        if resnet_initialW is None and pretrained_model:
            # Pretrained weights were requested: use a constant-zero
            # initializer (presumably because the values are overwritten
            # below, making random initialization wasted work).
            resnet_initialW = chainer.initializers.constant.Zero()

        extractor = ResNet101Extractor(
            initialW=resnet_initialW)
        rpn = RegionProposalNetwork(
            1024, 512,
            ratios=ratios,
            anchor_scales=anchor_scales,
            feat_stride=self.feat_stride,
            initialW=rpn_initialW,
            proposal_creator_params=proposal_creator_params)
        head = FCISResNet101Head(
            # One extra class for the background.
            param['n_fg_class'] + 1,
            roi_size=roi_size, group_size=group_size,
            spatial_scale=1. / self.feat_stride,
            loc_normalize_mean=loc_normalize_mean,
            loc_normalize_std=loc_normalize_std,
            iter2=iter2, initialW=head_initialW)

        # Per-channel mean with shape (3, 1, 1) so that it broadcasts over
        # the spatial axes of a channel-first image.
        mean = np.array([123.15, 115.90, 103.06],
                        dtype=np.float32)[:, None, None]

        super(FCISResNet101, self).__init__(
            extractor, rpn, head,
            mean, min_size, max_size,
            loc_normalize_mean, loc_normalize_std)

        if path == 'imagenet':
            self._copy_imagenet_pretrained_resnet()
        elif path:
            chainer.serializers.load_npz(path, self)

    def _copy_imagenet_pretrained_resnet(self):
        """Initialize the extractor from an ImageNet pretrained ResNet101.

        Builds ``ResNet101(arch='he', pretrained_model='imagenet')`` and
        assigns the arrays of its ``conv1`` and ``res2`` to ``res5`` layers
        to the layers with the same names in ``self.extractor``. The arrays
        are assigned by reference, not copied element-wise.
        """
        def _copy_conv2dbn(src, dst):
            # Convolution weights (and bias, when both sides have one).
            dst.conv.W.array = src.conv.W.array
            if src.conv.b is not None and dst.conv.b is not None:
                dst.conv.b.array = src.conv.b.array
            # Batch normalization parameters and running statistics.
            dst.bn.gamma.array = src.bn.gamma.array
            dst.bn.beta.array = src.bn.beta.array
            dst.bn.avg_var = src.bn.avg_var
            dst.bn.avg_mean = src.bn.avg_mean

        def _copy_bottleneck(src, dst):
            # Only bottlenecks that own a ``residual_conv`` have it copied.
            if hasattr(src, 'residual_conv'):
                _copy_conv2dbn(src.residual_conv, dst.residual_conv)
            _copy_conv2dbn(src.conv1, dst.conv1)
            _copy_conv2dbn(src.conv2, dst.conv2)
            _copy_conv2dbn(src.conv3, dst.conv3)

        def _copy_resblock(src, dst):
            for layer_name in src.layer_names:
                _copy_bottleneck(
                    getattr(src, layer_name), getattr(dst, layer_name))

        pretrained_model = ResNet101(arch='he', pretrained_model='imagenet')
        _copy_conv2dbn(pretrained_model.conv1, self.extractor.conv1)
        _copy_resblock(pretrained_model.res2, self.extractor.res2)
        _copy_resblock(pretrained_model.res3, self.extractor.res3)
        _copy_resblock(pretrained_model.res4, self.extractor.res4)
        _copy_resblock(pretrained_model.res5, self.extractor.res5)
class FCISResNet101Head(chainer.Chain):

    """FCIS Head for ResNet101 based implementation.

    This class is used as a head for FCIS.
    This outputs class-agnostic segmentation scores, class-agnostic
    localizations and classification based on feature maps in the given RoIs.

    Args:
        n_class (int): The number of classes possibly including the background.
        roi_size (int): Height and width of the feature maps after
            Position Sensitive RoI pooling.
        group_size (int): Group height and width for Position Sensitive
            ROI pooling.
        spatial_scale (float): Scale by which the RoIs are resized,
            forwarded to Position Sensitive RoI pooling.
        loc_normalize_mean (tuple of four floats): Mean values of
            localization estimates.
        loc_normalize_std (tuple of four floats): Standard deviation
            of localization estimates.
        iter2 (bool): if the value is set :obj:`True`, Position Sensitive
            ROI pooling is executed twice. In the second time, Position
            Sensitive ROI pooling uses improved ROIs by the localization
            parameters calculated in the first time.
        initialW (callable): Initializer for the layers. If :obj:`None`,
            :obj:`chainer.initializers.Normal(0.01)` is used.

    """

    def __init__(
            self,
            n_class,
            roi_size, group_size, spatial_scale,
            loc_normalize_mean, loc_normalize_std,
            iter2, initialW=None
    ):
        super(FCISResNet101Head, self).__init__()

        if initialW is None:
            initialW = chainer.initializers.Normal(0.01)

        self.n_class = n_class
        self.spatial_scale = spatial_scale
        self.group_size = group_size
        self.roi_size = roi_size
        self.loc_normalize_mean = loc_normalize_mean
        self.loc_normalize_std = loc_normalize_std
        self.iter2 = iter2

        with self.init_scope():
            # 1x1 convolution reducing 2048 input channels to 1024.
            self.conv1 = L.Convolution2D(
                2048, 1024, 1, 1, 0, initialW=initialW)
            # Position sensitive score maps: 2 maps per class for each of
            # the group_size x group_size bins.
            self.cls_seg = L.Convolution2D(
                1024, group_size * group_size * n_class * 2,
                1, 1, 0, initialW=initialW)
            # Position sensitive localization maps: 2 sets of 4 values for
            # each of the group_size x group_size bins.
            self.ag_loc = L.Convolution2D(
                1024, group_size * group_size * 2 * 4,
                1, 1, 0, initialW=initialW)

    def __call__(self, x, rois, roi_indices, img_size, gt_roi_labels=None):
        """Forward the chain.

        We assume that there are :math:`N` batches.

        Args:
            x (~chainer.Variable): 4D image variable.
            rois (array): A bounding box array containing coordinates of
                proposal boxes. This is a concatenation of bounding box
                arrays from multiple images in the batch.
                Its shape is :math:`(R', 4)`. Given :math:`R_i` proposed
                RoIs from the :math:`i` th image,
                :math:`R' = \\sum _{i=1} ^ N R_i`.
            roi_indices (array): An array containing indices of images to
                which bounding boxes correspond to. Its shape is :math:`(R',)`.
            img_size (tuple of int): A tuple containing image size.
            gt_roi_labels (array or None): Class index of each RoI used to
                select which class' segmentation scores are returned.
                If :obj:`None`, the class with the highest classification
                score is used for each RoI.

        Returns:
            tuple of five elements:
            :obj:`(roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, rois,
            roi_indices)`.

            * **roi_ag_seg_scores**: Segmentation scores of the selected \
                class. Its shape is :math:`(R, 2, roi\\_size, roi\\_size)`.
            * **roi_ag_locs**: Localization estimates. Its shape is \
                :math:`(R, 2, 4)`.
            * **roi_cls_scores**: Classification scores. Its shape is \
                :math:`(R, n\\_class)`.
            * **rois**: RoIs matching the first axis of the outputs above.
            * **roi_indices**: Image indices of :obj:`rois`.

            :math:`R = R'` when :obj:`iter2` is :obj:`False`. Otherwise the
            results of the first and the second iteration are concatenated
            along the first axis and :math:`R = 2 R'`.

        """
        h = F.relu(self.conv1(x))
        h_cls_seg = self.cls_seg(h)
        h_ag_loc = self.ag_loc(h)

        # PSROI pooling and regression
        roi_ag_seg_scores, roi_ag_locs, roi_cls_scores = self._pool(
            h_cls_seg, h_ag_loc, rois, roi_indices, gt_roi_labels)
        if self.iter2:
            # 2nd Iteration
            # get rois2 for more precise prediction
            # Only the raw array is needed to derive the refined RoIs.
            # ``roi_ag_locs`` itself is left as a Variable so that the
            # first iteration's part of the concatenated output below is
            # not detached from the computational graph.
            mean = self.xp.array(self.loc_normalize_mean)
            std = self.xp.array(self.loc_normalize_std)
            # Index 1 of the second axis holds the localization used to
            # refine the RoIs.
            roi_locs = roi_ag_locs.array[:, 1, :]
            roi_locs = (roi_locs * std + mean).astype(np.float32)
            rois2 = loc2bbox(rois, roi_locs)

            # Even columns are clipped to img_size[0] and odd columns to
            # img_size[1].
            rois2[:, 0::2] = self.xp.clip(rois2[:, 0::2], 0, img_size[0])
            rois2[:, 1::2] = self.xp.clip(rois2[:, 1::2], 0, img_size[1])

            # PSROI pooling and regression
            roi_ag_seg_scores2, roi_ag_locs2, roi_cls_scores2 = self._pool(
                h_cls_seg, h_ag_loc, rois2, roi_indices, gt_roi_labels)

            # concat 1st and 2nd iteration results
            rois = self.xp.concatenate((rois, rois2))
            roi_indices = self.xp.concatenate((roi_indices, roi_indices))
            roi_ag_seg_scores = F.concat(
                (roi_ag_seg_scores, roi_ag_seg_scores2), axis=0)
            roi_ag_locs = F.concat(
                (roi_ag_locs, roi_ag_locs2), axis=0)
            roi_cls_scores = F.concat(
                (roi_cls_scores, roi_cls_scores2), axis=0)
        return roi_ag_seg_scores, roi_ag_locs, roi_cls_scores, \
            rois, roi_indices

    def _pool(
            self, h_cls_seg, h_ag_loc, rois, roi_indices, gt_roi_labels):
        """Pool position sensitive maps over RoIs and derive the outputs.

        Args:
            h_cls_seg (~chainer.Variable): Output of :obj:`self.cls_seg`.
            h_ag_loc (~chainer.Variable): Output of :obj:`self.ag_loc`.
            rois (array): RoIs to pool over.
            roi_indices (array): Image index of each RoI.
            gt_roi_labels (array or None): Class index per RoI used to pick
                the segmentation scores. If :obj:`None`, the argmax of the
                classification scores is used.

        Returns:
            tuple: :obj:`(roi_ag_seg_scores, roi_ag_locs, roi_cls_scores)`.

        """
        # PSROI Pooling
        # shape: (n_roi, n_class, 2, roi_size, roi_size)
        roi_cls_ag_seg_scores = psroi_pooling_2d(
            h_cls_seg, rois, roi_indices,
            self.n_class * 2, self.roi_size, self.roi_size,
            self.spatial_scale, self.group_size)
        roi_cls_ag_seg_scores = F.reshape(
            roi_cls_ag_seg_scores,
            (-1, self.n_class, 2, self.roi_size, self.roi_size))

        # shape: (n_roi, 2*4, roi_size, roi_size)
        roi_ag_loc_scores = psroi_pooling_2d(
            h_ag_loc, rois, roi_indices,
            2 * 4, self.roi_size, self.roi_size,
            self.spatial_scale, self.group_size)

        # Classification: max over the two maps of each class, then the
        # spatial average.
        # shape: (n_roi, n_class)
        roi_cls_scores = F.average(
            F.max(roi_cls_ag_seg_scores, axis=2), axis=(2, 3))

        # Bbox Regression
        # shape: (n_roi, 2, 4)
        roi_ag_locs = F.average(roi_ag_loc_scores, axis=(2, 3))
        roi_ag_locs = F.reshape(roi_ag_locs, (-1, 2, 4))

        # Mask Regression
        # shape: (n_roi, n_class, 2, roi_size, roi_size)
        if gt_roi_labels is None:
            max_cls_indices = roi_cls_scores.array.argmax(axis=1)
        else:
            max_cls_indices = gt_roi_labels

        # Pick, for each RoI, the pair of maps of its selected class.
        # shape: (n_roi, 2, roi_size, roi_size)
        roi_ag_seg_scores = roi_cls_ag_seg_scores[
            self.xp.arange(len(max_cls_indices)), max_cls_indices]

        return roi_ag_seg_scores, roi_ag_locs, roi_cls_scores