# Source code for chainercv.links.model.segnet.segnet_basic

from __future__ import division

import numpy as np

import chainer
import chainer.functions as F
import chainer.links as L

from chainercv.transforms import resize
from chainercv import utils


class SegNetBasic(chainer.Chain):

    """SegNet Basic for semantic segmentation.

    This is a SegNet [#]_ model for semantic segmentation. This is based on
    SegNetBasic model that is found here_.

    When you specify the path of a pretrained chainer model serialized as
    a :obj:`.npz` file in the constructor, this chain model automatically
    initializes all the parameters with it.
    When a string in prespecified set is provided, a pretrained model is
    loaded from weights distributed on the Internet.
    The list of pretrained models supported are as follows:

    * :obj:`camvid`: Loads weights trained with the train split of \
        CamVid dataset.

    .. [#] Vijay Badrinarayanan, Alex Kendall and Roberto Cipolla "SegNet: A \
    Deep Convolutional Encoder-Decoder Architecture for Image Segmentation." \
    PAMI, 2017

    .. _here: http://github.com/alexgkendall/SegNet-Tutorial

    Args:
        n_class (int): The number of classes. If :obj:`None`, it can
            be inferred if :obj:`pretrained_model` is given.
        pretrained_model (string): The destination of the pretrained
            chainer model serialized as a :obj:`.npz` file.
            If this is one of the strings described
            above, it automatically loads weights stored under a directory
            :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/models/`,
            where :obj:`$CHAINER_DATASET_ROOT` is set as
            :obj:`$HOME/.chainer/dataset` unless you specify another value
            by modifying the environment variable.
        initialW (callable): Initializer for convolution layers.
            If :obj:`None`, :class:`chainer.initializers.HeNormal` is used.

    """

    # Registry of downloadable pretrained weights, keyed by the string that
    # can be passed as ``pretrained_model``.
    _models = {
        'camvid': {
            'param': {'n_class': 11},
            'url': 'https://chainercv-models.preferred.jp/'
            'segnet_camvid_trained_2017_05_28.npz'
        }
    }

    def __init__(self, n_class=None, pretrained_model=None, initialW=None):
        # Resolve the effective ``n_class`` and the weight file path (if any)
        # from the arguments and the ``_models`` registry.
        param, path = utils.prepare_pretrained_model(
            {'n_class': n_class}, pretrained_model, self._models)
        self.n_class = param['n_class']

        if initialW is None:
            initialW = chainer.initializers.HeNormal()

        super(SegNetBasic, self).__init__()
        # NOTE: the attribute names below are the parameter names used by
        # serialized models, so they must not be renamed.
        with self.init_scope():
            # Encoder: four 7x7 convolutions (stride 1, pad 3, no bias), each
            # followed by batch normalization. The first convolution infers
            # its input channel count lazily (``None``).
            self.conv1 = L.Convolution2D(
                None, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv1_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv2 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv2_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv3 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv3_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv4 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv4_bn = L.BatchNormalization(64, initial_beta=0.001)
            # Decoder: mirrors the encoder, numbered from the deepest stage
            # (4) back to the shallowest (1).
            self.conv_decode4 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode4_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv_decode3 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode3_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv_decode2 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode2_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv_decode1 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode1_bn = L.BatchNormalization(64, initial_beta=0.001)
            # Per-pixel classifier: 1x1 convolution to ``n_class`` channels.
            self.conv_classifier = L.Convolution2D(
                64, self.n_class, 1, 1, 0, initialW=initialW)

        if path:
            chainer.serializers.load_npz(path, self)

    def _upsampling_2d(self, x, indices):
        """Unpool :obj:`x` to twice its spatial size using pooling indices.

        If :obj:`x` and :obj:`indices` differ in shape, both are first cropped
        (from the top-left corner) to their common height and width so that
        they can be passed to :func:`chainer.functions.upsampling_2d`.

        Args:
            x (chainer.Variable): A 4D feature map to be upsampled.
            indices: Indices returned by
                :func:`chainer.functions.max_pooling_2d` with
                :obj:`return_indices=True`.

        Returns:
            chainer.Variable:
            The upsampled feature map, whose height and width are twice
            those of the (possibly cropped) input.

        """
        if x.shape != indices.shape:
            min_h = min(x.shape[2], indices.shape[2])
            min_w = min(x.shape[3], indices.shape[3])
            x = x[:, :, :min_h, :min_w]
            indices = indices[:, :, :min_h, :min_w]
        outsize = (x.shape[2] * 2, x.shape[3] * 2)
        return F.upsampling_2d(x, indices, ksize=2, stride=2, outsize=outsize)

    def __call__(self, x):
        """Compute an image-wise score from a batch of images

        Args:
            x (chainer.Variable): A variable with 4D image array.

        Returns:
            chainer.Variable:
            An image-wise score. Its channel size is :obj:`self.n_class`.

        """
        # Normalize the raw input before the first convolution.
        h = F.local_response_normalization(x, 5, 1, 1e-4 / 5., 0.75)

        # Encoder: conv -> BN -> ReLU -> 2x2 max pooling. The pooling indices
        # of every stage are kept so the decoder can unpool with them.
        h, indices1 = F.max_pooling_2d(
            F.relu(self.conv1_bn(self.conv1(h))), 2, 2, return_indices=True)
        h, indices2 = F.max_pooling_2d(
            F.relu(self.conv2_bn(self.conv2(h))), 2, 2, return_indices=True)
        h, indices3 = F.max_pooling_2d(
            F.relu(self.conv3_bn(self.conv3(h))), 2, 2, return_indices=True)
        h, indices4 = F.max_pooling_2d(
            F.relu(self.conv4_bn(self.conv4(h))), 2, 2, return_indices=True)

        # Decoder: unpool with the matching encoder indices, then conv -> BN.
        # No non-linearity is applied in the decoder stages.
        h = self._upsampling_2d(h, indices4)
        h = self.conv_decode4_bn(self.conv_decode4(h))
        h = self._upsampling_2d(h, indices3)
        h = self.conv_decode3_bn(self.conv_decode3(h))
        h = self._upsampling_2d(h, indices2)
        h = self.conv_decode2_bn(self.conv_decode2(h))
        h = self._upsampling_2d(h, indices1)
        h = self.conv_decode1_bn(self.conv_decode1(h))

        score = self.conv_classifier(h)
        return score

    def predict(self, imgs):
        """Conduct semantic segmentations from images.

        Each image is processed independently in test mode without building
        a computational graph.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their values are :math:`[0, 255]`.

        Returns:
            list of numpy.ndarray:

            List of integer labels predicted from each image in the input \
            list.

        """
        labels = []
        for img in imgs:
            _, H, W = img.shape
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
                score = self(x)[0].data
            score = chainer.backends.cuda.to_cpu(score)
            # The score map may not match the input's spatial size
            # (``_upsampling_2d`` crops mismatched feature maps), in which
            # case it is resized back. Only the spatial dimensions are
            # compared: the score has ``n_class`` channels while the image
            # has its own channel count, so comparing full shapes would
            # trigger a needless resize whenever those counts differ.
            if score.shape[1:] != (H, W):
                dtype = score.dtype
                score = resize(score, (H, W)).astype(dtype)

            label = np.argmax(score, axis=0).astype(np.int32)
            labels.append(label)
        return labels