from __future__ import division
import numpy as np
import chainer
import chainer.functions as F
import chainer.links as L
from chainercv.transforms import resize
from chainercv import utils
class SegNetBasic(chainer.Chain):

    """SegNet Basic for semantic segmentation.

    This is a SegNet [#]_ model for semantic segmentation. This is based on
    SegNetBasic model that is found here_.

    When you specify the path of a pretrained chainer model serialized as
    a :obj:`.npz` file in the constructor, this chain model automatically
    initializes all the parameters with it.
    When a string in prespecified set is provided, a pretrained model is
    loaded from weights distributed on the Internet.
    The list of pretrained models supported are as follows:

    * :obj:`camvid`: Loads weights trained with the train split of \
        CamVid dataset.

    .. [#] Vijay Badrinarayanan, Alex Kendall and Roberto Cipolla "SegNet: A \
    Deep Convolutional Encoder-Decoder Architecture for Image Segmentation." \
    PAMI, 2017

    .. _here: http://github.com/alexgkendall/SegNet-Tutorial

    Args:
        n_class (int): The number of classes. If :obj:`None`, it can
            be inferred if :obj:`pretrained_model` is given.
        pretrained_model (string): The destination of the pretrained
            chainer model serialized as a :obj:`.npz` file.
            If this is one of the strings described
            above, it automatically loads weights stored under a directory
            :obj:`$CHAINER_DATASET_ROOT/pfnet/chainercv/models/`,
            where :obj:`$CHAINER_DATASET_ROOT` is set as
            :obj:`$HOME/.chainer/dataset` unless you specify another value
            by modifying the environment variable.
        initialW (callable): Initializer for convolution layers. If
            :obj:`None`, :class:`chainer.initializers.HeNormal` is used.

    """

    # Named pretrained weight sets: default constructor parameters plus the
    # URL of the serialized weights.
    _models = {
        'camvid': {
            'param': {'n_class': 11},
            'url': 'https://chainercv-models.preferred.jp/'
            'segnet_camvid_trained_2017_05_28.npz'
        }
    }

    def __init__(self, n_class=None, pretrained_model=None, initialW=None):
        # Resolve the effective parameters and the path of the weights to
        # load (if any) from the user arguments and the ``_models`` table.
        param, path = utils.prepare_pretrained_model(
            {'n_class': n_class}, pretrained_model, self._models)
        self.n_class = param['n_class']

        if initialW is None:
            initialW = chainer.initializers.HeNormal()

        super(SegNetBasic, self).__init__()
        with self.init_scope():
            # Encoder: four 7x7 convolutions (stride 1, pad 3), each followed
            # by batch normalization.
            self.conv1 = L.Convolution2D(
                None, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv1_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv2 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv2_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv3 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv3_bn = L.BatchNormalization(64, initial_beta=0.001)
            self.conv4 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv4_bn = L.BatchNormalization(64, initial_beta=0.001)

            # Decoder: mirrors the encoder, applied from stage 4 down to 1.
            self.conv_decode4 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode4_bn = L.BatchNormalization(
                64, initial_beta=0.001)
            self.conv_decode3 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode3_bn = L.BatchNormalization(
                64, initial_beta=0.001)
            self.conv_decode2 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode2_bn = L.BatchNormalization(
                64, initial_beta=0.001)
            self.conv_decode1 = L.Convolution2D(
                64, 64, 7, 1, 3, nobias=True, initialW=initialW)
            self.conv_decode1_bn = L.BatchNormalization(
                64, initial_beta=0.001)

            # 1x1 convolution producing one score map per class.
            self.conv_classifier = L.Convolution2D(
                64, self.n_class, 1, 1, 0, initialW=initialW)

        if path:
            chainer.serializers.load_npz(path, self)

    def _upsampling_2d(self, x, indices):
        """Unpool :obj:`x` by a factor of two using pooling indices.

        When :obj:`x` and :obj:`indices` differ in shape, both are cropped
        to their common height and width before unpooling.

        Args:
            x (chainer.Variable): Feature map to upsample.
            indices: Indices returned by the matching
                :func:`chainer.functions.max_pooling_2d` call.

        Returns:
            chainer.Variable:
            The upsampled feature map, twice the (cropped) height and width
            of :obj:`x`.

        """
        if x.shape != indices.shape:
            # NOTE(review): presumably the shapes diverge for input sizes
            # that are not a multiple of the total pooling stride -- confirm.
            min_h = min(x.shape[2], indices.shape[2])
            min_w = min(x.shape[3], indices.shape[3])
            x = x[:, :, :min_h, :min_w]
            indices = indices[:, :, :min_h, :min_w]
        outsize = (x.shape[2] * 2, x.shape[3] * 2)
        return F.upsampling_2d(
            x, indices, ksize=2, stride=2, outsize=outsize)

    def __call__(self, x):
        """Compute an image-wise score from a batch of images

        Args:
            x (chainer.Variable): A variable with 4D image array.

        Returns:
            chainer.Variable:
            An image-wise score. Its channel size is :obj:`self.n_class`.

        """
        h = F.local_response_normalization(x, 5, 1, 1e-4 / 5., 0.75)

        # Encoder: conv -> BN -> ReLU -> 2x2 max pooling. The pooling indices
        # are kept so that the decoder can unpool with them.
        h, indices1 = F.max_pooling_2d(
            F.relu(self.conv1_bn(self.conv1(h))), 2, 2, return_indices=True)
        h, indices2 = F.max_pooling_2d(
            F.relu(self.conv2_bn(self.conv2(h))), 2, 2, return_indices=True)
        h, indices3 = F.max_pooling_2d(
            F.relu(self.conv3_bn(self.conv3(h))), 2, 2, return_indices=True)
        h, indices4 = F.max_pooling_2d(
            F.relu(self.conv4_bn(self.conv4(h))), 2, 2, return_indices=True)

        # Decoder: unpool with the matching indices, then conv -> BN.
        h = self._upsampling_2d(h, indices4)
        h = self.conv_decode4_bn(self.conv_decode4(h))
        h = self._upsampling_2d(h, indices3)
        h = self.conv_decode3_bn(self.conv_decode3(h))
        h = self._upsampling_2d(h, indices2)
        h = self.conv_decode2_bn(self.conv_decode2(h))
        h = self._upsampling_2d(h, indices1)
        h = self.conv_decode1_bn(self.conv_decode1(h))

        score = self.conv_classifier(h)
        return score

    def predict(self, imgs):
        """Conduct semantic segmentations from images.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their values are :math:`[0, 255]`.

        Returns:
            list of numpy.ndarray:

            List of integer labels predicted from each image in the input \
            list.

        """
        labels = []
        for img in imgs:
            _, H, W = img.shape
            with chainer.using_config('train', False), \
                    chainer.function.no_backprop_mode():
                x = chainer.Variable(self.xp.asarray(img[np.newaxis]))
                score = self(x)[0].data
            score = chainer.backends.cuda.to_cpu(score)
            # The score map has ``n_class`` channels, not the image's channel
            # count, so only the spatial size decides whether to resize.
            if score.shape[1:] != (H, W):
                dtype = score.dtype
                score = resize(score, (H, W)).astype(dtype)

            label = np.argmax(score, axis=0).astype(np.int32)
            labels.append(label)
        return labels