from __future__ import division
import numpy as np
import chainer
import chainer.functions as F
from chainer import initializers
import chainer.links as L
from chainercv.links.model.ssd import Multibox
from chainercv.links.model.ssd import Normalize
from chainercv.links.model.ssd import SSD
from chainercv import utils
# Per-channel mean in RGB order, stored with shape (C, 1, 1).
# It is handed to the SSD constructors in this module as their ``mean``.
_imagenet_mean = np.array((123, 117, 104))[:, np.newaxis, np.newaxis]
class VGG16(chainer.Chain):
    """An extended VGG-16 model for SSD300 and SSD512.

    This is an extended VGG-16 model proposed in [#]_.
    The differences from the original VGG-16 [#]_ are shown below.

    * :obj:`conv5_1`, :obj:`conv5_2` and :obj:`conv5_3` are changed from
      :class:`~chainer.links.Convolution2D` to
      :class:`~chainer.links.DilatedConvolution2D`.
    * :class:`~chainercv.links.model.ssd.Normalize` is
      inserted after :obj:`conv4_3`.
    * The parameters of max pooling after :obj:`conv5_3` are changed.
    * :obj:`fc6` and :obj:`fc7` are converted to :obj:`conv6` and
      :obj:`conv7`.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.
    .. [#] Karen Simonyan, Andrew Zisserman.
       Very Deep Convolutional Networks for Large-Scale Image Recognition.
       ICLR 2015.
    """

    def __init__(self):
        super(VGG16, self).__init__()
        with self.init_scope():
            # Attribute names double as parameter names, so they must stay
            # stable for serialized weights to keep matching.
            self.conv1_1 = L.Convolution2D(64, 3, pad=1)
            self.conv1_2 = L.Convolution2D(64, 3, pad=1)

            self.conv2_1 = L.Convolution2D(128, 3, pad=1)
            self.conv2_2 = L.Convolution2D(128, 3, pad=1)

            self.conv3_1 = L.Convolution2D(256, 3, pad=1)
            self.conv3_2 = L.Convolution2D(256, 3, pad=1)
            self.conv3_3 = L.Convolution2D(256, 3, pad=1)

            self.conv4_1 = L.Convolution2D(512, 3, pad=1)
            self.conv4_2 = L.Convolution2D(512, 3, pad=1)
            self.conv4_3 = L.Convolution2D(512, 3, pad=1)
            # Normalization applied to the conv4_3 feature map; its
            # parameter is initialized with the constant 20.
            self.norm4 = Normalize(512, initial=initializers.Constant(20))

            # No ``dilate`` is passed here, so these three links use the
            # library default dilation.
            self.conv5_1 = L.DilatedConvolution2D(512, 3, pad=1)
            self.conv5_2 = L.DilatedConvolution2D(512, 3, pad=1)
            self.conv5_3 = L.DilatedConvolution2D(512, 3, pad=1)

            # Convolutional replacements for fc6 / fc7 (see class docstring).
            self.conv6 = L.DilatedConvolution2D(1024, 3, pad=6, dilate=6)
            self.conv7 = L.Convolution2D(1024, 1)

    def __call__(self, x):
        """Run the network and collect two intermediate feature maps.

        Args:
            x: Input that is fed directly to :obj:`conv1_1`.

        Returns:
            list: Two elements, in this order: the output of :obj:`norm4`
            applied to the activated :obj:`conv4_3` feature map, and the
            activated output of :obj:`conv7`.
        """
        ys = []

        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pooling_2d(h, 2)

        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pooling_2d(h, 2)

        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        h = F.max_pooling_2d(h, 2)

        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        # The normalized map is only emitted as an output; the
        # un-normalized ``h`` continues through the rest of the network.
        ys.append(self.norm4(h))
        h = F.max_pooling_2d(h, 2)

        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3(h))
        # Unlike the earlier pools, this one uses a window of 3 with
        # stride 1 and pad 1.
        h = F.max_pooling_2d(h, 3, stride=1, pad=1)

        h = F.relu(self.conv6(h))
        h = F.relu(self.conv7(h))
        ys.append(h)

        return ys
class SSD300(SSD):
    """Single Shot Multibox Detector with 300x300 inputs.

    This is a model of Single Shot Multibox Detector [#]_.
    This model uses :class:`~chainercv.links.model.ssd.VGG16Extractor300` as
    its feature extractor.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        n_fg_class (int): The number of classes excluding the background.
        pretrained_model (string): The weight file to be loaded.
            This can take :obj:`'voc0712'`, :obj:`'imagenet'`,
            `filepath` or :obj:`None`.
            The default value is :obj:`None`.

            * :obj:`'voc0712'`: Load weights trained on trainval split of
              PASCAL VOC 2007 and 2012.
              The weight file is downloaded and cached automatically.
              :obj:`n_fg_class` must be :obj:`20` or :obj:`None`.
              These weights were converted from the Caffe model provided by
              `the original implementation
              <https://github.com/weiliu89/caffe/tree/ssd>`_.
              The conversion code is `chainercv/examples/ssd/caffe2npz.py`.
            * :obj:`'imagenet'`: Load weights of VGG-16 trained on ImageNet.
              The weight file is downloaded and cached automatically.
              This option initializes weights partially and the rest are
              initialized randomly. In this case, :obj:`n_fg_class`
              can be set to any number.
            * `filepath`: A path of npz file. In this case,
              :obj:`n_fg_class` must be specified properly.
            * :obj:`None`: Do not load weights.

    """

    # Named pretrained weights understood by ``pretrained_model``.
    _models = {
        'voc0712': {
            'param': {'n_fg_class': 20},
            'url': 'https://chainercv-models.preferred.jp/'
            'ssd300_voc0712_converted_2017_06_06.npz',
            'cv2': True
        },
        'imagenet': {
            'url': 'https://chainercv-models.preferred.jp/'
            'ssd_vgg16_imagenet_converted_2017_06_09.npz',
            'cv2': True
        },
    }

    def __init__(self, n_fg_class=None, pretrained_model=None):
        # Resolve the effective parameters and the weight file location
        # from the user arguments and the ``_models`` table.
        param, path = utils.prepare_pretrained_model(
            {'n_fg_class': n_fg_class}, pretrained_model, self._models)

        super(SSD300, self).__init__(
            extractor=VGG16Extractor300(),
            multibox=Multibox(
                # n_fg_class excludes the background, hence the extra class.
                n_class=param['n_fg_class'] + 1,
                aspect_ratios=((2,), (2, 3), (2, 3), (2, 3), (2,), (2,))),
            steps=(8, 16, 32, 64, 100, 300),
            sizes=(30, 60, 111, 162, 213, 264, 315),
            mean=_imagenet_mean)

        if path:
            # Non-strict loading: the 'imagenet' weights only cover part of
            # the model (see the class docstring).
            chainer.serializers.load_npz(path, self, strict=False)
class SSD512(SSD):
    """Single Shot Multibox Detector with 512x512 inputs.

    This is a model of Single Shot Multibox Detector [#]_.
    This model uses :class:`~chainercv.links.model.ssd.VGG16Extractor512` as
    its feature extractor.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        n_fg_class (int): The number of classes excluding the background.
        pretrained_model (string): The weight file to be loaded.
            This can take :obj:`'voc0712'`, :obj:`'imagenet'`,
            `filepath` or :obj:`None`.
            The default value is :obj:`None`.

            * :obj:`'voc0712'`: Load weights trained on trainval split of
              PASCAL VOC 2007 and 2012.
              The weight file is downloaded and cached automatically.
              :obj:`n_fg_class` must be :obj:`20` or :obj:`None`.
              These weights were converted from the Caffe model provided by
              `the original implementation
              <https://github.com/weiliu89/caffe/tree/ssd>`_.
              The conversion code is `chainercv/examples/ssd/caffe2npz.py`.
            * :obj:`'imagenet'`: Load weights of VGG-16 trained on ImageNet.
              The weight file is downloaded and cached automatically.
              This option initializes weights partially and the rest are
              initialized randomly. In this case, :obj:`n_fg_class`
              can be set to any number.
            * `filepath`: A path of npz file. In this case,
              :obj:`n_fg_class` must be specified properly.
            * :obj:`None`: Do not load weights.
        use_pretrained_class_weights (bool): Currently not read by this
            constructor; passing it has no effect.
            The default value is :obj:`True`.

    """

    # Named pretrained weights understood by ``pretrained_model``.
    _models = {
        'voc0712': {
            'param': {'n_fg_class': 20},
            'url': 'https://chainercv-models.preferred.jp/'
            'ssd512_voc0712_converted_2017_06_06.npz',
            'cv2': True
        },
        'imagenet': {
            'url': 'https://chainercv-models.preferred.jp/'
            'ssd_vgg16_imagenet_converted_2017_06_09.npz',
            'cv2': True
        },
    }

    def __init__(self, n_fg_class=None, pretrained_model=None,
                 use_pretrained_class_weights=True):
        # NOTE(review): ``use_pretrained_class_weights`` is never used below.
        # It is kept so existing callers passing it keep working; confirm
        # whether it was meant to influence weight loading.

        # Resolve the effective parameters and the weight file location
        # from the user arguments and the ``_models`` table.
        param, path = utils.prepare_pretrained_model(
            {'n_fg_class': n_fg_class}, pretrained_model, self._models)

        super(SSD512, self).__init__(
            extractor=VGG16Extractor512(),
            multibox=Multibox(
                # n_fg_class excludes the background, hence the extra class.
                n_class=param['n_fg_class'] + 1,
                aspect_ratios=(
                    (2,), (2, 3), (2, 3), (2, 3), (2, 3), (2,), (2,))),
            steps=(8, 16, 32, 64, 128, 256, 512),
            sizes=(35.84, 76.8, 153.6, 230.4, 307.2, 384.0, 460.8, 537.6),
            mean=_imagenet_mean)

        if path:
            # Non-strict loading: the 'imagenet' weights only cover part of
            # the model (see the class docstring).
            chainer.serializers.load_npz(path, self, strict=False)