Source code for chainercv.links.model.ssd.ssd

from __future__ import division

import numpy as np

import chainer

from chainercv.links.model.ssd import MultiboxCoder
from chainercv import transforms


class SSD(chainer.Chain):
    """Base class of Single Shot Multibox Detector.

    This is a base class of Single Shot Multibox Detector [#]_.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        extractor: A link which extracts feature maps.
            This link must have :obj:`insize`, :obj:`grids` and
            :meth:`__call__`.

            * :obj:`insize`: An integer which indicates the size of input
              images. Images are resized to this size before feature
              extraction.
            * :obj:`grids`: An iterable of integer. Each integer indicates
              the size of feature map. This value is used by
              :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            * :meth:`__call__`: A method which computes feature maps.
              It must take a batched images and return batched feature
              maps.
        multibox: A link which computes :obj:`mb_locs` and :obj:`mb_confs`
            from feature maps.
            This link must have :obj:`n_class`, :obj:`aspect_ratios` and
            :meth:`__call__`.

            * :obj:`n_class`: An integer which indicates the number of
              classes. This value should include the background class.
            * :obj:`aspect_ratios`: An iterable of tuple of integer.
              Each tuple indicates the aspect ratios of default bounding
              boxes at each feature maps. This value is used by
              :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            * :meth:`__call__`: A method which computes
              :obj:`mb_locs` and :obj:`mb_confs`.
              It must take a batched feature maps and
              return :obj:`mb_locs` and :obj:`mb_confs`.
        steps (iterable of float): The step size for each feature map.
            This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
        sizes (iterable of float): The base size of default bounding boxes
            for each feature map. This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
        variance (tuple of floats): Two coefficients for decoding
            the locations of bounding boxes. This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            The default value is :obj:`(0.1, 0.2)`.
        mean: The value subtracted from every resized image in
            :meth:`_prepare` before feature extraction. It is applied with
            an in-place subtraction, so it must be broadcastable against a
            float32 image array. The default value is :obj:`0`.

    Parameters:
        nms_thresh (float): The threshold value
            for :func:`~chainercv.utils.non_maximum_suppression`.
            The default value is :obj:`0.45`.
            This value can be changed directly or by using
            :meth:`use_preset`.
        score_thresh (float): The threshold value for confidence score.
            If a bounding box whose confidence score is lower than
            this value, the bounding box will be suppressed.
            The default value is :obj:`0.6`.
            This value can be changed directly or by using
            :meth:`use_preset`.

    """

    def __init__(
            self, extractor, multibox,
            steps, sizes, variance=(0.1, 0.2),
            mean=0):
        self.mean = mean
        # Start with the thresholds suited to visualization; callers can
        # switch with use_preset() or by assigning the attributes directly.
        self.use_preset('visualize')

        super(SSD, self).__init__()
        with self.init_scope():
            self.extractor = extractor
            self.multibox = multibox

        # The coder is moved between devices explicitly in to_cpu()/to_gpu().
        self.coder = MultiboxCoder(
            extractor.grids, multibox.aspect_ratios, steps, sizes, variance)

    @property
    def insize(self):
        """Input image size, as reported by the extractor."""
        return self.extractor.insize

    @property
    def n_fg_class(self):
        """Number of foreground classes (``multibox.n_class`` minus one)."""
        return self.multibox.n_class - 1

    def to_cpu(self):
        """Move this link and its coder to the CPU."""
        super(SSD, self).to_cpu()
        self.coder.to_cpu()

    def to_gpu(self, device=None):
        """Move this link and its coder to the given GPU device.

        Args:
            device: Device specifier forwarded unchanged to the parent
                :meth:`to_gpu` and to :obj:`self.coder.to_gpu`.

        """
        super(SSD, self).to_gpu(device)
        self.coder.to_gpu(device=device)

    def __call__(self, x):
        """Compute localization and classification from a batch of images.

        This method computes two variables, :obj:`mb_locs` and
        :obj:`mb_confs`. :func:`self.coder.decode` converts these variables
        to bounding box coordinates and confidence scores.
        These variables are also used in training SSD.

        Args:
            x (chainer.Variable): A variable holding a batch of images.
                The images are preprocessed by :meth:`_prepare`.

        Returns:
            tuple of chainer.Variable:
            This method returns two variables, :obj:`mb_locs` and
            :obj:`mb_confs`.

            * **mb_locs**: A variable of float arrays of shape
              :math:`(B, K, 4)`,
              where :math:`B` is the number of samples in the batch and
              :math:`K` is the number of default bounding boxes.
            * **mb_confs**: A variable of float arrays of shape
              :math:`(B, K, n\\_fg\\_class + 1)`.

        """
        return self.multibox(self.extractor(x))

    def _prepare(self, img):
        """Convert one image into the form fed to the extractor.

        The image is cast to float32, resized to
        :obj:`(self.insize, self.insize)` and has :obj:`self.mean`
        subtracted from it.

        Args:
            img: An image array. :meth:`predict` passes CHW arrays.

        Returns:
            The preprocessed image array.

        """
        img = img.astype(np.float32)
        img = transforms.resize(img, (self.insize, self.insize))
        img -= self.mean
        return img

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If :obj:`preset` is neither :obj:`'visualize'` nor
                :obj:`'evaluate'`.

        """
        if preset == 'visualize':
            self.nms_thresh = 0.45
            self.score_thresh = 0.6
        elif preset == 'evaluate':
            self.nms_thresh = 0.45
            self.score_thresh = 0.01
        else:
            raise ValueError('preset must be visualize or evaluate')

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
            tuple of lists:
            This method returns a tuple of three lists,
            :obj:`(bboxes, labels, scores)`. When :obj:`imgs` is empty,
            all three lists are empty.

            * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
              where :math:`R` is the number of bounding boxes in a image.
              Each bounding box is organized by
              :math:`(y_{min}, x_{min}, y_{max}, x_{max})`
              in the second axis.
            * **labels** : A list of integer arrays of shape :math:`(R,)`.
              Each value indicates the class of the bounding box.
              Values are in range :math:`[0, L - 1]`, where :math:`L` is
              the number of the foreground classes.
            * **scores** : A list of float arrays of shape :math:`(R,)`.
              Each value indicates how confident the prediction is.

        """
        x = []
        sizes = []
        for img in imgs:
            # Remember the original size so boxes can be mapped back to it.
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))

        # Stacking an empty list raises, so an empty batch is answered
        # directly with empty results.
        if not x:
            return [], [], []

        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            mb_locs, mb_confs = self(x)
        mb_locs, mb_confs = mb_locs.array, mb_confs.array

        bboxes = []
        labels = []
        scores = []
        for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
            bbox, label, score = self.coder.decode(
                mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
            # Boxes are decoded in the resized (insize x insize) frame;
            # rescale them to the original image size.
            bbox = transforms.resize_bbox(
                bbox, (self.insize, self.insize), size)
            bboxes.append(chainer.backends.cuda.to_cpu(bbox))
            labels.append(chainer.backends.cuda.to_cpu(label))
            scores.append(chainer.backends.cuda.to_cpu(score))

        return bboxes, labels, scores