# Source code for chainercv.links.model.ssd.ssd

from __future__ import division

import numpy as np

import chainer

from chainercv.links.model.ssd import MultiboxCoder
from chainercv import transforms


class SSD(chainer.Chain):
    """Base class of Single Shot Multibox Detector.

    This is a base class of Single Shot Multibox Detector [#]_.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
       Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        extractor: A link which extracts feature maps.
            This link must have :obj:`insize`, :obj:`grids` and
            :meth:`__call__`.

            * :obj:`insize`: An integer which indicates \
            the size of input images. Images are resized to this size before \
            feature extraction.
            * :obj:`grids`: An iterable of integers. Each integer indicates \
            the size of a feature map. This value is used by \
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            * :meth:`__call__`: A method which computes feature maps. \
            It must take batched images and return batched feature maps.
        multibox: A link which computes :obj:`mb_locs` and :obj:`mb_confs`
            from feature maps.
            This link must have :obj:`n_class`, :obj:`aspect_ratios` and
            :meth:`__call__`.

            * :obj:`n_class`: An integer which indicates the number of \
            classes. \
            This value should include the background class.
            * :obj:`aspect_ratios`: An iterable of tuples of integers. \
            Each tuple indicates the aspect ratios of default bounding boxes \
            at each feature map. This value is used by \
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            * :meth:`__call__`: A method which computes \
            :obj:`mb_locs` and :obj:`mb_confs`. \
            It must take batched feature maps and \
            return :obj:`mb_locs` and :obj:`mb_confs`.
        steps (iterable of float): The step size for each feature map.
            This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
        sizes (iterable of float): The base size of default bounding boxes
            for each feature map. This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
        variance (tuple of floats): Two coefficients for decoding
            the locations of bounding boxes.
            This value is used by
            :class:`~chainercv.links.model.ssd.MultiboxCoder`.
            The default value is :obj:`(0.1, 0.2)`.
        mean: The value subtracted from each resized image before
            feature extraction. The default value is :obj:`0`.

    Parameters:
        nms_thresh (float): The threshold value
            for :func:`~chainercv.utils.non_maximum_suppression`.
            The default value is :obj:`0.45`.
            This value can be changed directly or by using :meth:`use_preset`.
        score_thresh (float): The threshold value for confidence score.
            A bounding box whose confidence score is lower than this value
            is suppressed.
            The default value is :obj:`0.6`.
            This value can be changed directly or by using :meth:`use_preset`.

    """

    def __init__(
            self, extractor, multibox,
            steps, sizes, variance=(0.1, 0.2),
            mean=0):
        self.mean = mean
        # Start with the 'visualize' thresholds; callers may switch presets
        # or overwrite nms_thresh / score_thresh afterwards.
        self.use_preset('visualize')

        super(SSD, self).__init__()
        with self.init_scope():
            self.extractor = extractor
            self.multibox = multibox

        self.coder = MultiboxCoder(
            extractor.grids, multibox.aspect_ratios, steps, sizes, variance)

    @property
    def insize(self):
        """The :obj:`insize` of the extractor."""
        return self.extractor.insize

    @property
    def n_fg_class(self):
        """The number of foreground classes.

        This is :obj:`multibox.n_class` minus one, because
        :obj:`n_class` includes the background class.
        """
        return self.multibox.n_class - 1

    def to_cpu(self):
        """Move this chain and its :obj:`coder` to CPU."""
        super(SSD, self).to_cpu()
        # The coder is a plain attribute, so it must be moved explicitly.
        self.coder.to_cpu()

    def to_gpu(self, device=None):
        """Move this chain and its :obj:`coder` to the given GPU device.

        Args:
            device: The device specifier forwarded to both
                :meth:`chainer.Chain.to_gpu` and :obj:`coder.to_gpu`.
        """
        super(SSD, self).to_gpu(device)
        # The coder is a plain attribute, so it must be moved explicitly.
        self.coder.to_gpu(device=device)

    def __call__(self, x):
        """Compute localization and classification from a batch of images.

        This method computes two variables, :obj:`mb_locs` and :obj:`mb_confs`.
        :func:`self.coder.decode` converts these variables to bounding box
        coordinates and confidence scores.
        These variables are also used in training SSD.

        Args:
            x (chainer.Variable): A variable holding a batch of images.
                The images are preprocessed by :meth:`_prepare`.

        Returns:
            tuple of chainer.Variable:
            This method returns two variables, :obj:`mb_locs` and
            :obj:`mb_confs`.

            * **mb_locs**: A variable of float arrays of shape \
                :math:`(B, K, 4)`, \
                where :math:`B` is the number of samples in the batch and \
                :math:`K` is the number of default bounding boxes.
            * **mb_confs**: A variable of float arrays of shape \
                :math:`(B, K, n\\_fg\\_class + 1)`.
        """

        return self.multibox(self.extractor(x))

    def _prepare(self, img):
        """Convert one image into the form fed to the extractor.

        The image is cast to :obj:`numpy.float32`, resized to
        :obj:`(insize, insize)` and then has :obj:`mean` subtracted.

        Args:
            img (numpy.ndarray): An image to preprocess.

        Returns:
            numpy.ndarray: The preprocessed image.
        """
        img = img.astype(np.float32)
        img = transforms.resize(img, (self.insize, self.insize))
        img -= self.mean
        return img

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If :obj:`preset` is neither :obj:`'visualize'`
                nor :obj:`'evaluate'`.
        """

        if preset == 'visualize':
            self.nms_thresh = 0.45
            self.score_thresh = 0.6
        elif preset == 'evaluate':
            self.nms_thresh = 0.45
            self.score_thresh = 0.01
        else:
            raise ValueError('preset must be visualize or evaluate')

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`.
           If :obj:`imgs` is empty, all three lists are empty.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized as \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        x = []
        sizes = []
        for img in imgs:
            # Remember the original size so boxes can be mapped back to it.
            _, H, W = img.shape
            img = self._prepare(img)
            x.append(self.xp.array(img))
            sizes.append((H, W))

        if not x:
            # Stacking an empty sequence raises; with no images there is
            # nothing to detect.
            return [], [], []

        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            x = chainer.Variable(self.xp.stack(x))
            mb_locs, mb_confs = self(x)
        mb_locs, mb_confs = mb_locs.array, mb_confs.array

        bboxes = []
        labels = []
        scores = []
        for mb_loc, mb_conf, size in zip(mb_locs, mb_confs, sizes):
            bbox, label, score = self.coder.decode(
                mb_loc, mb_conf, self.nms_thresh, self.score_thresh)
            # Boxes are decoded in the resized (insize x insize) frame;
            # rescale them to the original image size.
            bbox = transforms.resize_bbox(
                bbox, (self.insize, self.insize), size)
            bboxes.append(chainer.backends.cuda.to_cpu(bbox))
            labels.append(chainer.backends.cuda.to_cpu(label))
            scores.append(chainer.backends.cuda.to_cpu(score))

        return bboxes, labels, scores