# Source code for chainercv.links.model.feature_predictor

import numpy as np
import warnings

import chainer
from chainer.backends import cuda

from chainercv.transforms import center_crop
from chainercv.transforms import resize
from chainercv.transforms import scale
from chainercv.transforms import ten_crop


class FeaturePredictor(chainer.Chain):

    r"""Wrapper that adds a prediction method to a feature extraction link.

    The :meth:`predict` takes three steps to make a prediction.

    1. Preprocess input images
    2. Forward the preprocessed images to the network
    3. Average features in the case when more than one crop is extracted.

    Example:

        >>> from chainercv.links import VGG16
        >>> from chainercv.links import FeaturePredictor
        >>> base_model = VGG16()
        >>> model = FeaturePredictor(base_model, 224, 256)
        >>> prob = model.predict([img])
        # Predicting multiple features
        >>> model.extractor.pick = ['conv5_3', 'fc7']
        >>> conv5_3, fc7 = model.predict([img])

    When :obj:`self.crop == 'center'`, :meth:`predict` extracts features from
    the center crop of the input images.
    When :obj:`self.crop == '10'`, :meth:`predict` extracts features from
    patches that are ten-cropped from the input images.

    When extracting more than one crop from an image, the output of
    :meth:`predict` returns the average of the features computed from the
    crops.

    Args:
        extractor: A feature extraction link. This is a callable chain
            that takes a batch of images and returns a variable or a
            tuple of variables.
        crop_size (int or tuple): The height and the width of an image after
            cropping in preprocessing.
            If this is an integer, the image is cropped to
            :math:`(crop\_size, crop\_size)`.
        scale_size (int or tuple): If :obj:`scale_size` is :obj:`None`,
            neither scaling nor resizing is conducted during preprocessing.
            This is the default behavior.
            If this is an integer, an image is resized so that the length of
            the shorter edge is equal to :obj:`scale_size`. If this is a tuple
            :obj:`(height, width)`, the image is resized to
            :math:`(height, width)`.
        crop ({'center', '10'}): Determines the style of cropping.
            The default is :obj:`'center'`.
        mean (numpy.ndarray): A mean value. If this is :obj:`None`,
            :obj:`extractor.mean` is used as the mean value.

    """

    def __init__(self, extractor,
                 crop_size, scale_size=None,
                 crop='center', mean=None):
        super(FeaturePredictor, self).__init__()
        self.scale_size = scale_size
        # An integer crop size is shorthand for a square crop.
        if isinstance(crop_size, int):
            crop_size = (crop_size, crop_size)
        self.crop_size = crop_size
        self.crop = crop
        # Register the extractor as a child link of this chain.
        with self.init_scope():
            self.extractor = extractor

        if mean is None:
            self.mean = self.extractor.mean
        else:
            self.mean = mean

    def _prepare(self, img):
        """Prepare an image for feeding it to a model.

        This is a standard preprocessing scheme used by feature extraction
        models.
        First, the image is scaled or resized according to :obj:`scale_size`.
        Note that this step is optional.
        Next, the image is cropped to :obj:`crop_size`.
        Last, the image is mean subtracted by an array :obj:`mean`.

        The array passed by the caller is never modified.

        Args:
            img (~numpy.ndarray): An image. This is in CHW format.
                The range of its value is :math:`[0, 255]`.

        Returns:
            ~numpy.ndarray:
            A preprocessed image. This is 4D array whose batch size is
            the number of crops.

        Raises:
            ValueError: If :obj:`self.crop` is neither :obj:`'center'` nor
                :obj:`'10'`.

        """
        # Keep a handle on the caller's array so that we can tell below
        # whether the crops alias it.
        src = img

        if self.scale_size is not None:
            if isinstance(self.scale_size, int):
                # Integer: match the shorter edge, keep the aspect ratio.
                img = scale(img, size=self.scale_size)
            else:
                # (height, width): resize to exactly that shape.
                img = resize(img, size=self.scale_size)

        if self.crop == '10':
            imgs = ten_crop(img, self.crop_size)
        elif self.crop == 'center':
            # Add a leading axis so a single crop is a batch of one.
            imgs = center_crop(img, self.crop_size)[np.newaxis]
        else:
            raise ValueError(
                'crop must be either \'center\' or \'10\', '
                'got {!r}'.format(self.crop))

        # The crop may be a view of the caller's array (e.g. a plain slice
        # when no rescaling happened). The in-place subtraction below would
        # then silently modify the caller's image, so detach it first.
        if np.may_share_memory(imgs, src):
            imgs = imgs.copy()

        # In-place on purpose: keeps the dtype of the cropped image.
        imgs -= self.mean[np.newaxis]

        return imgs

    def _average_crops(self, y, n_crop):
        """Average features over the crops that belong to the same image.

        Args:
            y (array): Features whose first axis has length
                ``n_images * n_crop``, with the crops of one image stored
                contiguously.
            n_crop (int): The number of crops per image.

        Returns:
            array:
            Features whose first axis has length ``n_images``.

        """
        if y.ndim == 4:
            warnings.warn(
                'Four dimensional features are averaged. '
                'If these are batch of 2D spatial features, '
                'their spatial information would be lost.')

        xp = cuda.get_array_module(y)
        # (B * N, ...) -> (B, N, ...) -> (B, ...)
        y = y.reshape((-1, n_crop) + y.shape[1:])
        y = xp.mean(y, axis=1)
        return y

    def _postprocess(self, feature, n_crop):
        """Turn one output variable of the extractor into a CPU array.

        Crops are averaged before the transfer so that single and tuple
        outputs of the extractor are handled identically.

        Args:
            feature: A variable returned by the extractor.
            n_crop (int): The number of crops per image.

        Returns:
            numpy.ndarray:
            The features, averaged over crops when :obj:`n_crop > 1`.

        """
        feature = feature.array
        if n_crop > 1:
            feature = self._average_crops(feature, n_crop)
        return cuda.to_cpu(feature)

    def predict(self, imgs):
        """Predict features from images.

        Given :math:`B` input images, this method outputs a batched array with
        batchsize :math:`B`.

        Args:
            imgs (iterable of numpy.ndarray): Array-images.
                All images are in CHW format
                and the range of their value is :math:`[0, 255]`.

        Returns:
            numpy.ndarray or tuple of numpy.ndarray:
            A batch of features or a tuple of them.

        """
        # B images, N crops per image:
        # [(C, H_0, W_0), ..., (C, H_{B-1}, W_{B-1})] -> (B, N, C, H, W)
        imgs = self.xp.asarray([self._prepare(img) for img in imgs])
        n_crop = imgs.shape[-4]
        # tuple() so that a list-valued crop_size can be concatenated too.
        shape = (-1, imgs.shape[-3]) + tuple(self.crop_size)
        # (B, N, C, H, W) -> (B * N, C, H, W)
        imgs = imgs.reshape(shape)

        # Inference only: test-mode behavior and no computational graph.
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            imgs = chainer.Variable(imgs)
            features = self.extractor(imgs)

        if isinstance(features, tuple):
            return tuple(
                self._postprocess(feature, n_crop) for feature in features)
        return self._postprocess(features, n_crop)