Source code for chainercv.links.model.feature_predictor

import numpy as np
import warnings

import chainer
from chainer.backends import cuda

from chainercv.transforms import center_crop
from chainercv.transforms import resize
from chainercv.transforms import scale
from chainercv.transforms import ten_crop


class FeaturePredictor(chainer.Chain):
    r"""Wrapper that adds a prediction method to a feature extraction link.

    The :meth:`predict` takes three steps to make a prediction.

    1. Preprocess input images
    2. Forward the preprocessed images to the network
    3. Average features in the case when more than one crop is extracted.

    Example:

        >>> from chainercv.links import VGG16
        >>> from chainercv.links import FeaturePredictor
        >>> base_model = VGG16()
        >>> model = FeaturePredictor(base_model, 224, 256)
        >>> prob = model.predict([img])
        >>> # Predicting multiple features
        >>> model.extractor.pick = ['conv5_3', 'fc7']
        >>> conv5_3, fc7 = model.predict([img])

    When :obj:`self.crop == 'center'`, :meth:`predict` extracts features from
    the center crop of the input images.
    When :obj:`self.crop == '10'`, :meth:`predict` extracts features from
    patches that are ten-cropped from the input images.

    When extracting more than one crop from an image, the output of
    :meth:`predict` returns the average of the features computed from the
    crops.

    Args:
        extractor: A feature extraction link. This is a callable chain
            that takes a batch of images and returns a variable or a
            tuple of variables.
        crop_size (int or tuple): The height and the width of an image after
            cropping in preprocessing.
            If this is an integer, the image is cropped to
            :math:`(crop\_size, crop\_size)`.
        scale_size (int or tuple): If :obj:`scale_size` is :obj:`None`,
            neither scaling nor resizing is conducted during preprocessing.
            This is the default behavior.
            If this is an integer, an image is resized so that the length of
            the shorter edge is equal to :obj:`scale_size`. If this is a
            tuple :obj:`(height, width)`, the image is resized to
            :math:`(height, width)`.
        crop ({'center', '10'}): Determines the style of cropping.
        mean (numpy.ndarray): A mean value. If this is :obj:`None`,
            :obj:`extractor.mean` is used as the mean value.

    """

    def __init__(self, extractor,
                 crop_size, scale_size=None,
                 crop='center', mean=None):
        super(FeaturePredictor, self).__init__()
        self.scale_size = scale_size
        if isinstance(crop_size, int):
            crop_size = (crop_size, crop_size)
        else:
            # predict() concatenates this onto a tuple to build a reshape
            # target, so a list-valued crop_size must become a tuple.
            crop_size = tuple(crop_size)
        self.crop_size = crop_size
        self.crop = crop

        with self.init_scope():
            self.extractor = extractor

        if mean is None:
            self.mean = self.extractor.mean
        else:
            self.mean = mean

    def _prepare(self, img):
        """Prepare an image for feeding it to a model.

        This is a standard preprocessing scheme used by feature extraction
        models.
        First, the image is scaled or resized according to
        :math:`scale_size`. Note that this step is optional.
        Next, the image is cropped to :math:`crop_size`.
        Last, the image is mean subtracted by an array :obj:`mean`.

        The input array is never modified.

        Args:
            img (~numpy.ndarray): An image. This is in CHW format.
                The range of its value is :math:`[0, 255]`.

        Returns:
            ~numpy.ndarray:
            A preprocessed image. This is 4D array whose batch size is
            the number of crops.

        Raises:
            ValueError: If :obj:`self.crop` is neither :obj:`'center'`
                nor :obj:`'10'`.

        """
        if self.scale_size is not None:
            if isinstance(self.scale_size, int):
                img = scale(img, size=self.scale_size)
            else:
                img = resize(img, size=self.scale_size)

        if self.crop == '10':
            imgs = ten_crop(img, self.crop_size)
        elif self.crop == 'center':
            imgs = center_crop(img, self.crop_size)[np.newaxis]
        else:
            raise ValueError(
                'crop must be either \'center\' or \'10\', '
                'got {!r}'.format(self.crop))

        # The crop may still share memory with the caller's image (e.g. a
        # sliced center crop when no rescaling happened). Copy first so the
        # in-place mean subtraction cannot alter the input, while keeping
        # the dtype and casting behaviour of an in-place subtraction.
        imgs = imgs.copy()
        imgs -= self.mean[np.newaxis]

        return imgs

    def _average_crops(self, y, n_crop):
        """Average features over the crops of each image.

        Args:
            y (array): Features whose first axis has length
                ``n_images * n_crop``, with the crops of one image stored
                contiguously.
            n_crop (int): The number of crops per image.

        Returns:
            array: Features whose first axis has length ``n_images``.

        """
        if y.ndim == 4:
            warnings.warn(
                'Four dimensional features are averaged. '
                'If these are batch of 2D spatial features, '
                'their spatial information would be lost.')

        xp = cuda.get_array_module(y)
        # (B * n_crop, ...) -> (B, n_crop, ...) -> (B, ...)
        y = y.reshape((-1, n_crop) + y.shape[1:])
        return xp.mean(y, axis=1)

    def _to_output(self, feature, n_crop):
        """Turn one extractor output into a CPU array averaged over crops."""
        feature = feature.array
        if n_crop > 1:
            feature = self._average_crops(feature, n_crop)
        return cuda.to_cpu(feature)

    def predict(self, imgs):
        """Predict features from images.

        Given :math:`N` input images, this method outputs a batched array with
        batchsize :math:`N`.

        Args:
            imgs (iterable of numpy.ndarray): Array-images.
                All images are in CHW format
                and the range of their value is :math:`[0, 255]`.

        Returns:
            numpy.ndarray or tuple of numpy.ndarray:
            A batch of features or a tuple of them.

        """
        # [(C, H_0, W_0), ..., (C, H_{B-1}, W_{B-1})]
        #     -> (B, n_crop, C, H, W)
        imgs = self.xp.asarray([self._prepare(img) for img in imgs])
        n_crop = imgs.shape[-4]
        shape = (-1, imgs.shape[-3]) + self.crop_size
        # (B, n_crop, C, H, W) -> (B * n_crop, C, H, W)
        imgs = imgs.reshape(shape)

        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            imgs = chainer.Variable(imgs)
            features = self.extractor(imgs)

        if isinstance(features, tuple):
            return tuple(
                self._to_output(feature, n_crop) for feature in features)
        return self._to_output(features, n_crop)