import numpy as np
import random
def random_expand(img, max_ratio=4, fill=0, return_param=False):
    r"""Expand an image randomly.

    This method randomly places the input image on a larger canvas. The size
    of the canvas is :math:`(rH, rW)`, where :math:`(H, W)` is the size of the
    input image and :math:`r` is a random ratio drawn from
    :math:`[1, max\_ratio]`. The canvas is filled by a value :obj:`fill`
    except for the region where the original image is placed.

    This data augmentation trick is used to create "zoom out" effect [#]_.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy,
        Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
        SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        img (~numpy.ndarray): An image array to be augmented. This is in
            CHW format.
        max_ratio (float): The maximum ratio of expansion. In the original
            paper, this value is 4. If it is less than or equal to 1, no
            expansion is performed and :obj:`img` itself is returned.
        fill (float, tuple or ~numpy.ndarray): The value of padded pixels.
            In the original paper, this value is the mean of ImageNet.
            If it is :class:`numpy.ndarray`,
            its shape should be :math:`(C, 1, 1)`,
            where :math:`C` is the number of channels of :obj:`img`.
        return_param (bool): Returns random parameters.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict):

        If :obj:`return_param = False`,
        returns an array :obj:`out_img` that is the result of expansion.

        If :obj:`return_param = True`,
        returns a tuple whose elements are :obj:`out_img, param`.
        :obj:`param` is a dictionary of intermediate parameters whose
        contents are listed below with key, value-type and the description
        of the value.

        * **ratio** (*float*): The sampled value used to make the canvas.
        * **y_offset** (*int*): The y coordinate of the top left corner of
          the image after placing on the canvas.
        * **x_offset** (*int*): The x coordinate of the top left corner
          of the image after placing on the canvas.

    """
    if max_ratio <= 1:
        # Nothing to expand: the input array itself (not a copy) is returned,
        # together with the identity parameters when they are requested.
        if return_param:
            return img, {'ratio': 1, 'y_offset': 0, 'x_offset': 0}
        else:
            return img

    C, H, W = img.shape

    ratio = random.uniform(1, max_ratio)
    # Truncate toward zero; since ratio >= 1 the canvas is never smaller
    # than the image.
    out_H, out_W = int(H * ratio), int(W * ratio)

    # random.randint is inclusive on both ends, so the image may touch the
    # bottom/right border of the canvas but never cross it.
    y_offset = random.randint(0, out_H - H)
    x_offset = random.randint(0, out_W - W)

    out_img = np.empty((C, out_H, out_W), dtype=img.dtype)
    # Reshape to (-1, 1, 1) so a scalar broadcasts to every pixel and a
    # per-channel tuple/array broadcasts along the channel axis.
    out_img[:] = np.array(fill).reshape((-1, 1, 1))
    out_img[:, y_offset:y_offset + H, x_offset:x_offset + W] = img

    if return_param:
        param = {'ratio': ratio, 'y_offset': y_offset, 'x_offset': x_offset}
        return out_img, param
    else:
        return out_img