Source code for chainercv.links.model.ssd.transforms

from __future__ import division

import numpy as np
import random
import six

from chainercv import utils


[docs]def random_distort( img, brightness_delta=32, contrast_low=0.5, contrast_high=1.5, saturation_low=0.5, saturation_high=1.5, hue_delta=18): """A color related data augmentation used in SSD. This function is a combination of four augmentation methods: brightness, contrast, saturation and hue. * brightness: Adding a random offset to the intensity of the image. * contrast: Multiplying the intensity of the image by a random scale. * saturation: Multiplying the saturation of the image by a random scale. * hue: Adding a random offset to the hue of the image randomly. This data augmentation is used in training of Single Shot Multibox Detector [#]_. Note that this function requires :mod:`cv2`. .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector. ECCV 2016. Args: img (~numpy.ndarray): An image array to be augmented. This is in CHW and RGB format. brightness_delta (float): The offset for saturation will be drawn from :math:`[-brightness\_delta, brightness\_delta]`. The default value is :obj:`32`. contrast_low (float): The scale for contrast will be drawn from :math:`[contrast\_low, contrast\_high]`. The default value is :obj:`0.5`. contrast_high (float): See :obj:`contrast_low`. The default value is :obj:`1.5`. saturation_low (float): The scale for saturation will be drawn from :math:`[saturation\_low, saturation\_high]`. The default value is :obj:`0.5`. saturation_high (float): See :obj:`saturation_low`. The default value is :obj:`1.5`. hue_delta (float): The offset for hue will be drawn from :math:`[-hue\_delta, hue\_delta]`. The default value is :obj:`18`. Returns: An image in CHW and RGB format. 
""" import cv2 cv_img = img[::-1].transpose((1, 2, 0)).astype(np.uint8) def convert(img, alpha=1, beta=0): img = img.astype(float) * alpha + beta img[img < 0] = 0 img[img > 255] = 255 return img.astype(np.uint8) def brightness(cv_img, delta): if random.randrange(2): return convert( cv_img, beta=random.uniform(-delta, delta)) else: return cv_img def contrast(cv_img, low, high): if random.randrange(2): return convert( cv_img, alpha=random.uniform(low, high)) else: return cv_img def saturation(cv_img, low, high): if random.randrange(2): cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2HSV) cv_img[:, :, 1] = convert( cv_img[:, :, 1], alpha=random.uniform(low, high)) return cv2.cvtColor(cv_img, cv2.COLOR_HSV2BGR) else: return cv_img def hue(cv_img, delta): if random.randrange(2): cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2HSV) cv_img[:, :, 0] = ( cv_img[:, :, 0].astype(int) + random.randint(-delta, delta)) % 180 return cv2.cvtColor(cv_img, cv2.COLOR_HSV2BGR) else: return cv_img cv_img = brightness(cv_img, brightness_delta) if random.randrange(2): cv_img = contrast(cv_img, contrast_low, contrast_high) cv_img = saturation(cv_img, saturation_low, saturation_high) cv_img = hue(cv_img, hue_delta) else: cv_img = saturation(cv_img, saturation_low, saturation_high) cv_img = hue(cv_img, hue_delta) cv_img = contrast(cv_img, contrast_low, contrast_high) return cv_img.astype(np.float32).transpose((2, 0, 1))[::-1]
def random_crop_with_bbox_constraints(
        img, bbox, min_scale=0.3, max_scale=1,
        max_aspect_ratio=2, constraints=None,
        max_trial=50, return_param=False):
    r"""Crop an image randomly with bounding box constraints.

    For every constraint, up to :obj:`max_trial` random regions are
    sampled and the first one whose IoU with every bounding box lies in
    the allowed range is kept as a candidate. The uncropped image is
    always a candidate as well. The returned crop is chosen uniformly at
    random among the candidates.

    This data augmentation is used in training of
    Single Shot Multibox Detector [#]_. More details can be found in
    data augmentation section of the original paper.

    .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan,
       Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg.
       SSD: Single Shot MultiBox Detector. ECCV 2016.

    Args:
        img (~numpy.ndarray): An image array to be cropped. This is in
            CHW format.
        bbox (~numpy.ndarray): Bounding boxes used for constraints.
            The shape is :math:`(R, 4)`.
            :math:`R` is the number of bounding boxes. If it is empty,
            no constraint is evaluated and the image is returned
            uncropped.
        min_scale (float): The minimum ratio between a cropped
            region and the original image. The default value is :obj:`0.3`.
        max_scale (float): The maximum ratio between a cropped
            region and the original image. The default value is :obj:`1`.
        max_aspect_ratio (float): The maximum aspect ratio of cropped
            region. The default value is :obj:`2`.
        constraints (iterable of tuples): An iterable of constraints.
            Each constraint should be :obj:`(min_iou, max_iou)` format.
            If you set :obj:`min_iou` or :obj:`max_iou` to :obj:`None`,
            it means not limited.
            If this argument is not specified,
            :obj:`((0.1, None), (0.3, None), (0.5, None),
            (0.7, None), (0.9, None), (None, 1))` will be used.
        max_trial (int): The maximum number of trials to be conducted
            for each constraint. If this function
            can not find any region that satisfies the constraint in
            :math:`max\_trial` trials, this function skips the constraint.
            The default value is :obj:`50`.
        return_param (bool): If :obj:`True`, this function returns
            information of intermediate values.

    Returns:
        ~numpy.ndarray or (~numpy.ndarray, dict):

        If :obj:`return_param = False`,
        returns an array :obj:`img` that is cropped from the input
        array.

        If :obj:`return_param = True`,
        returns a tuple whose elements are :obj:`img, param`.
        :obj:`param` is a dictionary of intermediate parameters whose
        contents are listed below with key, value-type and the description
        of the value.

        * **constraint** (*tuple*): The chosen constraint. This is
          :obj:`None` when the image is returned uncropped.
        * **y_slice** (*slice*): A slice in vertical direction used to
          crop the input image.
        * **x_slice** (*slice*): A slice in horizontal direction used to
          crop the input image.

    """
    if constraints is None:
        constraints = (
            (0.1, None),
            (0.3, None),
            (0.5, None),
            (0.7, None),
            (0.9, None),
            (None, 1),
        )

    _, H, W = img.shape
    # The uncropped image is always one of the candidates.
    params = [{
        'constraint': None,
        'y_slice': slice(0, H),
        'x_slice': slice(0, W)}]

    # Without boxes there is nothing to evaluate the IoU against.
    if len(bbox) == 0:
        constraints = []

    for min_iou, max_iou in constraints:
        if min_iou is None:
            min_iou = 0
        if max_iou is None:
            max_iou = 1

        for _ in six.moves.range(max_trial):
            scale = random.uniform(min_scale, max_scale)
            # The bounds keep both sides of the crop within the image.
            aspect_ratio = random.uniform(
                max(1 / max_aspect_ratio, scale * scale),
                min(max_aspect_ratio, 1 / (scale * scale)))
            crop_h = int(H * scale / np.sqrt(aspect_ratio))
            crop_w = int(W * scale * np.sqrt(aspect_ratio))
            # Valid offsets are 0 .. H - crop_h inclusive. randrange
            # excludes its stop value, so "+ 1" is needed both to reach
            # the bottom/right edge and to allow a full-size crop
            # (randrange(0) raises ValueError).
            crop_t = random.randrange(H - crop_h + 1)
            crop_l = random.randrange(W - crop_w + 1)
            # Laid out as (top, left, bottom, right); bbox is presumably
            # in the same order -- confirm against utils.bbox_iou.
            crop_bb = np.array((
                crop_t, crop_l, crop_t + crop_h, crop_l + crop_w))

            iou = utils.bbox_iou(bbox, crop_bb[np.newaxis])
            if min_iou <= iou.min() and iou.max() <= max_iou:
                params.append({
                    'constraint': (min_iou, max_iou),
                    'y_slice': slice(crop_t, crop_t + crop_h),
                    'x_slice': slice(crop_l, crop_l + crop_w)})
                break

    param = random.choice(params)
    img = img[:, param['y_slice'], param['x_slice']]

    if return_param:
        return img, param
    else:
        return img
[docs]def resize_with_random_interpolation(img, size, return_param=False): """Resize an image with a randomly selected interpolation method. This function is similar to :func:`chainercv.transforms.resize`, but this chooses the interpolation method randomly. This data augmentation is used in training of Single Shot Multibox Detector [#]_. Note that this function requires :mod:`cv2`. .. [#] Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed, Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector. ECCV 2016. Args: img (~numpy.ndarray): An array to be transformed. This is in CHW format and the type should be :obj:`numpy.float32`. size (tuple): This is a tuple of length 2. Its elements are ordered as (height, width). return_param (bool): Returns information of interpolation. Returns: ~numpy.ndarray or (~numpy.ndarray, dict): If :obj:`return_param = False`, returns an array :obj:`img` that is the result of rotation. If :obj:`return_param = True`, returns a tuple whose elements are :obj:`img, param`. :obj:`param` is a dictionary of intermediate parameters whose contents are listed below with key, value-type and the description of the value. * **interpolatation**: The chosen interpolation method. """ import cv2 cv_img = img.transpose((1, 2, 0)) inters = ( cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4, ) inter = random.choice(inters) H, W = size cv_img = cv2.resize(cv_img, (W, H), interpolation=inter) # If input is a grayscale image, cv2 returns a two-dimentional array. if len(cv_img.shape) == 2: cv_img = cv_img[:, :, np.newaxis] img = cv_img.astype(np.float32).transpose((2, 0, 1)) if return_param: return img, {'interpolation': inter} else: return img