from chainer.backends import cuda
def loc2bbox(src_bbox, loc):
    """Decode bounding boxes from bounding box offsets and scales.

    Given bounding box offsets and scales computed by
    :meth:`bbox2loc`, this function decodes the representation to
    coordinates in 2D image coordinates.

    Given scales and offsets :math:`t_y, t_x, t_h, t_w` and a bounding
    box whose center is :math:`(y, x) = p_y, p_x` and size :math:`p_h, p_w`,
    the decoded bounding box's center :math:`\\hat{g}_y`, :math:`\\hat{g}_x`
    and size :math:`\\hat{g}_h`, :math:`\\hat{g}_w` are calculated
    by the following formulas.

    * :math:`\\hat{g}_y = p_h t_y + p_y`
    * :math:`\\hat{g}_x = p_w t_x + p_x`
    * :math:`\\hat{g}_h = p_h \\exp(t_h)`
    * :math:`\\hat{g}_w = p_w \\exp(t_w)`

    The decoding formulas are used in works such as R-CNN [#]_.

    The output is allocated with the same array module as
    :obj:`src_bbox` and has the dtype of :obj:`loc`.

    .. [#] Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik. \
    Rich feature hierarchies for accurate object detection and semantic \
    segmentation. CVPR 2014.

    Args:
        src_bbox (array): Coordinates of the source bounding boxes.
            Its shape is :math:`(R, 4)`. These coordinates are
            :math:`p_{ymin}, p_{xmin}, p_{ymax}, p_{xmax}`.
        loc (array): An array with offsets and scales.
            The shapes of :obj:`src_bbox` and :obj:`loc` should be same.
            This contains values :math:`t_y, t_x, t_h, t_w`.

    Returns:
        array:
        Decoded bounding box coordinates. Its shape is :math:`(R, 4)`. \
        The second axis contains four values \
        :math:`\\hat{g}_{ymin}, \\hat{g}_{xmin},
        \\hat{g}_{ymax}, \\hat{g}_{xmax}`.

    """
    xp = cuda.get_array_module(src_bbox)

    if src_bbox.shape[0] == 0:
        # No source boxes, so there is nothing to decode.
        # NOTE(review): this branch always returns shape (0, 4), while the
        # non-empty path returns an array shaped like ``loc``.
        return xp.zeros((0, 4), dtype=loc.dtype)

    # The former ``src_bbox.astype(src_bbox.dtype, copy=False)`` call was
    # dropped: casting an array to its own dtype without copying is a no-op.

    # Corner form (ymin, xmin, ymax, xmax) -> size and center, one per box.
    src_height = src_bbox[:, 2] - src_bbox[:, 0]
    src_width = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_height
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_width

    # Column vectors, so the per-box values broadcast across the strided
    # slices of ``loc`` taken below.
    height = src_height[:, xp.newaxis]
    width = src_width[:, xp.newaxis]
    center_y = src_ctr_y[:, xp.newaxis]
    center_x = src_ctr_x[:, xp.newaxis]

    # ``loc`` is read with stride 4 along its second axis: columns
    # 0, 4, 8, ... hold t_y, columns 1, 5, 9, ... hold t_x, and so on.
    dy = loc[:, 0::4]
    dx = loc[:, 1::4]
    dh = loc[:, 2::4]
    dw = loc[:, 3::4]

    # Apply the decoding formulas from the docstring.
    ctr_y = dy * height + center_y
    ctr_x = dx * width + center_x
    h = xp.exp(dh) * height
    w = xp.exp(dw) * width

    # Center/size -> corner form, written back with the same stride-4 layout.
    dst_bbox = xp.zeros(loc.shape, dtype=loc.dtype)
    dst_bbox[:, 0::4] = ctr_y - 0.5 * h
    dst_bbox[:, 1::4] = ctr_x - 0.5 * w
    dst_bbox[:, 2::4] = ctr_y + 0.5 * h
    dst_bbox[:, 3::4] = ctr_x + 0.5 * w

    return dst_bbox