import numpy as np
import chainer
from chainer.backends import cuda
from chainercv.links.model.faster_rcnn.utils.loc2bbox import loc2bbox
from chainercv.utils.bbox.non_maximum_suppression import \
non_maximum_suppression
class ProposalCreator(object):
    """Callable object that generates region proposals.

    Calling an instance (see :meth:`__call__`) produces object detection
    proposals by applying estimated bounding box offsets to a set of
    anchors.

    The constructor arguments control how many bounding boxes are handed
    to NMS and how many are kept once NMS has run. A limit that is not
    positive (e.g. a negative value) disables the corresponding
    truncation: every supplied bounding box is passed to NMS, or every
    bounding box returned by NMS is kept.

    This class is used for Region Proposal Networks introduced in
    Faster R-CNN [#]_.

    .. [#] Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun. \
    Faster R-CNN: Towards Real-Time Object Detection with \
    Region Proposal Networks. NIPS 2015.

    Args:
        nms_thresh (float): Threshold value used when calling NMS.
        n_train_pre_nms (int): Number of top scored bounding boxes
            to keep before passing to NMS in train mode.
        n_train_post_nms (int): Number of top scored bounding boxes
            to keep after passing to NMS in train mode.
        n_test_pre_nms (int): Number of top scored bounding boxes
            to keep before passing to NMS in test mode.
        n_test_post_nms (int): Number of top scored bounding boxes
            to keep after passing to NMS in test mode.
        force_cpu_nms (bool): If this is :obj:`True`,
            always use NMS in CPU mode. If :obj:`False`,
            the NMS mode is selected based on the type of inputs.
        min_size (int): A parameter to determine the threshold on
            discarding bounding boxes based on their sizes. It is
            multiplied by :obj:`scale` at call time.

    """

    def __init__(self,
                 nms_thresh=0.7,
                 n_train_pre_nms=12000,
                 n_train_post_nms=2000,
                 n_test_pre_nms=6000,
                 n_test_post_nms=300,
                 force_cpu_nms=False,
                 min_size=16
                 ):
        # NMS behaviour.
        self.nms_thresh = nms_thresh
        self.force_cpu_nms = force_cpu_nms
        # Size filter applied before ranking.
        self.min_size = min_size
        # Box-count limits, one pair per mode.
        self.n_train_pre_nms = n_train_pre_nms
        self.n_train_post_nms = n_train_post_nms
        self.n_test_pre_nms = n_test_pre_nms
        self.n_test_post_nms = n_test_post_nms

    def __call__(self, loc, score,
                 anchor, img_size, scale=1.):
        """Propose RoIs.

        The same index into :obj:`loc`, :obj:`score` and :obj:`anchor`
        refers to the same anchor.

        On notations, :math:`R` is the total number of anchors. This is
        equal to product of the height and the width of an image and the
        number of anchor bases per pixel.

        The output lives on the same device as the inputs: the work is
        done on CPU copies and the result is sent back to GPU when
        :obj:`loc` is not a NumPy array.

        Args:
            loc (array): Predicted offsets and scaling to anchors.
                Its shape is :math:`(R, 4)`.
            score (array): Predicted foreground probability for anchors.
                Its shape is :math:`(R,)`.
            anchor (array): Coordinates of anchors. Its shape is
                :math:`(R, 4)`.
            img_size (tuple of ints): A tuple :obj:`height, width`,
                which contains image size after scaling.
            scale (float): The scaling factor used to scale an image after
                reading it from a file.

        Returns:
            array:
            An array of coordinates of proposal boxes.
            Its shape is :math:`(S, 4)`. When the post-NMS limit is
            positive, :math:`S` is at most :obj:`self.n_test_post_nms`
            in test time and at most :obj:`self.n_train_post_nms` in
            train time. :math:`S` depends on the size of the predicted
            bounding boxes and the number of bounding boxes discarded
            by NMS.

        """
        # Pick the limits that belong to the current chainer mode.
        training = chainer.config.train
        pre_nms_limit = (
            self.n_train_pre_nms if training else self.n_test_pre_nms)
        post_nms_limit = (
            self.n_train_post_nms if training else self.n_test_post_nms)

        # Remember where the inputs came from, then work on CPU copies.
        xp = cuda.get_array_module(loc)
        from_gpu = xp != np
        loc, score, anchor = (
            cuda.to_cpu(array) for array in (loc, score, anchor))

        # Convert anchors into proposals via bbox transformations.
        boxes = loc2bbox(anchor, loc)

        # Clip predicted boxes to the image: columns 0 and 2 are bounded
        # by the height, columns 1 and 3 by the width.
        boxes[:, 0:4:2] = np.clip(boxes[:, 0:4:2], 0, img_size[0])
        boxes[:, 1:4:2] = np.clip(boxes[:, 1:4:2], 0, img_size[1])

        # Drop boxes whose height or width is below the scaled threshold.
        size_threshold = self.min_size * scale
        box_heights = boxes[:, 2] - boxes[:, 0]
        box_widths = boxes[:, 3] - boxes[:, 1]
        large_enough = np.where(np.logical_and(
            box_heights >= size_threshold,
            box_widths >= size_threshold))[0]
        boxes = boxes[large_enough, :]
        score = score[large_enough]

        # Rank the remaining boxes from highest to lowest score and keep
        # only the best ones (e.g. 6000) when a positive limit is set.
        ranking = score.ravel().argsort()[::-1]
        if pre_nms_limit > 0:
            ranking = ranking[:pre_nms_limit]
        boxes = boxes[ranking, :]

        # Apply NMS (e.g. threshold = 0.7). The GPU implementation is
        # used only for GPU inputs and when CPU NMS is not forced.
        if from_gpu and not self.force_cpu_nms:
            selected = cuda.to_cpu(non_maximum_suppression(
                cuda.to_gpu(boxes), thresh=self.nms_thresh))
        else:
            selected = non_maximum_suppression(
                boxes, thresh=self.nms_thresh)

        # Keep at most the post-NMS number of boxes (e.g. 300).
        if post_nms_limit > 0:
            selected = selected[:post_nms_limit]
        boxes = boxes[selected]

        # Hand the result back on the device the inputs came from.
        return cuda.to_gpu(boxes) if from_gpu else boxes