import chainer
from chainer.backends import cuda
from chainercv import transforms
class YOLOBase(chainer.Chain):
    """Base class for YOLOv2 and YOLOv3.

    A class inheriting this class should have :obj:`extractor`,
    :meth:`__call__`, and :meth:`_decode`.

    As used by :meth:`predict`:

    * :obj:`extractor` must expose an :obj:`insize` attribute.
    * :meth:`__call__` takes a stacked batch of images and returns
      :obj:`(locs, objs, confs)`, each exposing an :obj:`array` attribute.
    * :meth:`_decode` takes one :obj:`(loc, obj, conf)` triple and returns
      :obj:`(bbox, label, score)` for a single image.

    """

    @property
    def insize(self):
        """Input size of the network, taken from :obj:`extractor.insize`."""
        return self.extractor.insize

    def use_preset(self, preset):
        """Use the given preset during prediction.

        This method changes values of :obj:`nms_thresh` and
        :obj:`score_thresh`. These values are a threshold value
        used for non maximum suppression and a threshold value
        to discard low confidence proposals in :meth:`predict`,
        respectively.

        If the attributes need to be changed to something
        other than the values provided in the presets, please modify
        them by directly accessing the public attributes.

        Args:
            preset ({'visualize', 'evaluate'}): A string to determine the
                preset to use.

        Raises:
            ValueError: If :obj:`preset` is neither :obj:`'visualize'`
                nor :obj:`'evaluate'`.

        """

        if preset == 'visualize':
            self.nms_thresh = 0.45
            self.score_thresh = 0.5
        elif preset == 'evaluate':
            self.nms_thresh = 0.45
            self.score_thresh = 0.005
        else:
            raise ValueError('preset must be visualize or evaluate')

    def predict(self, imgs):
        """Detect objects from images.

        This method predicts objects for each image.

        Args:
            imgs (iterable of numpy.ndarray): Arrays holding images.
                All images are in CHW and RGB format
                and the range of their value is :math:`[0, 255]`.

        Returns:
           tuple of lists:
           This method returns a tuple of three lists,
           :obj:`(bboxes, labels, scores)`. If :obj:`imgs` is empty,
           the three lists are empty.

           * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
               where :math:`R` is the number of bounding boxes in an image. \
               Each bounding box is organized by \
               :math:`(y_{min}, x_{min}, y_{max}, x_{max})` \
               in the second axis.
           * **labels** : A list of integer arrays of shape :math:`(R,)`. \
               Each value indicates the class of the bounding box. \
               Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
               number of the foreground classes.
           * **scores** : A list of float arrays of shape :math:`(R,)`. \
               Each value indicates how confident the prediction is.

        """

        insize = self.insize

        x = []
        params = []
        for img in imgs:
            _, H, W = img.shape
            # Scale pixel values by 1 / 255 and fit the image into an
            # insize x insize canvas, padding with 0.5.
            img, param = transforms.resize_contain(
                img / 255, (insize, insize), fill=0.5,
                return_param=True)
            x.append(self.xp.array(img))
            # Remember the original size so boxes can be mapped back later.
            param['size'] = (H, W)
            params.append(param)

        # Stacking an empty list raises, so short-circuit on empty input.
        if not x:
            return [], [], []

        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            locs, objs, confs = self(self.xp.stack(x))
        locs = locs.array
        objs = objs.array
        confs = confs.array

        bboxes = []
        labels = []
        scores = []
        for loc, obj, conf, param in zip(locs, objs, confs, params):
            bbox, label, score = self._decode(loc, obj, conf)
            bbox = cuda.to_cpu(bbox)
            label = cuda.to_cpu(label)
            score = cuda.to_cpu(score)

            # Map boxes from network-input coordinates back to the original
            # image: shift by half the input size, rescale from the resized
            # image size to the original size, then shift by half the
            # original size.
            bbox = transforms.translate_bbox(
                bbox, -insize / 2, -insize / 2)
            bbox = transforms.resize_bbox(
                bbox, param['scaled_size'], param['size'])
            bbox = transforms.translate_bbox(
                bbox, param['size'][0] / 2, param['size'][1] / 2)

            bboxes.append(bbox)
            labels.append(label)
            scores.append(score)

        return bboxes, labels, scores