Source code for chainercv.experimental.links.model.fcis.utils.mask_voting

import numpy as np

from chainercv.transforms.image.resize import resize
from chainercv.utils.bbox.bbox_iou import bbox_iou
from chainercv.utils import non_maximum_suppression


def _mask_aggregation(
        bbox, seg_prob, seg_weight,
        size, binary_thresh
):
    assert bbox.shape[0] == len(seg_prob)
    assert bbox.shape[0] == seg_weight.shape[0]

    aggregated_msk = np.zeros(size, dtype=np.float32)
    for bb, seg_pb, seg_w in zip(bbox, seg_prob, seg_weight):
        bb = np.round(bb).astype(np.int32)
        y_min, x_min, y_max, x_max = bb
        if y_max - y_min > 0 and x_max - x_min > 0:
            seg_pb = resize(
                seg_pb.astype(np.float32)[None],
                (y_max - y_min, x_max - x_min))
            seg_m = (seg_pb >= binary_thresh).astype(np.float32)[0]
            aggregated_msk[y_min:y_max, x_min:x_max] += seg_m * seg_w

    y_indices, x_indices = np.where(aggregated_msk >= binary_thresh)
    if len(y_indices) == 0 or len(x_indices) == 0:
        return None, None
    else:
        y_max = y_indices.max() + 1
        y_min = y_indices.min()
        x_max = x_indices.max() + 1
        x_min = x_indices.min()

        aggregated_bb = np.array(
            [y_min, x_min, y_max, x_max],
            dtype=np.float32)
        aggregated_cmsk = aggregated_msk[y_min:y_max, x_min:x_max]
        return aggregated_cmsk[None], aggregated_bb[None]


[docs]def mask_voting( seg_prob, bbox, cls_prob, size, score_thresh, nms_thresh, mask_merge_thresh, binary_thresh, limit=100, bg_label=0 ): """Refine mask probabilities by merging multiple masks. First, this function discard invalid masks with non maximum suppression. Then, it merges masks with weight calculated from class probabilities and iou. This function improves the mask qualities by merging overlapped masks predicted as the same object class. Here are notations used. * :math:`R` is the total number of RoIs produced in one image. * :math:`L` is the number of classes excluding the background. * :math:`RH` is the height of pooled image. * :math:`RW` is the height of pooled image. Args: seg_prob (array): A mask probability array whose shape is :math:`(R, RH, RW)`. bbox (array): A bounding box array whose shape is :math:`(R, 4)`. cls_prob (array): A class probability array whose shape is :math:`(R, L + 1)`. size (tuple of int): Original image size. score_thresh (float): A threshold value of the class score. nms_thresh (float): A threshold value of non maximum suppression. mask_merge_thresh (float): A threshold value of the bounding box iou for mask merging. binary_thresh (float): A threshold value of mask score for mask merging. limit (int): The maximum number of outputs. bg_label (int): The id of the background label. Returns: array, array, array, array: * **v_seg_prob**: Merged mask probability. Its shapes is \ :math:`(N, RH, RW)`. * **v_bbox**: Bounding boxes for the merged masks. Its shape is \ :math:`(N, 4)`. * **v_label**: Class labels for the merged masks. Its shape is \ :math:`(N, )`. * **v_score**: Class probabilities for the merged masks. Its shape \ is :math:`(N, )`. """ seg_size = seg_prob.shape[1:] n_class = cls_prob.shape[1] v_seg_prob = [] v_bbox = [] v_label = [] v_cls_prob = [] cls_score = [] cls_bbox = [] for label in range(0, n_class): # background if label == bg_label: continue # non maximum suppression score_l = cls_prob[:, label] keep_indices = non_maximum_suppression( bbox, nms_thresh, score_l) bbox_l = bbox[keep_indices] score_l = score_l[keep_indices] cls_bbox.append(bbox_l) cls_score.append(score_l) sorted_score = np.sort(np.concatenate(cls_score))[::-1] n_keep = min(len(sorted_score), limit) score_thresh = max(sorted_score[n_keep - 1], score_thresh) for label in range(0, n_class): # background if label == bg_label: continue bbox_l = cls_bbox[label - 1] score_l = cls_score[label - 1] keep_indices = np.where(score_l >= score_thresh) bbox_l = bbox_l[keep_indices] score_l = score_l[keep_indices] v_seg_prob_l = [] v_bbox_l = [] v_score_l = [] for i, bb in enumerate(bbox_l): iou = bbox_iou(bbox, bb[np.newaxis, :]) keep_indices = np.where(iou >= mask_merge_thresh)[0] seg_weight = cls_prob[keep_indices, label] seg_weight = seg_weight / seg_weight.sum() seg_prob_i = seg_prob[keep_indices] bbox_i = bbox[keep_indices] m_seg, m_bbox = _mask_aggregation( bbox_i, seg_prob_i, seg_weight, size, binary_thresh) if m_seg is not None and m_bbox is not None: m_seg = resize(m_seg, seg_size) m_seg = np.clip(m_seg, 0.0, 1.0) v_seg_prob_l.append(m_seg) v_bbox_l.append(m_bbox) v_score_l.append(score_l[i]) if len(v_seg_prob_l) > 0: v_label_l = np.repeat( label - 1, len(v_score_l)).astype(np.int32) v_seg_prob += v_seg_prob_l v_bbox += v_bbox_l v_label.append(v_label_l) v_cls_prob.append(v_score_l) if len(v_seg_prob) > 0: v_seg_prob = np.concatenate(v_seg_prob) v_bbox = np.concatenate(v_bbox) v_label = np.concatenate(v_label) v_cls_prob = np.concatenate(v_cls_prob) else: v_seg_prob = np.empty((0, seg_size[0], seg_size[1])) v_bbox = np.empty((0, 4)) v_label = np.empty((0, )) v_cls_prob = np.empty((0, )) return v_seg_prob, v_bbox, v_label, v_cls_prob