# Copyright (c) OpenMMLab. All rights reserved.
import warnings

import cv2
import numpy as np

from mmpose.core.post_processing import transform_preds


def _calc_distances(preds, targets, mask, normalize):
    """Calculate the normalized distances between preds and targets.

    Note:
        batch_size: N
        num_keypoints: K
        dimension of keypoints: D (normally, D=2 or D=3)

    Args:
        preds (np.ndarray[N, K, D]): Predicted keypoint location.
        targets (np.ndarray[N, K, D]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize (np.ndarray[N, D]): Typical value is heatmap_size.

    Returns:
        np.ndarray[K, N]: The normalized distances. \
            If target keypoints are missing, the distance is -1.
    """
    N, K, _ = preds.shape
    # set mask=0 when normalize==0
    _mask = mask.copy()
    _mask[np.where((normalize == 0).sum(1))[0], :] = False
    distances = np.full((N, K), -1, dtype=np.float32)
    # handle invalid values
    normalize[np.where(normalize <= 0)] = 1e6
    distances[_mask] = np.linalg.norm(
        ((preds - targets) / normalize[:, None, :])[_mask], axis=-1)
    return distances.T


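# Illustrative sketch of _calc_distances (shapes and values below are assumed,
# not required by the function): distances come back per keypoint, with -1
# marking masked-out joints.
#     >>> preds = np.zeros((1, 3, 2), dtype=np.float32)
#     >>> targets = np.ones((1, 3, 2), dtype=np.float32)
#     >>> mask = np.array([[True, True, False]])
#     >>> normalize = np.ones((1, 2), dtype=np.float32)
#     >>> dists = _calc_distances(preds, targets, mask, normalize)
#     >>> dists.shape
#     # (3, 1), i.e. (K, N); the first two entries are sqrt(2), the last is -1
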
def _distance_acc(distances, thr=0.5):
    """Return the percentage below the distance threshold, while ignoring
    distance values of -1.

    Note:
        batch_size: N

    Args:
        distances (np.ndarray[N, ]): The normalized distances.
        thr (float): Threshold of the distances.

    Returns:
        float: Percentage of distances below the threshold. \
            If all target keypoints are missing, return -1.
    """
    distance_valid = distances != -1
    num_distance_valid = distance_valid.sum()
    if num_distance_valid > 0:
        return (distances[distance_valid] < thr).sum() / num_distance_valid
    return -1


def _get_max_preds(heatmaps):
    """Get keypoint predictions from score maps.

    Note:
        batch_size: N
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.

    Returns:
        tuple: A tuple containing aggregated results.

        - preds (np.ndarray[N, K, 2]): Predicted keypoint location.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    assert isinstance(heatmaps, np.ndarray), 'heatmaps should be numpy.ndarray'
    assert heatmaps.ndim == 4, 'heatmaps should be 4-ndim'

    N, K, _, W = heatmaps.shape
    heatmaps_reshaped = heatmaps.reshape((N, K, -1))
    idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
    maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
    preds[:, :, 0] = preds[:, :, 0] % W
    preds[:, :, 1] = preds[:, :, 1] // W

    preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
    return preds, maxvals


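# Illustrative sketch of _get_max_preds (array sizes below are assumed):
# the flat argmax index is decoded as (x, y) = (idx % W, idx // W).
#     >>> heatmaps = np.zeros((1, 1, 64, 48), dtype=np.float32)
#     >>> heatmaps[0, 0, 10, 20] = 1.0        # peak at row 10 (y), column 20 (x)
#     >>> preds, maxvals = _get_max_preds(heatmaps)
#     # preds[0, 0] ~ [20., 10.], maxvals[0, 0, 0] == 1.0
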
def _get_max_preds_3d(heatmaps):
    """Get keypoint predictions from 3D score maps.

    Note:
        batch size: N
        num keypoints: K
        heatmap depth size: D
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.

    Returns:
        tuple: A tuple containing aggregated results.

        - preds (np.ndarray[N, K, 3]): Predicted keypoint location.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    assert isinstance(heatmaps, np.ndarray), 'heatmaps should be numpy.ndarray'
    assert heatmaps.ndim == 5, 'heatmaps should be 5-ndim'

    N, K, D, H, W = heatmaps.shape
    heatmaps_reshaped = heatmaps.reshape((N, K, -1))
    idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
    maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))

    preds = np.zeros((N, K, 3), dtype=np.float32)
    _idx = idx[..., 0]
    preds[..., 2] = _idx // (H * W)
    preds[..., 1] = (_idx // W) % H
    preds[..., 0] = _idx % W

    preds = np.where(maxvals > 0.0, preds, -1)
    return preds, maxvals


def pose_pck_accuracy(output, target, mask, thr=0.05, normalize=None):
    """Calculate the pose accuracy of PCK for each individual keypoint and the
    averaged accuracy across all keypoints from heatmaps.

    Note:
        PCK metric measures accuracy of the localization of the body joints.
        The distances between predicted positions and the ground-truth ones
        are typically normalized by the bounding box size.
        The threshold (thr) of the normalized distance is commonly set
        as 0.05, 0.1 or 0.2 etc.

        - batch_size: N
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        output (np.ndarray[N, K, H, W]): Model output heatmaps.
        target (np.ndarray[N, K, H, W]): Groundtruth heatmaps.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation. Default 0.05.
        normalize (np.ndarray[N, 2]): Normalization factor for H&W.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - np.ndarray[K]: Accuracy of each keypoint.
        - float: Averaged accuracy across all keypoints.
        - int: Number of valid keypoints.
    """
    N, K, H, W = output.shape
    if K == 0:
        return None, 0, 0
    if normalize is None:
        normalize = np.tile(np.array([[H, W]]), (N, 1))

    pred, _ = _get_max_preds(output)
    gt, _ = _get_max_preds(target)
    return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)


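# Illustrative sketch of pose_pck_accuracy (array sizes below are assumed):
# identical output and target heatmaps give perfect PCK.
#     >>> output = np.zeros((1, 2, 64, 64), dtype=np.float32)
#     >>> output[0, :, 10, 20] = 1.0
#     >>> mask = np.ones((1, 2), dtype=bool)
#     >>> acc, avg_acc, cnt = pose_pck_accuracy(output, output.copy(), mask)
#     # acc ~ [1.0, 1.0], avg_acc ~ 1.0, cnt == 2
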
def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
    """Calculate the pose accuracy of PCK for each individual keypoint and the
    averaged accuracy across all keypoints for coordinates.

    Note:
        PCK metric measures accuracy of the localization of the body joints.
        The distances between predicted positions and the ground-truth ones
        are typically normalized by the bounding box size.
        The threshold (thr) of the normalized distance is commonly set
        as 0.05, 0.1 or 0.2 etc.

        - batch_size: N
        - num_keypoints: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation.
        normalize (np.ndarray[N, 2]): Normalization factor for H&W.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - acc (np.ndarray[K]): Accuracy of each keypoint.
        - avg_acc (float): Averaged accuracy across all keypoints.
        - cnt (int): Number of valid keypoints.
    """
    distances = _calc_distances(pred, gt, mask, normalize)

    acc = np.array([_distance_acc(d, thr) for d in distances])
    valid_acc = acc[acc >= 0]
    cnt = len(valid_acc)
    avg_acc = valid_acc.mean() if cnt > 0 else 0
    return acc, avg_acc, cnt


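# Illustrative sketch of keypoint_pck_accuracy on raw coordinates (values
# assumed): with a 64-pixel normalization, an 8-pixel error is 0.125 > 0.05
# and therefore fails the PCK test at thr=0.05.
#     >>> pred = np.array([[[10., 10.], [20., 20.]]])
#     >>> gt = np.array([[[10., 10.], [28., 20.]]])
#     >>> mask = np.ones((1, 2), dtype=bool)
#     >>> normalize = np.full((1, 2), 64.0)
#     >>> acc, avg_acc, cnt = keypoint_pck_accuracy(
#     ...     pred, gt, mask, 0.05, normalize)
#     # acc ~ [1.0, 0.0], avg_acc ~ 0.5, cnt == 2
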
def keypoint_auc(pred, gt, mask, normalize, num_step=20):
    """Calculate the Area Under the Curve (AUC) of keypoint PCK accuracy,
    obtained by averaging the PCK accuracy over ``num_step`` evenly spaced
    thresholds.

    Note:
        - batch_size: N
        - num_keypoints: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize (float): Normalization factor.
        num_step (int): Number of threshold steps at which PCK is evaluated.
            Default: 20.

    Returns:
        float: Area under curve.
    """
    nor = np.tile(np.array([[normalize, normalize]]), (pred.shape[0], 1))
    x = [1.0 * i / num_step for i in range(num_step)]
    y = []
    for thr in x:
        _, avg_acc, _ = keypoint_pck_accuracy(pred, gt, mask, thr, nor)
        y.append(avg_acc)

    auc = 0
    for i in range(num_step):
        auc += 1.0 / num_step * y[i]
    return auc


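# Illustrative sketch of keypoint_auc (values assumed): perfect predictions
# score 1.0 at every sampled threshold except thr=0.0 (the comparison is a
# strict '<'), so with the default 20 steps the AUC is 19/20 = 0.95.
#     >>> pred = np.zeros((1, 5, 2))
#     >>> mask = np.ones((1, 5), dtype=bool)
#     >>> keypoint_auc(pred, pred.copy(), mask, normalize=30.0)
#     # ~ 0.95
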
def keypoint_nme(pred, gt, mask, normalize_factor):
    """Calculate the normalized mean error (NME).

    Note:
        - batch_size: N
        - num_keypoints: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize_factor (np.ndarray[N, 2]): Normalization factor.

    Returns:
        float: Normalized mean error.
    """
    distances = _calc_distances(pred, gt, mask, normalize_factor)
    distance_valid = distances[distances != -1]
    return distance_valid.sum() / max(1, len(distance_valid))


def keypoint_epe(pred, gt, mask):
    """Calculate the end-point error.

    Note:
        - batch_size: N
        - num_keypoints: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.

    Returns:
        float: Average end-point error.
    """

    distances = _calc_distances(
        pred, gt, mask,
        np.ones((pred.shape[0], pred.shape[2]), dtype=np.float32))
    distance_valid = distances[distances != -1]
    return distance_valid.sum() / max(1, len(distance_valid))


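# Illustrative sketch of keypoint_epe (values assumed): EPE is the plain mean
# Euclidean distance in pixels, since the normalization factor is all ones.
#     >>> pred = np.array([[[3., 4.]]])
#     >>> gt = np.zeros((1, 1, 2))
#     >>> mask = np.ones((1, 1), dtype=bool)
#     >>> keypoint_epe(pred, gt, mask)
#     # ~ 5.0
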
def _taylor(heatmap, coord):
    """Distribution aware coordinate decoding method.

    Note:
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmap (np.ndarray[H, W]): Heatmap of a particular joint type.
        coord (np.ndarray[2,]): Coordinates of the predicted keypoints.

    Returns:
        np.ndarray[2,]: Updated coordinates.
    """
    H, W = heatmap.shape[:2]
    px, py = int(coord[0]), int(coord[1])
    if 1 < px < W - 2 and 1 < py < H - 2:
        dx = 0.5 * (heatmap[py][px + 1] - heatmap[py][px - 1])
        dy = 0.5 * (heatmap[py + 1][px] - heatmap[py - 1][px])
        dxx = 0.25 * (
            heatmap[py][px + 2] - 2 * heatmap[py][px] + heatmap[py][px - 2])
        dxy = 0.25 * (
            heatmap[py + 1][px + 1] - heatmap[py - 1][px + 1] -
            heatmap[py + 1][px - 1] + heatmap[py - 1][px - 1])
        dyy = 0.25 * (
            heatmap[py + 2][px] - 2 * heatmap[py][px] + heatmap[py - 2][px])
        derivative = np.array([[dx], [dy]])
        hessian = np.array([[dxx, dxy], [dxy, dyy]])
        if dxx * dyy - dxy**2 != 0:
            hessianinv = np.linalg.inv(hessian)
            offset = -hessianinv @ derivative
            offset = np.squeeze(np.array(offset.T), axis=0)
            coord += offset
    return coord


def post_dark_udp(coords, batch_heatmaps, kernel=3):
    """DARK post-processing. Implemented by UDP. Paper ref: Huang et al. The
    Devil is in the Details: Delving into Unbiased Data Processing for Human
    Pose Estimation (CVPR 2020). Zhang et al. Distribution-Aware Coordinate
    Representation for Human Pose Estimation (CVPR 2020).

    Note:
        - batch size: B
        - num keypoints: K
        - num persons: N
        - height of heatmaps: H
        - width of heatmaps: W

        B=1 for bottom_up paradigm where all persons share the same heatmap.
        B=N for top_down paradigm where each person has its own heatmaps.

    Args:
        coords (np.ndarray[N, K, 2]): Initial coordinates of human pose.
        batch_heatmaps (np.ndarray[B, K, H, W]): Batch of predicted heatmaps.
        kernel (int): Gaussian kernel size (K) for modulation.

    Returns:
        np.ndarray([N, K, 2]): Refined coordinates.
    """
    if not isinstance(batch_heatmaps, np.ndarray):
        batch_heatmaps = batch_heatmaps.cpu().numpy()
    B, K, H, W = batch_heatmaps.shape
    N = coords.shape[0]
    assert B == 1 or B == N
    for heatmaps in batch_heatmaps:
        for heatmap in heatmaps:
            cv2.GaussianBlur(heatmap, (kernel, kernel), 0, heatmap)
    np.clip(batch_heatmaps, 0.001, 50, batch_heatmaps)
    np.log(batch_heatmaps, batch_heatmaps)

    batch_heatmaps_pad = np.pad(
        batch_heatmaps, ((0, 0), (0, 0), (1, 1), (1, 1)),
        mode='edge').flatten()

    index = coords[..., 0] + 1 + (coords[..., 1] + 1) * (W + 2)
    index += (W + 2) * (H + 2) * np.arange(0, B * K).reshape(-1, K)
    index = index.astype(int).reshape(-1, 1)
    i_ = batch_heatmaps_pad[index]
    ix1 = batch_heatmaps_pad[index + 1]
    iy1 = batch_heatmaps_pad[index + W + 2]
    ix1y1 = batch_heatmaps_pad[index + W + 3]
    ix1_y1_ = batch_heatmaps_pad[index - W - 3]
    ix1_ = batch_heatmaps_pad[index - 1]
    iy1_ = batch_heatmaps_pad[index - 2 - W]

    dx = 0.5 * (ix1 - ix1_)
    dy = 0.5 * (iy1 - iy1_)
    derivative = np.concatenate([dx, dy], axis=1)
    derivative = derivative.reshape(N, K, 2, 1)
    dxx = ix1 - 2 * i_ + ix1_
    dyy = iy1 - 2 * i_ + iy1_
    dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
    hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1)
    hessian = hessian.reshape(N, K, 2, 2)
    hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
    coords -= np.einsum('ijmn,ijnk->ijmk', hessian, derivative).squeeze()
    return coords


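# Illustrative sketch of post_dark_udp for the top-down case (B == N; shapes
# below are assumed). Both arguments are modified in place, hence the
# defensive copies.
#     >>> heatmaps = np.random.rand(2, 17, 64, 48).astype(np.float32)
#     >>> coords, _ = _get_max_preds(heatmaps)
#     >>> refined = post_dark_udp(coords.copy(), heatmaps.copy(), kernel=3)
#     >>> refined.shape
#     # (2, 17, 2)
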
def _gaussian_blur(heatmaps, kernel=11):
    """Modulate heatmap distribution with Gaussian.

    sigma = 0.3 * ((kernel_size - 1) * 0.5 - 1) + 0.8
    sigma ~= 3 if k = 17
    sigma = 2 if k = 11
    sigma ~= 1.5 if k = 7
    sigma ~= 1 if k = 3

    Note:
        - batch_size: N
        - num_keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            k=17 for sigma=3 and k=11 for sigma=2.

    Returns:
        np.ndarray ([N, K, H, W]): Modulated heatmap distribution.
    """
    assert kernel % 2 == 1

    border = (kernel - 1) // 2
    batch_size = heatmaps.shape[0]
    num_joints = heatmaps.shape[1]
    height = heatmaps.shape[2]
    width = heatmaps.shape[3]
    for i in range(batch_size):
        for j in range(num_joints):
            origin_max = np.max(heatmaps[i, j])
            dr = np.zeros((height + 2 * border, width + 2 * border),
                          dtype=np.float32)
            dr[border:-border, border:-border] = heatmaps[i, j].copy()
            dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
            heatmaps[i, j] = dr[border:-border, border:-border].copy()
            heatmaps[i, j] *= origin_max / np.max(heatmaps[i, j])
    return heatmaps


def keypoints_from_regression(regression_preds, center, scale, img_size):
    """Get final keypoint predictions from regression vectors and transform
    them back to the image.

    Note:
        - batch_size: N
        - num_keypoints: K

    Args:
        regression_preds (np.ndarray[N, K, 2]): model prediction.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box
            wrt height/width.
        img_size (list(img_width, img_height)): model input image size.

    Returns:
        tuple:

        - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    N, K, _ = regression_preds.shape
    preds, maxvals = regression_preds, np.ones((N, K, 1), dtype=np.float32)

    preds = preds * img_size

    # Transform back to the image
    for i in range(N):
        preds[i] = transform_preds(preds[i], center[i], scale[i], img_size)

    return preds, maxvals


def keypoints_from_heatmaps(heatmaps,
                            center,
                            scale,
                            unbiased=False,
                            post_process='default',
                            kernel=11,
                            valid_radius_factor=0.0546875,
                            use_udp=False,
                            target_type='GaussianHeatmap'):
    """Get final keypoint predictions from heatmaps and transform them back to
    the image.

    Note:
        - batch size: N
        - num keypoints: K
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box
            wrt height/width.
        post_process (str/None): Choice of methods to post-process
            heatmaps. Currently supported: None, 'default', 'unbiased',
            'megvii'.
        unbiased (bool): Option to use unbiased decoding. Mutually
            exclusive with megvii.
            Note: this arg is deprecated and unbiased=True can be replaced
            by post_process='unbiased'.
            Paper ref: Zhang et al. Distribution-Aware Coordinate
            Representation for Human Pose Estimation (CVPR 2020).
        kernel (int): Gaussian kernel size (K) for modulation, which should
            match the heatmap gaussian sigma when training.
            k=17 for sigma=3 and k=11 for sigma=2.
        valid_radius_factor (float): The radius factor of the positive area
            in classification heatmap for UDP.
        use_udp (bool): Use unbiased data processing.
        target_type (str): 'GaussianHeatmap' or 'CombinedTarget'.
            GaussianHeatmap: Classification target with gaussian distribution.
            CombinedTarget: The combination of classification target
            (response map) and regression target (offset map).
            Paper ref: Huang et al. The Devil is in the Details: Delving into
            Unbiased Data Processing for Human Pose Estimation (CVPR 2020).

    Returns:
        tuple: A tuple containing keypoint predictions and scores.

        - preds (np.ndarray[N, K, 2]): Predicted keypoint location in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    # Avoid mutating the input heatmaps
    heatmaps = heatmaps.copy()

    # detect conflicts
    if unbiased:
        assert post_process not in [False, None, 'megvii']
    if post_process in ['megvii', 'unbiased']:
        assert kernel > 0
    if use_udp:
        assert not post_process == 'megvii'

    # normalize configs
    if post_process is False:
        warnings.warn(
            'post_process=False is deprecated, '
            'please use post_process=None instead', DeprecationWarning)
        post_process = None
    elif post_process is True:
        if unbiased is True:
            warnings.warn(
                'post_process=True, unbiased=True is deprecated,'
                " please use post_process='unbiased' instead",
                DeprecationWarning)
            post_process = 'unbiased'
        else:
            warnings.warn(
                'post_process=True, unbiased=False is deprecated, '
                "please use post_process='default' instead",
                DeprecationWarning)
            post_process = 'default'
    elif post_process == 'default':
        if unbiased is True:
            warnings.warn(
                'unbiased=True is deprecated, please use '
                "post_process='unbiased' instead", DeprecationWarning)
            post_process = 'unbiased'

    # start processing
    if post_process == 'megvii':
        heatmaps = _gaussian_blur(heatmaps, kernel=kernel)

    N, K, H, W = heatmaps.shape
    if use_udp:
        if target_type.lower() == 'GaussianHeatMap'.lower():
            preds, maxvals = _get_max_preds(heatmaps)
            preds = post_dark_udp(preds, heatmaps, kernel=kernel)
        elif target_type.lower() == 'CombinedTarget'.lower():
            for person_heatmaps in heatmaps:
                for i, heatmap in enumerate(person_heatmaps):
                    kt = 2 * kernel + 1 if i % 3 == 0 else kernel
                    cv2.GaussianBlur(heatmap, (kt, kt), 0, heatmap)
            # valid radius is in direct proportion to the height of heatmap.
            valid_radius = valid_radius_factor * H
            offset_x = heatmaps[:, 1::3, :].flatten() * valid_radius
            offset_y = heatmaps[:, 2::3, :].flatten() * valid_radius
            heatmaps = heatmaps[:, ::3, :]
            preds, maxvals = _get_max_preds(heatmaps)
            index = preds[..., 0] + preds[..., 1] * W
            index += W * H * np.arange(0, N * K / 3)
            index = index.astype(int).reshape(N, K // 3, 1)
            preds += np.concatenate((offset_x[index], offset_y[index]), axis=2)
        else:
            raise ValueError('target_type should be either '
                             "'GaussianHeatmap' or 'CombinedTarget'")
    else:
        preds, maxvals = _get_max_preds(heatmaps)
        if post_process == 'unbiased':  # alleviate biased coordinate
            # apply Gaussian distribution modulation.
            heatmaps = np.log(
                np.maximum(_gaussian_blur(heatmaps, kernel), 1e-10))
            for n in range(N):
                for k in range(K):
                    preds[n][k] = _taylor(heatmaps[n][k], preds[n][k])
        elif post_process is not None:
            # add +/-0.25 shift to the predicted locations for higher acc.
            for n in range(N):
                for k in range(K):
                    heatmap = heatmaps[n][k]
                    px = int(preds[n][k][0])
                    py = int(preds[n][k][1])
                    if 1 < px < W - 1 and 1 < py < H - 1:
                        diff = np.array([
                            heatmap[py][px + 1] - heatmap[py][px - 1],
                            heatmap[py + 1][px] - heatmap[py - 1][px]
                        ])
                        preds[n][k] += np.sign(diff) * .25
                        if post_process == 'megvii':
                            preds[n][k] += 0.5

    # Transform back to the image
    for i in range(N):
        preds[i] = transform_preds(
            preds[i], center[i], scale[i], [W, H], use_udp=use_udp)

    if post_process == 'megvii':
        maxvals = maxvals / 255.0 + 0.5

    return preds, maxvals


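# Illustrative sketch of keypoints_from_heatmaps (shapes below are assumed;
# center and scale must follow the convention expected by transform_preds for
# the dataset in use).
#     >>> heatmaps = np.random.rand(1, 17, 64, 48).astype(np.float32)
#     >>> center = np.array([[128., 128.]])
#     >>> scale = np.array([[1.0, 1.0]])
#     >>> preds, maxvals = keypoints_from_heatmaps(heatmaps, center, scale)
#     >>> preds.shape, maxvals.shape
#     # ((1, 17, 2), (1, 17, 1))
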
def keypoints_from_heatmaps3d(heatmaps, center, scale):
    """Get final keypoint predictions from 3d heatmaps and transform them back
    to the image.

    Note:
        - batch size: N
        - num keypoints: K
        - heatmap depth size: D
        - heatmap height: H
        - heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, D, H, W]): model predicted heatmaps.
        center (np.ndarray[N, 2]): Center of the bounding box (x, y).
        scale (np.ndarray[N, 2]): Scale of the bounding box
            wrt height/width.

    Returns:
        tuple: A tuple containing keypoint predictions and scores.

        - preds (np.ndarray[N, K, 3]): Predicted 3d keypoint location \
            in images.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    N, K, D, H, W = heatmaps.shape
    preds, maxvals = _get_max_preds_3d(heatmaps)
    # Transform back to the image
    for i in range(N):
        preds[i, :, :2] = transform_preds(preds[i, :, :2], center[i], scale[i],
                                          [W, H])
    return preds, maxvals


def multilabel_classification_accuracy(pred, gt, mask, thr=0.5):
    """Get multi-label classification accuracy.

    Note:
        - batch size: N
        - label number: L

    Args:
        pred (np.ndarray[N, L]): model predicted labels.
        gt (np.ndarray[N, L]): ground-truth labels.
        mask (np.ndarray[N, 1] or np.ndarray[N, L]): reliability of
            ground-truth labels.
        thr (float): Threshold for positive predictions. Default 0.5.

    Returns:
        float: multi-label classification accuracy.
    """
    # we only compute accuracy on the samples with ground-truth of all labels.
    valid = (mask > 0).min(axis=1) if mask.ndim == 2 else (mask > 0)
    pred, gt = pred[valid], gt[valid]

    if pred.shape[0] == 0:
        acc = 0.0  # when no sample is with gt labels, set acc to 0.
    else:
        # The classification of a sample is regarded as correct
        # only if it's correct for all labels.
        acc = (((pred - thr) * (gt - thr)) > 0).all(axis=1).mean()
    return acc


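# Illustrative sketch of multilabel_classification_accuracy (values assumed):
# a sample counts as correct only when every label falls on the right side of
# the threshold.
#     >>> pred = np.array([[0.9, 0.2], [0.7, 0.8]])
#     >>> gt = np.array([[1., 0.], [1., 0.]])
#     >>> mask = np.ones((2, 1))
#     >>> multilabel_classification_accuracy(pred, gt, mask, thr=0.5)
#     # ~ 0.5  (the second sample is wrong on its second label)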