# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch.nn as nn
from mmcv.cnn import normal_init

from mmpose.core.evaluation import (keypoint_pck_accuracy,
                                    keypoints_from_regression)
from mmpose.core.post_processing import fliplr_regression
from mmpose.models.builder import HEADS, build_loss


@HEADS.register_module()
class DeepposeRegressionHead(nn.Module):
"""Deeppose regression head with fully connected layers.
|
|
|
|
"DeepPose: Human Pose Estimation via Deep Neural Networks".
|
|
|
|
Args:
|
|
in_channels (int): Number of input channels
|
|
num_joints (int): Number of joints
|
|
loss_keypoint (dict): Config for keypoint loss. Default: None.
|
|
"""
|
|
|
|
def __init__(self,
|
|
in_channels,
|
|
num_joints,
|
|
loss_keypoint=None,
|
|
train_cfg=None,
|
|
test_cfg=None):
|
|
super().__init__()
|
|
|
|
self.in_channels = in_channels
|
|
self.num_joints = num_joints
|
|
|
|
self.loss = build_loss(loss_keypoint)
|
|
|
|
self.train_cfg = {} if train_cfg is None else train_cfg
|
|
self.test_cfg = {} if test_cfg is None else test_cfg
|
|
|
|
self.fc = nn.Linear(self.in_channels, self.num_joints * 2)
|
|
|
|

    def forward(self, x):
        """Forward function."""
        output = self.fc(x)
        N, C = output.shape
        # Reshape flat (N, 2K) predictions into per-joint (N, K, 2) pairs.
        return output.reshape([N, C // 2, 2])
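
    # A shape sketch for ``forward`` (illustrative only; ``in_channels=2048``
    # and ``num_joints=17`` are assumed example sizes, not fixed by this file):
    #
    #     feats = torch.rand(4, 2048)   # pooled backbone features, (N, C)
    #     out = head(feats)             # -> torch.Size([4, 17, 2])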

    def get_loss(self, output, target, target_weight):
        """Calculate top-down keypoint loss.

        Note:
            - batch_size: N
            - num_keypoints: K

        Args:
            output (torch.Tensor[N, K, 2]): Output keypoints.
            target (torch.Tensor[N, K, 2]): Target keypoints.
            target_weight (torch.Tensor[N, K, 2]):
                Weights across different joint types.
        """
        losses = dict()
        assert not isinstance(self.loss, nn.Sequential)
        assert target.dim() == 3 and target_weight.dim() == 3
        losses['reg_loss'] = self.loss(output, target, target_weight)

        return losses
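
    # A hedged example of a ``loss_keypoint`` config compatible with
    # ``get_loss``; the particular loss type is an assumption, not something
    # this file prescribes:
    #
    #     loss_keypoint = dict(type='SmoothL1Loss', use_target_weight=True)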

    def get_accuracy(self, output, target, target_weight):
        """Calculate accuracy for top-down keypoint loss.

        Note:
            - batch_size: N
            - num_keypoints: K

        Args:
            output (torch.Tensor[N, K, 2]): Output keypoints.
            target (torch.Tensor[N, K, 2]): Target keypoints.
            target_weight (torch.Tensor[N, K, 2]):
                Weights across different joint types.
        """
        accuracy = dict()

        N = output.shape[0]

        # With ``normalize=np.ones((N, 2))``, PCK counts a keypoint as
        # correct when it lies within 0.05 of the target in the normalized
        # coordinate space the head regresses in.
        _, avg_acc, cnt = keypoint_pck_accuracy(
            output.detach().cpu().numpy(),
            target.detach().cpu().numpy(),
            target_weight[:, :, 0].detach().cpu().numpy() > 0,
            thr=0.05,
            normalize=np.ones((N, 2), dtype=np.float32))
        accuracy['acc_pose'] = avg_acc

        return accuracy

    def inference_model(self, x, flip_pairs=None):
        """Inference function.

        Args:
            x (torch.Tensor[N, C]): Input features.
            flip_pairs (None | list[tuple]):
                Pairs of keypoints which are mirrored.

        Returns:
            output_regression (np.ndarray): Output regression.
        """
        output = self.forward(x)

        if flip_pairs is not None:
            # Undo horizontal test-time flipping by swapping mirrored joints
            # and mirroring the x-coordinates.
            output_regression = fliplr_regression(
                output.detach().cpu().numpy(), flip_pairs)
        else:
            output_regression = output.detach().cpu().numpy()
        return output_regression
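
    # A hedged ``flip_pairs`` example for a hypothetical 5-keypoint skeleton
    # in which joints (1, 2) and (3, 4) are left/right mirrored:
    #
    #     flip_pairs = [(1, 2), (3, 4)]
    #     keypoints = head.inference_model(feats, flip_pairs=flip_pairs)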

    def decode(self, img_metas, output, **kwargs):
        """Decode the keypoints from output regression.

        Args:
            img_metas (list(dict)): Information about data augmentation.
                By default this includes:

                - "image_file": path to the image file
                - "center": center of the bbox
                - "scale": scale of the bbox
                - "rotation": rotation of the bbox
                - "bbox_score": score of bbox
            output (np.ndarray[N, K, 2]): Predicted regression vector.
            kwargs: dict containing 'img_size'.
                img_size (tuple(img_width, img_height)): input image size.
        """
        batch_size = len(img_metas)

        if 'bbox_id' in img_metas[0]:
            bbox_ids = []
        else:
            bbox_ids = None

        c = np.zeros((batch_size, 2), dtype=np.float32)
        s = np.zeros((batch_size, 2), dtype=np.float32)
        image_paths = []
        score = np.ones(batch_size)
        for i in range(batch_size):
            c[i, :] = img_metas[i]['center']
            s[i, :] = img_metas[i]['scale']
            image_paths.append(img_metas[i]['image_file'])

            if 'bbox_score' in img_metas[i]:
                score[i] = np.array(img_metas[i]['bbox_score']).reshape(-1)
            if bbox_ids is not None:
                bbox_ids.append(img_metas[i]['bbox_id'])

        preds, maxvals = keypoints_from_regression(output, c, s,
                                                   kwargs['img_size'])

        all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
        all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
        all_preds[:, :, 0:2] = preds[:, :, 0:2]
        all_preds[:, :, 2:3] = maxvals
        all_boxes[:, 0:2] = c[:, 0:2]
        all_boxes[:, 2:4] = s[:, 0:2]
        # ``scale`` is stored in units of 200 pixels (the mmpose convention),
        # so this recovers the bbox area in pixels.
        all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
        all_boxes[:, 5] = score

        result = {}

        result['preds'] = all_preds
        result['boxes'] = all_boxes
        result['image_paths'] = image_paths
        result['bbox_ids'] = bbox_ids

        return result
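
    # A hedged sketch of the inputs ``decode`` expects, derived from the keys
    # it reads above; the file name and numbers are illustrative assumptions:
    #
    #     img_metas = [dict(image_file='demo.jpg',
    #                       center=np.array([128., 128.]),
    #                       scale=np.array([1.0, 1.0]),
    #                       rotation=0,
    #                       bbox_score=1.0)]
    #     results = head.decode(img_metas, output, img_size=[256, 256])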

    def init_weights(self):
        """Initialize model weights."""
        normal_init(self.fc, mean=0, std=0.01, bias=0)
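

# A minimal end-to-end sketch of how this head is typically used. The sizes
# and the SmoothL1Loss config below are illustrative assumptions, not part of
# this module.
if __name__ == '__main__':
    import torch

    head = DeepposeRegressionHead(
        in_channels=2048,
        num_joints=17,
        loss_keypoint=dict(type='SmoothL1Loss', use_target_weight=True))
    head.init_weights()

    feats = torch.rand(4, 2048)           # pooled backbone features, (N, C)
    pred = head(feats)                    # (4, 17, 2) regressed coordinates
    target = torch.rand(4, 17, 2)
    target_weight = torch.ones(4, 17, 2)

    print(head.get_loss(pred, target, target_weight))      # {'reg_loss': ...}
    print(head.get_accuracy(pred, target, target_weight))  # {'acc_pose': ...}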