ViTPose/tests/test_models/test_top_down_forward.py


								# Copyright (c) OpenMMLab. All rights reserved.

								import copy


								import numpy as np

								import torch


								from mmpose.models.detectors import PoseWarper, TopDown


								def test_vipnas_forward():
								    """Smoke-test a TopDown detector that uses the ViPNAS_ResNet backbone.

								    The model is built from an inline config, then run once in training
								    mode (the result must be a dict) and once in inference mode under
								    ``torch.no_grad()``, using inputs produced by ``_demo_mm_inputs``.
								    """
								    # Keypoint channel layout; only 'num_output_channels' is read below.
								    joint_ids = list(range(17))
								    channel_cfg = {
								        'num_output_channels': 17,
								        'dataset_joints': 17,
								        'dataset_channel': [list(joint_ids)],
								        'inference_channel': list(joint_ids),
								    }

								    head_cfg = {
								        'type': 'ViPNASHeatmapSimpleHead',
								        'in_channels': 608,
								        'out_channels': channel_cfg['num_output_channels'],
								        'loss_keypoint': {
								            'type': 'JointsMSELoss',
								            'use_target_weight': True,
								        },
								    }
								    model_cfg = {
								        'type': 'TopDown',
								        'pretrained': None,
								        'backbone': {'type': 'ViPNAS_ResNet', 'depth': 50},
								        'keypoint_head': head_cfg,
								        'train_cfg': {},
								        'test_cfg': {
								            'flip_test': True,
								            'post_process': 'default',
								            'shift_heatmap': True,
								            'modulate_kernel': 11,
								        },
								    }

								    # Arguments are positional; the second slot is deliberately None.
								    detector = TopDown(
								        model_cfg['backbone'],
								        None,
								        model_cfg['keypoint_head'],
								        model_cfg['train_cfg'],
								        model_cfg['test_cfg'],
								        model_cfg['pretrained'],
								    )

								    batch = _demo_mm_inputs((1, 3, 256, 256))
								    imgs = batch['imgs']
								    target = batch['target']
								    target_weight = batch['target_weight']
								    img_metas = batch['img_metas']

								    # Training-mode pass: must hand back a dict.
								    train_out = detector.forward(
								        imgs, target, target_weight, img_metas, return_loss=True)
								    assert isinstance(train_out, dict)

								    # Inference-mode pass: only checked for running without error.
								    with torch.no_grad():
								        _ = detector.forward(imgs, img_metas=img_metas, return_loss=False)


								def test_topdown_forward():
								    """Smoke-test TopDown with several backbone / keypoint-head pairings.

								    Four models are exercised in order:

								    1. ResNet-18 with TopdownHeatmapSimpleHead.
								    2. HourglassNet with TopdownHeatmapMultiStageHead whose loss is a
								       single dict with ``use_target_weight=False``. This model is not
								       re-initialised and reuses the batch created for model 1.
								    3. The same HourglassNet pairing with the loss given as a one-element
								       list.
								    4. RSN with TopdownHeatmapMSMUHead, which additionally runs
								       ``forward_dummy``.

								    Every model runs a training-mode forward pass (result must be a dict)
								    and an inference-mode forward pass under ``torch.no_grad()``.
								    """

								    def make_test_cfg():
								        # Fresh dict per model so detectors never share a config object.
								        return {
								            'flip_test': True,
								            'post_process': 'default',
								            'shift_heatmap': True,
								            'modulate_kernel': 11,
								        }

								    def build(cfg):
								        # Arguments are positional; the second slot is deliberately None.
								        return TopDown(
								            cfg['backbone'],
								            None,
								            cfg['keypoint_head'],
								            cfg['train_cfg'],
								            cfg['test_cfg'],
								            cfg['pretrained'],
								        )

								    def split(batch):
								        return (batch['imgs'], batch['target'], batch['target_weight'],
								                batch['img_metas'])

								    def run_passes(model, imgs, target, target_weight, img_metas,
								                   with_dummy=False):
								        # Training-mode pass must return a dict of losses.
								        train_out = model.forward(
								            imgs, target, target_weight, img_metas, return_loss=True)
								        assert isinstance(train_out, dict)

								        # Inference-mode pass(es) run without gradient tracking.
								        with torch.no_grad():
								            _ = model.forward(
								                imgs, img_metas=img_metas, return_loss=False)
								            if with_dummy:
								                _ = model.forward_dummy(imgs)

								    # 1) ResNet-18 backbone with the simple heatmap head.
								    resnet_cfg = {
								        'type': 'TopDown',
								        'pretrained': None,
								        'backbone': {'type': 'ResNet', 'depth': 18},
								        'keypoint_head': {
								            'type': 'TopdownHeatmapSimpleHead',
								            'in_channels': 512,
								            'out_channels': 17,
								            'loss_keypoint': {
								                'type': 'JointsMSELoss',
								                'use_target_weight': True,
								            },
								        },
								        'train_cfg': {},
								        'test_cfg': make_test_cfg(),
								    }
								    detector = build(resnet_cfg)
								    detector.init_weights()

								    imgs, target, target_weight, img_metas = split(
								        _demo_mm_inputs((1, 3, 256, 256)))
								    run_passes(detector, imgs, target, target_weight, img_metas)

								    # 2) Hourglass backbone, multi-stage head, loss given as a single dict
								    #    with use_target_weight=False. No init_weights() call here, and
								    #    the batch from step 1 is reused as-is.
								    hourglass_cfg = {
								        'type': 'TopDown',
								        'pretrained': None,
								        'backbone': {'type': 'HourglassNet', 'num_stacks': 1},
								        'keypoint_head': {
								            'type': 'TopdownHeatmapMultiStageHead',
								            'in_channels': 256,
								            'out_channels': 17,
								            'num_stages': 1,
								            'num_deconv_layers': 0,
								            'extra': {'final_conv_kernel': 1},
								            'loss_keypoint': {
								                'type': 'JointsMSELoss',
								                'use_target_weight': False,
								            },
								        },
								        'train_cfg': {},
								        'test_cfg': make_test_cfg(),
								    }
								    detector = build(hourglass_cfg)
								    run_passes(detector, imgs, target, target_weight, img_metas)

								    # 3) Same pairing, but the loss is supplied as a one-element list.
								    hourglass_list_loss_cfg = {
								        'type': 'TopDown',
								        'pretrained': None,
								        'backbone': {'type': 'HourglassNet', 'num_stacks': 1},
								        'keypoint_head': {
								            'type': 'TopdownHeatmapMultiStageHead',
								            'in_channels': 256,
								            'out_channels': 17,
								            'num_stages': 1,
								            'num_deconv_layers': 0,
								            'extra': {'final_conv_kernel': 1},
								            'loss_keypoint': [
								                {
								                    'type': 'JointsMSELoss',
								                    'use_target_weight': True,
								                    'loss_weight': 1.,
								                },
								            ],
								        },
								        'train_cfg': {},
								        'test_cfg': make_test_cfg(),
								    }
								    detector = build(hourglass_list_loss_cfg)
								    detector.init_weights()

								    imgs, target, target_weight, img_metas = split(
								        _demo_mm_inputs((1, 3, 256, 256), num_outputs=None))
								    run_passes(detector, imgs, target, target_weight, img_metas)

								    # 4) RSN backbone with the MSMU head: three JointsMSELoss entries
								    #    (the same dict object repeated) followed by one JointsOHKMMSELoss.
								    mse_loss = {'type': 'JointsMSELoss', 'use_target_weight': True}
								    ohkm_loss = {'type': 'JointsOHKMMSELoss', 'use_target_weight': True}
								    rsn_cfg = {
								        'type': 'TopDown',
								        'pretrained': None,
								        'backbone': {
								            'type': 'RSN',
								            'unit_channels': 256,
								            'num_stages': 1,
								            'num_units': 4,
								            'num_blocks': [2, 2, 2, 2],
								            'num_steps': 4,
								            'norm_cfg': {'type': 'BN'},
								        },
								        'keypoint_head': {
								            'type': 'TopdownHeatmapMSMUHead',
								            'out_shape': (64, 48),
								            'unit_channels': 256,
								            'out_channels': 17,
								            'num_stages': 1,
								            'num_units': 4,
								            'use_prm': False,
								            'norm_cfg': {'type': 'BN'},
								            'loss_keypoint': [mse_loss] * 3 + [ohkm_loss],
								        },
								        'train_cfg': {'num_units': 4},
								        'test_cfg': {
								            'flip_test': True,
								            'post_process': 'default',
								            'shift_heatmap': False,
								            'unbiased_decoding': False,
								            'modulate_kernel': 5,
								        },
								    }
								    detector = build(rsn_cfg)
								    detector.init_weights()

								    imgs, target, target_weight, img_metas = split(
								        _demo_mm_inputs((1, 3, 256, 192), num_outputs=4))
								    run_passes(
								        detector, imgs, target, target_weight, img_metas, with_dummy=True)


								def test_posewarper_forward():
								    """Smoke-test PoseWarper under five configurations.

								    A base config (HRNet backbone, PoseWarperNeck, TopdownHeatmapSimpleHead)
								    is built first; four deep-copied variants then change one aspect each:
								    ``concat_tensors=False``, ``flip_test=True``, a shorter ``dilations``
								    tuple, and a ResNet-18 backbone with a matching neck / head.

								    Every detector is initialised, fed a two-frame demo batch, and run
								    through a training-mode forward pass (result must be a dict), an
								    inference-mode forward pass, and ``forward_dummy``.
								    """

								    def build(cfg):
								        # Arguments are positional; the slot before concat_tensors is None.
								        return PoseWarper(
								            cfg['backbone'],
								            cfg['neck'],
								            cfg['keypoint_head'],
								            cfg['train_cfg'],
								            cfg['test_cfg'],
								            cfg['pretrained'],
								            None,
								            cfg['concat_tensors'],
								        )

								    def exercise(model, input_shape):
								        model.init_weights()

								        num_frames = 2
								        batch = _demo_mm_inputs(input_shape, None, num_frames)
								        imgs = batch['imgs']
								        target = batch['target']
								        target_weight = batch['target_weight']
								        img_metas = batch['img_metas']

								        # Training-mode pass must return a dict of losses.
								        train_out = model.forward(
								            imgs, target, target_weight, img_metas, return_loss=True)
								        assert isinstance(train_out, dict)

								        # Inference-mode passes run without gradient tracking.
								        with torch.no_grad():
								            _ = model.forward(
								                imgs, img_metas=img_metas, return_loss=False)
								            _ = model.forward_dummy(imgs)

								    hrnet_stages = {
								        'stage1': {
								            'num_modules': 1,
								            'num_branches': 1,
								            'block': 'BOTTLENECK',
								            'num_blocks': (4, ),
								            'num_channels': (64, ),
								        },
								        'stage2': {
								            'num_modules': 1,
								            'num_branches': 2,
								            'block': 'BASIC',
								            'num_blocks': (4, 4),
								            'num_channels': (48, 96),
								        },
								        'stage3': {
								            'num_modules': 4,
								            'num_branches': 3,
								            'block': 'BASIC',
								            'num_blocks': (4, 4, 4),
								            'num_channels': (48, 96, 192),
								        },
								        'stage4': {
								            'num_modules': 3,
								            'num_branches': 4,
								            'block': 'BASIC',
								            'num_blocks': (4, 4, 4, 4),
								            'num_channels': (48, 96, 192, 384),
								        },
								    }
								    model_cfg = {
								        'type': 'PoseWarper',
								        'pretrained': None,
								        'backbone': {
								            'type': 'HRNet',
								            'in_channels': 3,
								            'extra': hrnet_stages,
								            'frozen_stages': 4,
								        },
								        'concat_tensors': True,
								        'neck': {
								            'type': 'PoseWarperNeck',
								            'in_channels': 48,
								            'freeze_trans_layer': True,
								            'out_channels': 17,
								            'inner_channels': 128,
								            'deform_groups': 17,
								            'dilations': (3, 6, 12, 18, 24),
								            'trans_conv_kernel': 1,
								            'res_blocks_cfg': {'block': 'BASIC', 'num_blocks': 20},
								            'offsets_kernel': 3,
								            'deform_conv_kernel': 3,
								        },
								        'keypoint_head': {
								            'type': 'TopdownHeatmapSimpleHead',
								            'in_channels': 17,
								            'out_channels': 17,
								            'num_deconv_layers': 0,
								            'extra': {'final_conv_kernel': 0},
								            'loss_keypoint': {
								                'type': 'JointsMSELoss',
								                'use_target_weight': True,
								            },
								        },
								        'train_cfg': {},
								        'test_cfg': {
								            'flip_test': False,
								            'post_process': 'default',
								            'shift_heatmap': True,
								            'modulate_kernel': 11,
								        },
								    }

								    # Base configuration: concat_tensors enabled.
								    detector = build(model_cfg)
								    assert detector.concat_tensors
								    exercise(detector, (2, 3, 64, 64))

								    # Variant: concat_tensors disabled.
								    variant = copy.deepcopy(model_cfg)
								    variant['concat_tensors'] = False
								    detector = build(variant)
								    assert not detector.concat_tensors
								    exercise(detector, (2, 3, 64, 64))

								    # Variant: flip_test enabled, batch size 1.
								    variant = copy.deepcopy(model_cfg)
								    variant['test_cfg']['flip_test'] = True
								    detector = build(variant)
								    exercise(detector, (1, 3, 64, 64))

								    # Variant: fewer dilation rates in the neck.
								    variant = copy.deepcopy(model_cfg)
								    variant['neck']['dilations'] = (3, 6, 12)
								    detector = build(variant)
								    exercise(detector, (2, 3, 64, 64))

								    # Variant: ResNet-18 backbone; neck input width and head are changed
								    # alongside it.
								    variant = copy.deepcopy(model_cfg)
								    variant['backbone'] = {'type': 'ResNet', 'depth': 18}
								    variant['neck']['in_channels'] = 512
								    variant['keypoint_head'] = {
								        'type': 'TopdownHeatmapSimpleHead',
								        'in_channels': 17,
								        'out_channels': 17,
								        'loss_keypoint': {
								            'type': 'JointsMSELoss',
								            'use_target_weight': True,
								        },
								    }
								    detector = build(variant)
								    exercise(detector, (1, 3, 64, 64))


								def _demo_mm_inputs(

								        input_shape=(1, 3, 256, 256), num_outputs=None, num_frames=1):

								    """Create a superset of inputs needed to run test or train batches.


								    Args:

								        input_shape (tuple):

								            input batch dimensions

								        num_frames (int):

								            number of frames for each sample, default: 1,

								            if larger than 1, return a list of tensors

								    """

								    (N, C, H, W) = input_shape


								    rng = np.random.RandomState(0)


								    imgs = rng.rand(*input_shape)

								    if num_outputs is not None:

								        target = np.zeros([N, num_outputs, 17, H // 4, W // 4],

								                          dtype=np.float32)

								        target_weight = np.ones([N, num_outputs, 17, 1], dtype=np.float32)

								    else:

								        target = np.zeros([N, 17, H // 4, W // 4], dtype=np.float32)

								        target_weight = np.ones([N, 17, 1], dtype=np.float32)


								    img_metas = [{

								        'img_shape': (H, W, C),

								        'center': np.array([W / 2, H / 2]),

								        'scale': np.array([0.5, 0.5]),

								        'bbox_score': 1.0,

								        'bbox_id': 0,

								        'flip_pairs': [],

								        'inference_channel': np.arange(17),

								        'image_file': '<demo>.png',

								        'frame_weight': np.random.uniform(0, 1, num_frames),

								    } for _ in range(N)]


								    mm_inputs = {

								        'target': torch.FloatTensor(target),

								        'target_weight': torch.FloatTensor(target_weight),

								        'img_metas': img_metas

								    }


								    if num_frames == 1:

								        imgs = torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True)

								    else:


								        imgs = [

								            torch.FloatTensor(rng.rand(*input_shape)).requires_grad_(True)

								            for _ in range(num_frames)

								        ]


								    mm_inputs['imgs'] = imgs

								    return mm_inputs