Compare commits

...

7 Commits
main ... 233

  1. 14
      configs/vitpose_sam/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTSam_base_coco_256x192.py
  2. 30
      mmpose/models/backbones/vit_sam.py
  3. 2
      tools/train+sam.py

14
configs/vitpose_sam/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTSam_base_coco_256x192.py

@@ -4,6 +4,8 @@ _base_ = [
]
evaluation = dict(interval=1, metric='mAP', save_best='AP')
checkpoint_config = dict(interval=1) # 保存模型权重文件的间隔
optimizer = dict(type='AdamW',
lr=5e-4, betas=(0.9, 0.999), weight_decay=0.1,
constructor='LayerDecayOptimizerConstructor',
@@ -28,7 +30,7 @@ lr_config = dict(
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
total_epochs = 20
target_type = 'GaussianHeatmap'
channel_cfg = dict(
num_output_channels=17,
@@ -59,7 +61,7 @@ model = dict(
frozen_stages=12,
freeze_attn = True,
freeze_ffn = True,
samvit_checkpoint='/root/autodl-tmp/code/ViTPose/checkpoints/sam/sam_vit_b_01ec64.pth'
samvit_checkpoint='/home/fhw/code/ViTPose/checkpoints/sam/sam_vit_b_01ec64.pth'
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
@@ -79,7 +81,7 @@ model = dict(
modulate_kernel=11,
use_udp=True))
data_root = '/root/autodl-tmp/dataset/coco2017/'
data_root = '/media/D/lxb/datasets/coco'
data_cfg = dict(
image_size=[192, 256],
@@ -148,10 +150,10 @@ val_pipeline = [
test_pipeline = val_pipeline
data = dict(
samples_per_gpu=12,
samples_per_gpu=6,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=12),
test_dataloader=dict(samples_per_gpu=12),
val_dataloader=dict(samples_per_gpu=6),
test_dataloader=dict(samples_per_gpu=6),
train=dict(
type='TopDownCocoDataset',
ann_file=f'{data_root}/annotations/person_keypoints_train2017.json',

30
mmpose/models/backbones/vit_sam.py

@@ -206,10 +206,6 @@ class Cross_Attention(nn.Module):
head_dim = dim // num_heads
self.scale = qk_scale or head_dim ** -0.5
self.self_attn = Attention(
dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
attn_drop=attn_drop, proj_drop=0.)
self.linear_q = nn.Linear(dim, dim, bias=qkv_bias)
self.linear_k = nn.Linear(dim, dim, bias=qkv_bias)
self.linear_v = nn.Linear(dim, dim, bias=qkv_bias)
@@ -262,7 +258,8 @@ class CustomAttentionFFN(nn.Module):
self.ffn = nn.Sequential(
nn.Linear(dim, dim * 4),
nn.GELU(),
nn.Linear(dim * 4, dim)
nn.Linear(dim * 4, dim),
nn.Dropout(proj_drop)
)
self.norm1 = nn.LayerNorm(dim)
self.norm2 = nn.LayerNorm(dim)
@@ -332,11 +329,22 @@ class ViTSam(BaseBackbone):
for param in self.sam_vit.parameters():
param.requires_grad = False
# self.cross_attn = Cross_Attention(embed_dim, num_heads=num_heads, qkv_bias=qkv_bias, \
# 交叉注意力
self.cross_attn = Cross_Attention(embed_dim, num_heads=num_heads, qkv_bias=qkv_bias, \
qk_scale=qk_scale, attn_drop=attn_drop_rate, proj_drop=drop_rate)
# vit_token做自注意力后,再和sam_token做交叉注意力,得到的结果再经过FFN
# self.custom_attn_ffn = CustomAttentionFFN(embed_dim, num_heads=num_heads, qkv_bias=qkv_bias, \
# qk_scale=qk_scale, attn_drop=attn_drop_rate, proj_drop=drop_rate)
self.custom_attn_ffn = CustomAttentionFFN(embed_dim, num_heads=num_heads, qkv_bias=qkv_bias, \
qk_scale=qk_scale, attn_drop=attn_drop_rate, proj_drop=drop_rate)
# 在sam_encoder后面加一层ffn
self.sam_ffn = nn.Sequential(
nn.Linear(embed_dim, embed_dim * 4),
nn.GELU(),
nn.Linear(embed_dim * 4, embed_dim),
nn.Dropout(drop_rate)
)
self.sam_norm = norm_layer(embed_dim)
def _freeze_stages(self):
"""Freeze parameters."""
@@ -434,9 +442,11 @@ class ViTSam(BaseBackbone):
# end_time = time.time()
# print('SAM-ViT forward time: {:.4f}秒'.format(end_time - start_time))
# x1 = x1 + self.cross_attn(x1, x2, x2)
x2 = self.sam_norm(x2 + self.sam_ffn(x2))
x1 = x1 + self.cross_attn(x1, x2, x2)
x1 = self.custom_attn_ffn(x1, x2)
# x1 = self.custom_attn_ffn(x1, x2)
xp = x1.permute(0, 2, 1).reshape(B, -1, Hp, Wp).contiguous() # B, C, Hp, Wp
return xp

2
tools/train+sam.py

@@ -22,7 +22,7 @@ import mmcv_custom
def parse_args():
parser = argparse.ArgumentParser(description='Train a pose model')
parser.add_argument('config', help='train config file path')
parser.add_argument('-c', '--checkpoint', help='checkpoint file', default='/root/autodl-tmp/code/ViTPose/checkpoints/vitpose/vitpose-b.pth')
parser.add_argument('-c', '--checkpoint', help='checkpoint file', default='checkpoints/vitpose/vitpose-b.pth')
parser.add_argument('--work-dir', help='the dir to save logs and models')
parser.add_argument(
'--resume-from', help='the checkpoint file to resume from')

Loading…
Cancel
Save