1 changed files with 86 additions and 0 deletions
@ -0,0 +1,86 @@ |
|||
#!/usr/bin/env python
|||
# Copyright (c) OpenMMLab. All rights reserved. |
|||
import argparse |
|||
import time |
|||
|
|||
import torch |
|||
from mmcv import Config |
|||
from mmcv.cnn import fuse_conv_bn |
|||
from mmcv.parallel import MMDataParallel |
|||
from mmcv.runner.fp16_utils import wrap_fp16_model |
|||
|
|||
from mmpose.datasets import build_dataloader, build_dataset |
|||
from mmpose.models import build_posenet |
|||
|
|||
|
|||
def parse_args(argv=None):
    """Parse command line arguments for the benchmark script.

    Args:
        argv (list[str] | None): Argument strings to parse. Defaults to
            ``None``, in which case ``sys.argv[1:]`` is used.

    Returns:
        argparse.Namespace: Parsed arguments with attributes ``config``
        (path to the test config file) and ``bz`` (batch size per GPU,
        default 32).
    """
    parser = argparse.ArgumentParser(
        description='MMPose benchmark a recognizer')
    parser.add_argument('config', help='test config file path')
    # Help text fixed: it previously read 'test config file path',
    # copy-pasted from the positional argument above.
    parser.add_argument(
        '--bz', default=32, type=int, help='batch size per GPU')
    args = parser.parse_args(argv)
    return args
|||
|
|||
|
|||
def main():
    """Benchmark the pure forward speed of a pose model.

    Builds the model from the given config with test-time extras
    (flip test, UDP, post-processing) disabled, fetches one batch from
    the validation dataloader, and times repeated forward passes on
    GPU 0, printing warmup time and throughput (FPS).
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)

    # Since we only care about the forward speed of the network,
    # skip pretrained-weight loading and disable all test-time extras.
    cfg.model.pretrained = None
    cfg.model.test_cfg.flip_test = False
    cfg.model.test_cfg.use_udp = False
    cfg.model.test_cfg.post_process = 'none'

    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # build the dataloader
    dataset = build_dataset(cfg.data.val)
    data_loader = build_dataloader(
        dataset,
        samples_per_gpu=args.bz,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

    # build the model (NOTE: no checkpoint is loaded on purpose —
    # random weights are sufficient for a pure speed benchmark)
    model = build_posenet(cfg.model)
    model = MMDataParallel(model, device_ids=[0])
    model.eval()

    # grab a single example batch, reused for every timed forward pass
    data = next(iter(data_loader))

    # the first several iterations may be very slow so skip them
    num_warmup = 100
    inference_times = 100

    with torch.no_grad():
        start_time = time.perf_counter()

        for _ in range(num_warmup):
            # synchronize before and after so the wall-clock window
            # covers only completed GPU work
            torch.cuda.synchronize()
            model(return_loss=False, **data)
            torch.cuda.synchronize()

        elapsed = time.perf_counter() - start_time
        print(f'warmup cost {elapsed} time')

        start_time = time.perf_counter()

        for _ in range(inference_times):
            torch.cuda.synchronize()
            model(return_loss=False, **data)
            torch.cuda.synchronize()

        elapsed = time.perf_counter() - start_time
        # throughput in samples per second over the timed iterations
        fps = args.bz * inference_times / elapsed
        print(f'the fps is {fps}')
|||
|
|||
|
|||
# Entry point: run the benchmark only when executed as a script.
if __name__ == '__main__':
    main()
Loading…
Reference in new issue