
register vitpose-b/l/h for coco

bug fix

update readme

rename

add giant results

sync drive path

test
233
Yufei, 3 years ago
commit 33f49cf1f2
  1. CITATION.cff | 8
  2. LICENSE | 203
  3. MANIFEST.in | 5
  4. README.md | 87
  5. configs/_base_/datasets/300w.py | 384
  6. configs/_base_/datasets/aflw.py | 83
  7. configs/_base_/datasets/aic.py | 140
  8. configs/_base_/datasets/animalpose.py | 166
  9. configs/_base_/datasets/ap10k.py | 142
  10. configs/_base_/datasets/atrw.py | 144
  11. configs/_base_/datasets/coco.py | 181
  12. configs/_base_/datasets/coco_wholebody.py | 1154
  13. configs/_base_/datasets/coco_wholebody_face.py | 448
  14. configs/_base_/datasets/coco_wholebody_hand.py | 147
  15. configs/_base_/datasets/cofw.py | 134
  16. configs/_base_/datasets/crowdpose.py | 147
  17. configs/_base_/datasets/deepfashion_full.py | 74
  18. configs/_base_/datasets/deepfashion_lower.py | 46
  19. configs/_base_/datasets/deepfashion_upper.py | 60
  20. configs/_base_/datasets/fly.py | 237
  21. configs/_base_/datasets/freihand2d.py | 144
  22. configs/_base_/datasets/h36m.py | 152
  23. configs/_base_/datasets/halpe.py | 1157
  24. configs/_base_/datasets/horse10.py | 201
  25. configs/_base_/datasets/interhand2d.py | 142
  26. configs/_base_/datasets/interhand3d.py | 487
  27. configs/_base_/datasets/jhmdb.py | 129
  28. configs/_base_/datasets/locust.py | 263
  29. configs/_base_/datasets/macaque.py | 183
  30. configs/_base_/datasets/mhp.py | 156
  31. configs/_base_/datasets/mpi_inf_3dhp.py | 132
  32. configs/_base_/datasets/mpii.py | 155
  33. configs/_base_/datasets/mpii_trb.py | 380
  34. configs/_base_/datasets/ochuman.py | 181
  35. configs/_base_/datasets/onehand10k.py | 142
  36. configs/_base_/datasets/panoptic_body3d.py | 160
  37. configs/_base_/datasets/panoptic_hand2d.py | 143
  38. configs/_base_/datasets/posetrack18.py | 176
  39. configs/_base_/datasets/rhd2d.py | 141
  40. configs/_base_/datasets/wflw.py | 582
  41. configs/_base_/datasets/zebra.py | 64
  42. configs/_base_/default_runtime.py | 19
  43. configs/_base_/filters/gausian_filter.py | 0
  44. configs/animal/2d_kpt_sview_rgb_img/README.md | 18
  45. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/README.md | 7
  46. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.md | 40
  47. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml | 40
  48. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py | 172
  49. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py | 172
  50. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py | 141
  51. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py | 141
  52. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py | 141
  53. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md | 41
  54. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml | 56
  55. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md | 41
  56. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml | 40
  57. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py | 172
  58. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py | 172
  59. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py | 141
  60. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py | 141
  61. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md | 41
  62. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml | 40
  63. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md | 40
  64. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml | 40
  65. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py | 170
  66. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py | 170
  67. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py | 139
  68. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py | 139
  69. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py | 139
  70. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.md | 41
  71. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml | 56
  72. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py | 130
  73. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py | 130
  74. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py | 130
  75. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.md | 44
  76. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml | 50
  77. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.md | 44
  78. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml | 86
  79. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py | 164
  80. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py | 164
  81. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py | 164
  82. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py | 164
  83. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py | 164
  84. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py | 164
  85. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py | 133
  86. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py | 133
  87. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py | 133
  88. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py | 133
  89. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py | 133
  90. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py | 133
  91. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py | 133
  92. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py | 133
  93. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py | 133
  94. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.md | 47
  95. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml | 125
  96. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py | 130
  97. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py | 130
  98. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py | 130
  99. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.md | 43
  100. configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml | 50

CITATION.cff | 8

@@ -0,0 +1,8 @@
cff-version: 1.2.0
message: "If you use this software, please cite it as below."
authors:
- name: "MMPose Contributors"
title: "OpenMMLab Pose Estimation Toolbox and Benchmark"
date-released: 2020-08-31
url: "https://github.com/open-mmlab/mmpose"
license: Apache-2.0

LICENSE | 203

@@ -0,0 +1,203 @@
Copyright 2018-2020 Open-MMLab. All rights reserved.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2018-2020 Open-MMLab.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

MANIFEST.in | 5

@@ -0,0 +1,5 @@
include requirements/*.txt
include mmpose/.mim/model-index.yml
recursive-include mmpose/.mim/configs *.py *.yml
recursive-include mmpose/.mim/tools *.py *.sh
recursive-include mmpose/.mim/demo *.py

README.md | 87

@@ -19,15 +19,80 @@
This branch contains the PyTorch implementation of <a href="https://arxiv.org/abs/2204.12484">ViTPose: Simple Vision Transformer Baselines for Human Pose Estimation</a>. It obtains 81.1 AP on the MS COCO Keypoint test-dev set.
-## Results from this repo on MS COCO val set
-Using detection results from a detector that obtains 56 mAP on person.
+## Results from this repo on MS COCO val set (single-task training)
+Using detection results from a detector that obtains 56 mAP on person. The configs here are for both training and test.
+> With classic decoder
| Model | Pretrain | Resolution | AP | AR | config | log | weight |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
-| ViT-Base | MAE | 256x192 | 75.8 | 81.1 | config | [log](logs/vit-base.log.json) | |
-| ViT-Large | MAE | 256x192 | 78.3 | 83.5 | config | [log](logs/vit-large.log.json) | |
-| ViT-Huge | MAE | 256x192 | 79.1 | 84.1 | config | [log](logs/vit-huge.log.json) | |
+| ViTPose-B | MAE | 256x192 | 75.8 | 81.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py) | [log](logs/vitpose-b.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSMjp1_NrV3VRSmK?e=Q1uZKs) |
+| ViTPose-L | MAE | 256x192 | 78.3 | 83.5 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py) | [log](logs/vitpose-l.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSd9k_kuktPtiP4F?e=K7DGYT) |
+| ViTPose-H | MAE | 256x192 | 79.1 | 84.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py) | [log](logs/vitpose-h.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgShLMI-kkmvNfF_h?e=dEhGHe) |
> With simple decoder
| Model | Pretrain | Resolution | AP | AR | config | log | weight |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| ViTPose-B | MAE | 256x192 | 75.5 | 80.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_simple_coco_256x192.py) | [log](logs/vitpose-b-simple.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSRPKrD5PmDRiv0R?e=jifvOe) |
| ViTPose-L | MAE | 256x192 | 78.2 | 83.4 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_simple_coco_256x192.py) | [log](logs/vitpose-l-simple.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSVS6DP2LmKwZ3sm?e=MmCvDT) |
| ViTPose-H | MAE | 256x192 | 78.9 | 84.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_simple_coco_256x192.py) | [log](logs/vitpose-h-simple.log.json) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSbHyN2mjh2n2LyG?e=y0FgMK) |
## Results from this repo on MS COCO val set (multi-task training)
Using detection results from a detector that obtains 56 mAP on person. Note the configs here are only for evaluation.
| Model | Dataset | Resolution | AP | AR | config | weight |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| ViTPose-B | COCO+AIC+MPII+CrowdPose | 256x192 | 77.5 | 82.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_base_coco_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSrlMB093JzJtqq-?e=Jr5S3R) |
| ViTPose-L | COCO+AIC+MPII+CrowdPose | 256x192 | 79.1 | 84.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_large_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTBm3dCVmBUbHYT6?e=fHUrTq) |
| ViTPose-H | COCO+AIC+MPII+CrowdPose | 256x192 | 79.8 | 84.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/ViTPose_huge_coco_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS5rLeRAJiWobCdh?e=41GsDd) |
| ViTPose-G | COCO+AIC+MPII+CrowdPose | 576x432 | 81.0 | 85.6 | | |
## Results from this repo on OCHuman test set (multi-task training)
Using groundtruth bounding boxes. Note the configs here are only for evaluation.
| Model | Dataset | Resolution | AP | AR | config | weight |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| ViTPose-B | COCO+AIC+MPII+CrowdPose | 256x192 | 88.2 | 90.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_base_ochuman_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSrlMB093JzJtqq-?e=Jr5S3R) |
| ViTPose-L | COCO+AIC+MPII+CrowdPose | 256x192 | 91.5 | 92.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_large_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTBm3dCVmBUbHYT6?e=fHUrTq) |
| ViTPose-H | COCO+AIC+MPII+CrowdPose | 256x192 | 91.6 | 92.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/ochuman/ViTPose_huge_ochuman_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS5rLeRAJiWobCdh?e=41GsDd) |
| ViTPose-G | COCO+AIC+MPII+CrowdPose | 576x432 | 93.3 | 94.3 | | |
## Results from this repo on CrowdPose test set (multi-task training)
Using a YOLOv3 human detector. Note the configs here are only for evaluation.
| Model | Dataset | Resolution | AP | AP(H) | config | weight |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| ViTPose-B | COCO+AIC+MPII+CrowdPose | 256x192 | 74.7 | 63.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_base_crowdpose_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgStrrCb91cPlaxJx?e=6Xobo6) |
| ViTPose-L | COCO+AIC+MPII+CrowdPose | 256x192 | 76.6 | 65.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_large_crowdpose_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTK3dug-r7c6GFyu?e=1ZBpEG) |
| ViTPose-H | COCO+AIC+MPII+CrowdPose | 256x192 | 76.3 | 65.6 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/crowdpose/ViTPose_huge_crowdpose_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS-oAvEV4MTD--Xr?e=EeW2Fu) |
| ViTPose-G | COCO+AIC+MPII+CrowdPose | 576x432 | 78.3 | 67.9 | | |
## Results from this repo on MPII val set (multi-task training)
Using groundtruth bounding boxes. Note the configs here are only for evaluation. The metric is PCKh.
| Model | Dataset | Resolution | Mean | config | weight |
| :----: | :----: | :----: | :----: | :----: | :----: |
| ViTPose-B | COCO+AIC+MPII+CrowdPose | 256x192 | 93.4 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_base_mpii_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSy_OSEm906wd2LB?e=GOSg14) |
| ViTPose-L | COCO+AIC+MPII+CrowdPose | 256x192 | 93.9 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_large_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTM32I6Kpjr-esl6?e=qvh0Yl) |
| ViTPose-H | COCO+AIC+MPII+CrowdPose | 256x192 | 94.1 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/mpii/ViTPose_huge_mpii_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTT90XEQBKy-scIH?e=D2WhTS) |
| ViTPose-G | COCO+AIC+MPII+CrowdPose | 576x432 | 94.3 | | |
## Results from this repo on AI Challenger test set (multi-task training)
Using groundtruth bounding boxes. Note the configs here are only for evaluation.
| Model | Dataset | Resolution | AP | AR | config | weight |
| :----: | :----: | :----: | :----: | :----: | :----: | :----: |
| ViTPose-B | COCO+AIC+MPII+CrowdPose | 256x192 | 31.9 | 36.3 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_base_aic_256x192.py) |[Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgSlvdVaXTC92SHYH?e=j7iqcp) |
| ViTPose-L | COCO+AIC+MPII+CrowdPose | 256x192 | 34.6 | 39.0 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_large_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgTF06FX3FSAm0MOH?e=rYts9F) |
| ViTPose-H | COCO+AIC+MPII+CrowdPose | 256x192 | 35.3 | 39.8 | [config](configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/aic/ViTPose_huge_aic_256x192.py) | [Onedrive](https://1drv.ms/u/s!AimBgYV7JjTlgS1MRmb2mcow_K04?e=q9jPab) |
| ViTPose-G | COCO+AIC+MPII+CrowdPose | 576x432 | 43.2 | 47.1 | | |
## Updates
@@ -66,13 +131,21 @@ bash tools/dist_train.sh <Config PATH> <NUM GPUs> --cfg-options model.pretrained
python -m torch.distributed.launch --nnodes <Num Machines> --node_rank <Rank of Machine> --nproc_per_node <GPUs Per Machine> --master_addr <Master Addr> --master_port <Master Port> tools/train.py <Config PATH> --cfg-options model.pretrained=<Pretrained PATH> --launcher pytorch --seed 0
```
To test the pretrained models' performance, please run
```bash
bash tools/dist_test.sh <Config PATH> <Checkpoint PATH> <NUM GPUs>
```
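For a concrete sense of how the configs in the tables above fit together, here is a minimal sketch that loads one of them and builds the model in Python. It assumes an mmpose-0.x-style environment (mmcv installed and this repo's `mmpose` package importable); it is not a snippet from this repo.
```python
# Minimal sketch (assumption: mmcv and this repo's mmpose package are
# importable). The config path is the ViTPose-B entry from the table above.
from mmcv import Config
from mmpose.models import build_posenet

cfg = Config.fromfile(
    'configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/'
    'ViTPose_base_coco_256x192.py')
model = build_posenet(cfg.model)  # ViTPose-B with the classic decoder
model.eval()
```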
## Todo
This repo currently contains modifications including:
-- [ ] Upload configs and pretrained models
+- [x] Upload configs and pretrained models
-- [x] More models with SOTA results
+- [ ] More models with SOTA results
+- [ ] Upload multi-task training config
## Acknowledgement
We acknowledge the excellent implementation from [mmpose](https://github.com/open-mmlab/mmpose) and [MAE](https://github.com/facebookresearch/mae).

configs/_base_/datasets/300w.py | 384

@@ -0,0 +1,384 @@
dataset_info = dict(
dataset_name='300w',
paper_info=dict(
author='Sagonas, Christos and Antonakos, Epameinondas '
'and Tzimiropoulos, Georgios and Zafeiriou, Stefanos '
'and Pantic, Maja',
title='300 faces in-the-wild challenge: '
'Database and results',
container='Image and vision computing',
year='2016',
homepage='https://ibug.doc.ic.ac.uk/resources/300-W/',
),
keypoint_info={
0:
dict(
name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-16'),
1:
dict(
name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-15'),
2:
dict(
name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-14'),
3:
dict(
name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-13'),
4:
dict(
name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-12'),
5:
dict(
name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-11'),
6:
dict(
name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-10'),
7:
dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-9'),
8:
dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap=''),
9:
dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-7'),
10:
dict(
name='kpt-10', id=10, color=[255, 255, 255], type='',
swap='kpt-6'),
11:
dict(
name='kpt-11', id=11, color=[255, 255, 255], type='',
swap='kpt-5'),
12:
dict(
name='kpt-12', id=12, color=[255, 255, 255], type='',
swap='kpt-4'),
13:
dict(
name='kpt-13', id=13, color=[255, 255, 255], type='',
swap='kpt-3'),
14:
dict(
name='kpt-14', id=14, color=[255, 255, 255], type='',
swap='kpt-2'),
15:
dict(
name='kpt-15', id=15, color=[255, 255, 255], type='',
swap='kpt-1'),
16:
dict(
name='kpt-16', id=16, color=[255, 255, 255], type='',
swap='kpt-0'),
17:
dict(
name='kpt-17',
id=17,
color=[255, 255, 255],
type='',
swap='kpt-26'),
18:
dict(
name='kpt-18',
id=18,
color=[255, 255, 255],
type='',
swap='kpt-25'),
19:
dict(
name='kpt-19',
id=19,
color=[255, 255, 255],
type='',
swap='kpt-24'),
20:
dict(
name='kpt-20',
id=20,
color=[255, 255, 255],
type='',
swap='kpt-23'),
21:
dict(
name='kpt-21',
id=21,
color=[255, 255, 255],
type='',
swap='kpt-22'),
22:
dict(
name='kpt-22',
id=22,
color=[255, 255, 255],
type='',
swap='kpt-21'),
23:
dict(
name='kpt-23',
id=23,
color=[255, 255, 255],
type='',
swap='kpt-20'),
24:
dict(
name='kpt-24',
id=24,
color=[255, 255, 255],
type='',
swap='kpt-19'),
25:
dict(
name='kpt-25',
id=25,
color=[255, 255, 255],
type='',
swap='kpt-18'),
26:
dict(
name='kpt-26',
id=26,
color=[255, 255, 255],
type='',
swap='kpt-17'),
27:
dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''),
28:
dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap=''),
29:
dict(name='kpt-29', id=29, color=[255, 255, 255], type='', swap=''),
30:
dict(name='kpt-30', id=30, color=[255, 255, 255], type='', swap=''),
31:
dict(
name='kpt-31',
id=31,
color=[255, 255, 255],
type='',
swap='kpt-35'),
32:
dict(
name='kpt-32',
id=32,
color=[255, 255, 255],
type='',
swap='kpt-34'),
33:
dict(name='kpt-33', id=33, color=[255, 255, 255], type='', swap=''),
34:
dict(
name='kpt-34',
id=34,
color=[255, 255, 255],
type='',
swap='kpt-32'),
35:
dict(
name='kpt-35',
id=35,
color=[255, 255, 255],
type='',
swap='kpt-31'),
36:
dict(
name='kpt-36',
id=36,
color=[255, 255, 255],
type='',
swap='kpt-45'),
37:
dict(
name='kpt-37',
id=37,
color=[255, 255, 255],
type='',
swap='kpt-44'),
38:
dict(
name='kpt-38',
id=38,
color=[255, 255, 255],
type='',
swap='kpt-43'),
39:
dict(
name='kpt-39',
id=39,
color=[255, 255, 255],
type='',
swap='kpt-42'),
40:
dict(
name='kpt-40',
id=40,
color=[255, 255, 255],
type='',
swap='kpt-47'),
41:
dict(
name='kpt-41',
id=41,
color=[255, 255, 255],
type='',
swap='kpt-46'),
42:
dict(
name='kpt-42',
id=42,
color=[255, 255, 255],
type='',
swap='kpt-39'),
43:
dict(
name='kpt-43',
id=43,
color=[255, 255, 255],
type='',
swap='kpt-38'),
44:
dict(
name='kpt-44',
id=44,
color=[255, 255, 255],
type='',
swap='kpt-37'),
45:
dict(
name='kpt-45',
id=45,
color=[255, 255, 255],
type='',
swap='kpt-36'),
46:
dict(
name='kpt-46',
id=46,
color=[255, 255, 255],
type='',
swap='kpt-41'),
47:
dict(
name='kpt-47',
id=47,
color=[255, 255, 255],
type='',
swap='kpt-40'),
48:
dict(
name='kpt-48',
id=48,
color=[255, 255, 255],
type='',
swap='kpt-54'),
49:
dict(
name='kpt-49',
id=49,
color=[255, 255, 255],
type='',
swap='kpt-53'),
50:
dict(
name='kpt-50',
id=50,
color=[255, 255, 255],
type='',
swap='kpt-52'),
51:
dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''),
52:
dict(
name='kpt-52',
id=52,
color=[255, 255, 255],
type='',
swap='kpt-50'),
53:
dict(
name='kpt-53',
id=53,
color=[255, 255, 255],
type='',
swap='kpt-49'),
54:
dict(
name='kpt-54',
id=54,
color=[255, 255, 255],
type='',
swap='kpt-48'),
55:
dict(
name='kpt-55',
id=55,
color=[255, 255, 255],
type='',
swap='kpt-59'),
56:
dict(
name='kpt-56',
id=56,
color=[255, 255, 255],
type='',
swap='kpt-58'),
57:
dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''),
58:
dict(
name='kpt-58',
id=58,
color=[255, 255, 255],
type='',
swap='kpt-56'),
59:
dict(
name='kpt-59',
id=59,
color=[255, 255, 255],
type='',
swap='kpt-55'),
60:
dict(
name='kpt-60',
id=60,
color=[255, 255, 255],
type='',
swap='kpt-64'),
61:
dict(
name='kpt-61',
id=61,
color=[255, 255, 255],
type='',
swap='kpt-63'),
62:
dict(name='kpt-62', id=62, color=[255, 255, 255], type='', swap=''),
63:
dict(
name='kpt-63',
id=63,
color=[255, 255, 255],
type='',
swap='kpt-61'),
64:
dict(
name='kpt-64',
id=64,
color=[255, 255, 255],
type='',
swap='kpt-60'),
65:
dict(
name='kpt-65',
id=65,
color=[255, 255, 255],
type='',
swap='kpt-67'),
66:
dict(name='kpt-66', id=66, color=[255, 255, 255], type='', swap=''),
67:
dict(
name='kpt-67',
id=67,
color=[255, 255, 255],
type='',
swap='kpt-65'),
},
skeleton_info={},
joint_weights=[1.] * 68,
sigmas=[])

configs/_base_/datasets/aflw.py | 83

@@ -0,0 +1,83 @@
dataset_info = dict(
dataset_name='aflw',
paper_info=dict(
author='Koestinger, Martin and Wohlhart, Paul and '
'Roth, Peter M and Bischof, Horst',
title='Annotated facial landmarks in the wild: '
'A large-scale, real-world database for facial '
'landmark localization',
container='2011 IEEE international conference on computer '
'vision workshops (ICCV workshops)',
year='2011',
homepage='https://www.tugraz.at/institute/icg/research/'
'team-bischof/lrs/downloads/aflw/',
),
keypoint_info={
0:
dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-5'),
1:
dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-4'),
2:
dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'),
3:
dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'),
4:
dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-1'),
5:
dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-0'),
6:
dict(
name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-11'),
7:
dict(
name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-10'),
8:
dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'),
9:
dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'),
10:
dict(
name='kpt-10', id=10, color=[255, 255, 255], type='',
swap='kpt-7'),
11:
dict(
name='kpt-11', id=11, color=[255, 255, 255], type='',
swap='kpt-6'),
12:
dict(
name='kpt-12',
id=12,
color=[255, 255, 255],
type='',
swap='kpt-14'),
13:
dict(name='kpt-13', id=13, color=[255, 255, 255], type='', swap=''),
14:
dict(
name='kpt-14',
id=14,
color=[255, 255, 255],
type='',
swap='kpt-12'),
15:
dict(
name='kpt-15',
id=15,
color=[255, 255, 255],
type='',
swap='kpt-17'),
16:
dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''),
17:
dict(
name='kpt-17',
id=17,
color=[255, 255, 255],
type='',
swap='kpt-15'),
18:
dict(name='kpt-18', id=18, color=[255, 255, 255], type='', swap='')
},
skeleton_info={},
joint_weights=[1.] * 19,
sigmas=[])

configs/_base_/datasets/aic.py | 140

@@ -0,0 +1,140 @@
dataset_info = dict(
dataset_name='aic',
paper_info=dict(
author='Wu, Jiahong and Zheng, He and Zhao, Bo and '
'Li, Yixin and Yan, Baoming and Liang, Rui and '
'Wang, Wenjia and Zhou, Shipei and Lin, Guosen and '
'Fu, Yanwei and others',
title='Ai challenger: A large-scale dataset for going '
'deeper in image understanding',
container='arXiv',
year='2017',
homepage='https://github.com/AIChallenger/AI_Challenger_2017',
),
keypoint_info={
0:
dict(
name='right_shoulder',
id=0,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
1:
dict(
name='right_elbow',
id=1,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
2:
dict(
name='right_wrist',
id=2,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
3:
dict(
name='left_shoulder',
id=3,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
4:
dict(
name='left_elbow',
id=4,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
5:
dict(
name='left_wrist',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
6:
dict(
name='right_hip',
id=6,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
7:
dict(
name='right_knee',
id=7,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
8:
dict(
name='right_ankle',
id=8,
color=[255, 128, 0],
type='lower',
swap='left_ankle'),
9:
dict(
name='left_hip',
id=9,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
10:
dict(
name='left_knee',
id=10,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
11:
dict(
name='left_ankle',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
12:
dict(
name='head_top',
id=12,
color=[51, 153, 255],
type='upper',
swap=''),
13:
dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap='')
},
skeleton_info={
0:
dict(link=('right_wrist', 'right_elbow'), id=0, color=[255, 128, 0]),
1: dict(
link=('right_elbow', 'right_shoulder'), id=1, color=[255, 128, 0]),
2: dict(link=('right_shoulder', 'neck'), id=2, color=[51, 153, 255]),
3: dict(link=('neck', 'left_shoulder'), id=3, color=[51, 153, 255]),
4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
6: dict(link=('right_ankle', 'right_knee'), id=6, color=[255, 128, 0]),
7: dict(link=('right_knee', 'right_hip'), id=7, color=[255, 128, 0]),
8: dict(link=('right_hip', 'left_hip'), id=8, color=[51, 153, 255]),
9: dict(link=('left_hip', 'left_knee'), id=9, color=[0, 255, 0]),
10: dict(link=('left_knee', 'left_ankle'), id=10, color=[0, 255, 0]),
11: dict(link=('head_top', 'neck'), id=11, color=[51, 153, 255]),
12: dict(
link=('right_shoulder', 'right_hip'), id=12, color=[51, 153, 255]),
13:
dict(link=('left_shoulder', 'left_hip'), id=13, color=[51, 153, 255])
},
joint_weights=[
1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.
],
# 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
# 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
# delta = 2 x sigma
sigmas=[
0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144,
0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081,
0.01291456, 0.01236173
])
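The `sigmas` above are per-keypoint tolerance constants for OKS-style evaluation (the comment notes delta = 2 x sigma). As a rough sketch of how such sigmas are consumed, mirroring pycocotools' keypoint evaluation rather than code from this commit:
```python
import numpy as np

# Rough OKS sketch mirroring pycocotools' keypoint evaluation (not code
# from this commit). d: per-keypoint distances between prediction and
# ground truth, area: ground-truth object area, vis: visibility flags.
def oks(d, area, vis, sigmas):
    var = (2 * np.asarray(sigmas)) ** 2       # delta = 2 x sigma
    e = d ** 2 / (2 * area * var + np.spacing(1))
    mask = vis > 0
    return np.exp(-e)[mask].sum() / max(mask.sum(), 1)
```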

configs/_base_/datasets/animalpose.py | 166

@@ -0,0 +1,166 @@
dataset_info = dict(
dataset_name='animalpose',
paper_info=dict(
author='Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and '
'Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing',
title='Cross-Domain Adaptation for Animal Pose Estimation',
container='The IEEE International Conference on '
'Computer Vision (ICCV)',
year='2019',
homepage='https://sites.google.com/view/animal-pose/',
),
keypoint_info={
0:
dict(
name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
1:
dict(
name='R_Eye',
id=1,
color=[255, 128, 0],
type='upper',
swap='L_Eye'),
2:
dict(
name='L_EarBase',
id=2,
color=[0, 255, 0],
type='upper',
swap='R_EarBase'),
3:
dict(
name='R_EarBase',
id=3,
color=[255, 128, 0],
type='upper',
swap='L_EarBase'),
4:
dict(name='Nose', id=4, color=[51, 153, 255], type='upper', swap=''),
5:
dict(name='Throat', id=5, color=[51, 153, 255], type='upper', swap=''),
6:
dict(
name='TailBase', id=6, color=[51, 153, 255], type='lower',
swap=''),
7:
dict(
name='Withers', id=7, color=[51, 153, 255], type='upper', swap=''),
8:
dict(
name='L_F_Elbow',
id=8,
color=[0, 255, 0],
type='upper',
swap='R_F_Elbow'),
9:
dict(
name='R_F_Elbow',
id=9,
color=[255, 128, 0],
type='upper',
swap='L_F_Elbow'),
10:
dict(
name='L_B_Elbow',
id=10,
color=[0, 255, 0],
type='lower',
swap='R_B_Elbow'),
11:
dict(
name='R_B_Elbow',
id=11,
color=[255, 128, 0],
type='lower',
swap='L_B_Elbow'),
12:
dict(
name='L_F_Knee',
id=12,
color=[0, 255, 0],
type='upper',
swap='R_F_Knee'),
13:
dict(
name='R_F_Knee',
id=13,
color=[255, 128, 0],
type='upper',
swap='L_F_Knee'),
14:
dict(
name='L_B_Knee',
id=14,
color=[0, 255, 0],
type='lower',
swap='R_B_Knee'),
15:
dict(
name='R_B_Knee',
id=15,
color=[255, 128, 0],
type='lower',
swap='L_B_Knee'),
16:
dict(
name='L_F_Paw',
id=16,
color=[0, 255, 0],
type='upper',
swap='R_F_Paw'),
17:
dict(
name='R_F_Paw',
id=17,
color=[255, 128, 0],
type='upper',
swap='L_F_Paw'),
18:
dict(
name='L_B_Paw',
id=18,
color=[0, 255, 0],
type='lower',
swap='R_B_Paw'),
19:
dict(
name='R_B_Paw',
id=19,
color=[255, 128, 0],
type='lower',
swap='L_B_Paw')
},
skeleton_info={
0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[51, 153, 255]),
1: dict(link=('L_Eye', 'L_EarBase'), id=1, color=[0, 255, 0]),
2: dict(link=('R_Eye', 'R_EarBase'), id=2, color=[255, 128, 0]),
3: dict(link=('L_Eye', 'Nose'), id=3, color=[0, 255, 0]),
4: dict(link=('R_Eye', 'Nose'), id=4, color=[255, 128, 0]),
5: dict(link=('Nose', 'Throat'), id=5, color=[51, 153, 255]),
6: dict(link=('Throat', 'Withers'), id=6, color=[51, 153, 255]),
7: dict(link=('TailBase', 'Withers'), id=7, color=[51, 153, 255]),
8: dict(link=('Throat', 'L_F_Elbow'), id=8, color=[0, 255, 0]),
9: dict(link=('L_F_Elbow', 'L_F_Knee'), id=9, color=[0, 255, 0]),
10: dict(link=('L_F_Knee', 'L_F_Paw'), id=10, color=[0, 255, 0]),
11: dict(link=('Throat', 'R_F_Elbow'), id=11, color=[255, 128, 0]),
12: dict(link=('R_F_Elbow', 'R_F_Knee'), id=12, color=[255, 128, 0]),
13: dict(link=('R_F_Knee', 'R_F_Paw'), id=13, color=[255, 128, 0]),
14: dict(link=('TailBase', 'L_B_Elbow'), id=14, color=[0, 255, 0]),
15: dict(link=('L_B_Elbow', 'L_B_Knee'), id=15, color=[0, 255, 0]),
16: dict(link=('L_B_Knee', 'L_B_Paw'), id=16, color=[0, 255, 0]),
17: dict(link=('TailBase', 'R_B_Elbow'), id=17, color=[255, 128, 0]),
18: dict(link=('R_B_Elbow', 'R_B_Knee'), id=18, color=[255, 128, 0]),
19: dict(link=('R_B_Knee', 'R_B_Paw'), id=19, color=[255, 128, 0])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2,
1.5, 1.5, 1.5, 1.5
],
# Note: The original paper did not provide enough information about
# the sigmas. We modified from 'https://github.com/cocodataset/'
# 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523'
sigmas=[
0.025, 0.025, 0.026, 0.035, 0.035, 0.10, 0.10, 0.10, 0.107, 0.107,
0.107, 0.107, 0.087, 0.087, 0.087, 0.087, 0.089, 0.089, 0.089, 0.089
])

configs/_base_/datasets/ap10k.py | 142

@@ -0,0 +1,142 @@
dataset_info = dict(
dataset_name='ap10k',
paper_info=dict(
author='Yu, Hang and Xu, Yufei and Zhang, Jing and '
'Zhao, Wei and Guan, Ziyu and Tao, Dacheng',
title='AP-10K: A Benchmark for Animal Pose Estimation in the Wild',
container='35th Conference on Neural Information Processing Systems '
'(NeurIPS 2021) Track on Datasets and Benchmarks.',
year='2021',
homepage='https://github.com/AlexTheBad/AP-10K',
),
keypoint_info={
0:
dict(
name='L_Eye', id=0, color=[0, 255, 0], type='upper', swap='R_Eye'),
1:
dict(
name='R_Eye',
id=1,
color=[255, 128, 0],
type='upper',
swap='L_Eye'),
2:
dict(name='Nose', id=2, color=[51, 153, 255], type='upper', swap=''),
3:
dict(name='Neck', id=3, color=[51, 153, 255], type='upper', swap=''),
4:
dict(
name='Root of tail',
id=4,
color=[51, 153, 255],
type='lower',
swap=''),
5:
dict(
name='L_Shoulder',
id=5,
color=[51, 153, 255],
type='upper',
swap='R_Shoulder'),
6:
dict(
name='L_Elbow',
id=6,
color=[51, 153, 255],
type='upper',
swap='R_Elbow'),
7:
dict(
name='L_F_Paw',
id=7,
color=[0, 255, 0],
type='upper',
swap='R_F_Paw'),
8:
dict(
name='R_Shoulder',
id=8,
color=[0, 255, 0],
type='upper',
swap='L_Shoulder'),
9:
dict(
name='R_Elbow',
id=9,
color=[255, 128, 0],
type='upper',
swap='L_Elbow'),
10:
dict(
name='R_F_Paw',
id=10,
color=[0, 255, 0],
type='lower',
swap='L_F_Paw'),
11:
dict(
name='L_Hip',
id=11,
color=[255, 128, 0],
type='lower',
swap='R_Hip'),
12:
dict(
name='L_Knee',
id=12,
color=[255, 128, 0],
type='lower',
swap='R_Knee'),
13:
dict(
name='L_B_Paw',
id=13,
color=[0, 255, 0],
type='lower',
swap='R_B_Paw'),
14:
dict(
name='R_Hip', id=14, color=[0, 255, 0], type='lower',
swap='L_Hip'),
15:
dict(
name='R_Knee',
id=15,
color=[0, 255, 0],
type='lower',
swap='L_Knee'),
16:
dict(
name='R_B_Paw',
id=16,
color=[0, 255, 0],
type='lower',
swap='L_B_Paw'),
},
skeleton_info={
0: dict(link=('L_Eye', 'R_Eye'), id=0, color=[0, 0, 255]),
1: dict(link=('L_Eye', 'Nose'), id=1, color=[0, 0, 255]),
2: dict(link=('R_Eye', 'Nose'), id=2, color=[0, 0, 255]),
3: dict(link=('Nose', 'Neck'), id=3, color=[0, 255, 0]),
4: dict(link=('Neck', 'Root of tail'), id=4, color=[0, 255, 0]),
5: dict(link=('Neck', 'L_Shoulder'), id=5, color=[0, 255, 255]),
6: dict(link=('L_Shoulder', 'L_Elbow'), id=6, color=[0, 255, 255]),
7: dict(link=('L_Elbow', 'L_F_Paw'), id=7, color=[0, 255, 255]),
8: dict(link=('Neck', 'R_Shoulder'), id=8, color=[6, 156, 250]),
9: dict(link=('R_Shoulder', 'R_Elbow'), id=9, color=[6, 156, 250]),
10: dict(link=('R_Elbow', 'R_F_Paw'), id=10, color=[6, 156, 250]),
11: dict(link=('Root of tail', 'L_Hip'), id=11, color=[0, 255, 255]),
12: dict(link=('L_Hip', 'L_Knee'), id=12, color=[0, 255, 255]),
13: dict(link=('L_Knee', 'L_B_Paw'), id=13, color=[0, 255, 255]),
14: dict(link=('Root of tail', 'R_Hip'), id=14, color=[6, 156, 250]),
15: dict(link=('R_Hip', 'R_Knee'), id=15, color=[6, 156, 250]),
16: dict(link=('R_Knee', 'R_B_Paw'), id=16, color=[6, 156, 250]),
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.025, 0.025, 0.026, 0.035, 0.035, 0.079, 0.072, 0.062, 0.079, 0.072,
0.062, 0.107, 0.087, 0.089, 0.107, 0.087, 0.089
])
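The `skeleton_info` entries name their endpoints rather than using indices. The helper below is illustrative (an assumption about how downstream visualization code can consume this metadata, not mmpose's own API): it resolves the named links into keypoint-index pairs plus the per-link color.
```python
# Illustrative helper (not mmpose's own API): resolve skeleton_info's
# named links into (index, index, color) triples for drawing limbs.
def skeleton_links(dataset_info):
    name_to_id = {
        v['name']: k for k, v in dataset_info['keypoint_info'].items()
    }
    return [
        (name_to_id[link['link'][0]], name_to_id[link['link'][1]],
         link['color'])
        for link in dataset_info['skeleton_info'].values()
    ]
```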

configs/_base_/datasets/atrw.py | 144

@@ -0,0 +1,144 @@
dataset_info = dict(
dataset_name='atrw',
paper_info=dict(
author='Li, Shuyuan and Li, Jianguo and Tang, Hanlin '
'and Qian, Rui and Lin, Weiyao',
title='ATRW: A Benchmark for Amur Tiger '
'Re-identification in the Wild',
container='Proceedings of the 28th ACM '
'International Conference on Multimedia',
year='2020',
homepage='https://cvwc2019.github.io/challenge.html',
),
keypoint_info={
0:
dict(
name='left_ear',
id=0,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
1:
dict(
name='right_ear',
id=1,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
2:
dict(name='nose', id=2, color=[51, 153, 255], type='upper', swap=''),
3:
dict(
name='right_shoulder',
id=3,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
4:
dict(
name='right_front_paw',
id=4,
color=[255, 128, 0],
type='upper',
swap='left_front_paw'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='left_front_paw',
id=6,
color=[0, 255, 0],
type='upper',
swap='right_front_paw'),
7:
dict(
name='right_hip',
id=7,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
8:
dict(
name='right_knee',
id=8,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
9:
dict(
name='right_back_paw',
id=9,
color=[255, 128, 0],
type='lower',
swap='left_back_paw'),
10:
dict(
name='left_hip',
id=10,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
11:
dict(
name='left_knee',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
12:
dict(
name='left_back_paw',
id=12,
color=[0, 255, 0],
type='lower',
swap='right_back_paw'),
13:
dict(name='tail', id=13, color=[51, 153, 255], type='lower', swap=''),
14:
dict(
name='center', id=14, color=[51, 153, 255], type='lower', swap=''),
},
skeleton_info={
0:
dict(link=('left_ear', 'nose'), id=0, color=[51, 153, 255]),
1:
dict(link=('right_ear', 'nose'), id=1, color=[51, 153, 255]),
2:
dict(link=('nose', 'center'), id=2, color=[51, 153, 255]),
3:
dict(
link=('left_shoulder', 'left_front_paw'), id=3, color=[0, 255, 0]),
4:
dict(link=('left_shoulder', 'center'), id=4, color=[0, 255, 0]),
5:
dict(
link=('right_shoulder', 'right_front_paw'),
id=5,
color=[255, 128, 0]),
6:
dict(link=('right_shoulder', 'center'), id=6, color=[255, 128, 0]),
7:
dict(link=('tail', 'center'), id=7, color=[51, 153, 255]),
8:
dict(link=('right_back_paw', 'right_knee'), id=8, color=[255, 128, 0]),
9:
dict(link=('right_knee', 'right_hip'), id=9, color=[255, 128, 0]),
10:
dict(link=('right_hip', 'tail'), id=10, color=[255, 128, 0]),
11:
dict(link=('left_back_paw', 'left_knee'), id=11, color=[0, 255, 0]),
12:
dict(link=('left_knee', 'left_hip'), id=12, color=[0, 255, 0]),
13:
dict(link=('left_hip', 'tail'), id=13, color=[0, 255, 0]),
},
joint_weights=[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
sigmas=[
0.0277, 0.0823, 0.0831, 0.0202, 0.0716, 0.0263, 0.0646, 0.0302, 0.0440,
0.0316, 0.0333, 0.0547, 0.0263, 0.0683, 0.0539
])

configs/_base_/datasets/coco.py | 181

@@ -0,0 +1,181 @@
dataset_info = dict(
dataset_name='coco',
paper_info=dict(
author='Lin, Tsung-Yi and Maire, Michael and '
'Belongie, Serge and Hays, James and '
'Perona, Pietro and Ramanan, Deva and '
r'Doll{\'a}r, Piotr and Zitnick, C Lawrence',
title='Microsoft coco: Common objects in context',
container='European conference on computer vision',
year='2014',
homepage='http://cocodataset.org/',
),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
14:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
15:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
16:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
17:
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
18:
dict(
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])
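Each keypoint's `swap` field names its horizontal mirror, which is what flip augmentation and flip testing rely on. The sketch below (illustrative, not mmpose's internal helper) derives flip-pair indices from this metadata:
```python
# Illustrative (not mmpose's internal helper): derive flip-pair indices
# from the 'swap' fields of keypoint_info above.
def flip_pairs(keypoint_info):
    name_to_id = {v['name']: k for k, v in keypoint_info.items()}
    return sorted({
        tuple(sorted((i, name_to_id[v['swap']])))
        for i, v in keypoint_info.items() if v['swap']
    })

# For COCO this yields [(1, 2), (3, 4), ..., (15, 16)]:
# eyes, ears, shoulders, elbows, wrists, hips, knees, ankles.
```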

configs/_base_/datasets/coco_wholebody.py | 1154

File diff suppressed because it is too large

configs/_base_/datasets/coco_wholebody_face.py | 448

@@ -0,0 +1,448 @@
dataset_info = dict(
dataset_name='coco_wholebody_face',
paper_info=dict(
author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
'Wang, Can and Liu, Wentao and '
'Qian, Chen and Ouyang, Wanli and Luo, Ping',
title='Whole-Body Human Pose Estimation in the Wild',
container='Proceedings of the European '
'Conference on Computer Vision (ECCV)',
year='2020',
homepage='https://github.com/jin-s13/COCO-WholeBody/',
),
keypoint_info={
0:
dict(
name='face-0',
id=0,
color=[255, 255, 255],
type='',
swap='face-16'),
1:
dict(
name='face-1',
id=1,
color=[255, 255, 255],
type='',
swap='face-15'),
2:
dict(
name='face-2',
id=2,
color=[255, 255, 255],
type='',
swap='face-14'),
3:
dict(
name='face-3',
id=3,
color=[255, 255, 255],
type='',
swap='face-13'),
4:
dict(
name='face-4',
id=4,
color=[255, 255, 255],
type='',
swap='face-12'),
5:
dict(
name='face-5',
id=5,
color=[255, 255, 255],
type='',
swap='face-11'),
6:
dict(
name='face-6',
id=6,
color=[255, 255, 255],
type='',
swap='face-10'),
7:
dict(
name='face-7', id=7, color=[255, 255, 255], type='',
swap='face-9'),
8:
dict(name='face-8', id=8, color=[255, 255, 255], type='', swap=''),
9:
dict(
name='face-9', id=9, color=[255, 255, 255], type='',
swap='face-7'),
10:
dict(
name='face-10',
id=10,
color=[255, 255, 255],
type='',
swap='face-6'),
11:
dict(
name='face-11',
id=11,
color=[255, 255, 255],
type='',
swap='face-5'),
12:
dict(
name='face-12',
id=12,
color=[255, 255, 255],
type='',
swap='face-4'),
13:
dict(
name='face-13',
id=13,
color=[255, 255, 255],
type='',
swap='face-3'),
14:
dict(
name='face-14',
id=14,
color=[255, 255, 255],
type='',
swap='face-2'),
15:
dict(
name='face-15',
id=15,
color=[255, 255, 255],
type='',
swap='face-1'),
16:
dict(
name='face-16',
id=16,
color=[255, 255, 255],
type='',
swap='face-0'),
17:
dict(
name='face-17',
id=17,
color=[255, 255, 255],
type='',
swap='face-26'),
18:
dict(
name='face-18',
id=18,
color=[255, 255, 255],
type='',
swap='face-25'),
19:
dict(
name='face-19',
id=19,
color=[255, 255, 255],
type='',
swap='face-24'),
20:
dict(
name='face-20',
id=20,
color=[255, 255, 255],
type='',
swap='face-23'),
21:
dict(
name='face-21',
id=21,
color=[255, 255, 255],
type='',
swap='face-22'),
22:
dict(
name='face-22',
id=22,
color=[255, 255, 255],
type='',
swap='face-21'),
23:
dict(
name='face-23',
id=23,
color=[255, 255, 255],
type='',
swap='face-20'),
24:
dict(
name='face-24',
id=24,
color=[255, 255, 255],
type='',
swap='face-19'),
25:
dict(
name='face-25',
id=25,
color=[255, 255, 255],
type='',
swap='face-18'),
26:
dict(
name='face-26',
id=26,
color=[255, 255, 255],
type='',
swap='face-17'),
27:
dict(name='face-27', id=27, color=[255, 255, 255], type='', swap=''),
28:
dict(name='face-28', id=28, color=[255, 255, 255], type='', swap=''),
29:
dict(name='face-29', id=29, color=[255, 255, 255], type='', swap=''),
30:
dict(name='face-30', id=30, color=[255, 255, 255], type='', swap=''),
31:
dict(
name='face-31',
id=31,
color=[255, 255, 255],
type='',
swap='face-35'),
32:
dict(
name='face-32',
id=32,
color=[255, 255, 255],
type='',
swap='face-34'),
33:
dict(name='face-33', id=33, color=[255, 255, 255], type='', swap=''),
34:
dict(
name='face-34',
id=34,
color=[255, 255, 255],
type='',
swap='face-32'),
35:
dict(
name='face-35',
id=35,
color=[255, 255, 255],
type='',
swap='face-31'),
36:
dict(
name='face-36',
id=36,
color=[255, 255, 255],
type='',
swap='face-45'),
37:
dict(
name='face-37',
id=37,
color=[255, 255, 255],
type='',
swap='face-44'),
38:
dict(
name='face-38',
id=38,
color=[255, 255, 255],
type='',
swap='face-43'),
39:
dict(
name='face-39',
id=39,
color=[255, 255, 255],
type='',
swap='face-42'),
40:
dict(
name='face-40',
id=40,
color=[255, 255, 255],
type='',
swap='face-47'),
41:
dict(
name='face-41',
id=41,
color=[255, 255, 255],
type='',
swap='face-46'),
42:
dict(
name='face-42',
id=42,
color=[255, 255, 255],
type='',
swap='face-39'),
43:
dict(
name='face-43',
id=43,
color=[255, 255, 255],
type='',
swap='face-38'),
44:
dict(
name='face-44',
id=44,
color=[255, 255, 255],
type='',
swap='face-37'),
45:
dict(
name='face-45',
id=45,
color=[255, 255, 255],
type='',
swap='face-36'),
46:
dict(
name='face-46',
id=46,
color=[255, 255, 255],
type='',
swap='face-41'),
47:
dict(
name='face-47',
id=47,
color=[255, 255, 255],
type='',
swap='face-40'),
48:
dict(
name='face-48',
id=48,
color=[255, 255, 255],
type='',
swap='face-54'),
49:
dict(
name='face-49',
id=49,
color=[255, 255, 255],
type='',
swap='face-53'),
50:
dict(
name='face-50',
id=50,
color=[255, 255, 255],
type='',
swap='face-52'),
51:
        dict(name='face-51', id=51, color=[255, 255, 255], type='', swap=''),
52:
dict(
name='face-52',
id=52,
color=[255, 255, 255],
type='',
swap='face-50'),
53:
dict(
name='face-53',
id=53,
color=[255, 255, 255],
type='',
swap='face-49'),
54:
dict(
name='face-54',
id=54,
color=[255, 255, 255],
type='',
swap='face-48'),
55:
dict(
name='face-55',
id=55,
color=[255, 255, 255],
type='',
swap='face-59'),
56:
dict(
name='face-56',
id=56,
color=[255, 255, 255],
type='',
swap='face-58'),
57:
dict(name='face-57', id=57, color=[255, 255, 255], type='', swap=''),
58:
dict(
name='face-58',
id=58,
color=[255, 255, 255],
type='',
swap='face-56'),
59:
dict(
name='face-59',
id=59,
color=[255, 255, 255],
type='',
swap='face-55'),
60:
dict(
name='face-60',
id=60,
color=[255, 255, 255],
type='',
swap='face-64'),
61:
dict(
name='face-61',
id=61,
color=[255, 255, 255],
type='',
swap='face-63'),
62:
dict(name='face-62', id=62, color=[255, 255, 255], type='', swap=''),
63:
dict(
name='face-63',
id=63,
color=[255, 255, 255],
type='',
swap='face-61'),
64:
dict(
name='face-64',
id=64,
color=[255, 255, 255],
type='',
swap='face-60'),
65:
dict(
name='face-65',
id=65,
color=[255, 255, 255],
type='',
swap='face-67'),
66:
dict(name='face-66', id=66, color=[255, 255, 255], type='', swap=''),
67:
dict(
name='face-67',
id=67,
color=[255, 255, 255],
type='',
swap='face-65')
},
skeleton_info={},
joint_weights=[1.] * 68,
# 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
# 'evaluation/myeval_wholebody.py#L177'
sigmas=[
0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020, 0.023,
0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013, 0.012, 0.011,
0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015, 0.009, 0.007, 0.007,
0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017, 0.011, 0.009, 0.011,
0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010, 0.034, 0.008, 0.008,
0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009, 0.009, 0.009, 0.007,
0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01, 0.008
])
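The face keypoints above are defined almost entirely through their `swap` partners. A minimal sketch (a hypothetical helper, not part of the committed configs) of how those `swap` fields can be turned into horizontal-flip index pairs; `keypoint_info` stands for any of the dicts in these files:

def build_flip_pairs(keypoint_info):
    """Derive (id, swapped_id) pairs from the 'swap' fields.

    Keypoints with swap='' lie on the symmetry axis and are skipped.
    """
    name_to_id = {v['name']: v['id'] for v in keypoint_info.values()}
    pairs = []
    for v in keypoint_info.values():
        if v['swap'] and v['id'] < name_to_id[v['swap']]:
            pairs.append((v['id'], name_to_id[v['swap']]))  # record each pair once
    return pairs

# For the face config above, the first pairs trace the jaw line:
# (0, 16), (1, 15), (2, 14), ...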

147
configs/_base_/datasets/coco_wholebody_hand.py

@@ -0,0 +1,147 @@
dataset_info = dict(
dataset_name='coco_wholebody_hand',
paper_info=dict(
author='Jin, Sheng and Xu, Lumin and Xu, Jin and '
'Wang, Can and Liu, Wentao and '
'Qian, Chen and Ouyang, Wanli and Luo, Ping',
title='Whole-Body Human Pose Estimation in the Wild',
container='Proceedings of the European '
'Conference on Computer Vision (ECCV)',
year='2020',
homepage='https://github.com/jin-s13/COCO-WholeBody/',
),
keypoint_info={
0:
dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
2:
dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
3:
dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
4:
dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
5:
dict(
name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
6:
dict(
name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
7:
dict(
name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
8:
dict(
name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
9:
dict(
name='middle_finger1',
id=9,
color=[102, 178, 255],
type='',
swap=''),
10:
dict(
name='middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap=''),
11:
dict(
name='middle_finger3',
id=11,
color=[102, 178, 255],
type='',
swap=''),
12:
dict(
name='middle_finger4',
id=12,
color=[102, 178, 255],
type='',
swap=''),
13:
dict(
name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
14:
dict(
name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
15:
dict(
name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
16:
dict(
name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
17:
dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
18:
dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
19:
dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
20:
dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
},
skeleton_info={
0:
dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
4:
dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
5:
dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
6:
dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
7:
dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
8:
dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
9:
dict(
link=('middle_finger1', 'middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('middle_finger2', 'middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('middle_finger3', 'middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
13:
dict(
link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
14:
dict(
link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
15:
dict(
link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
16:
dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
17:
dict(
link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
18:
dict(
link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
19:
dict(
link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
},
joint_weights=[1.] * 21,
sigmas=[
0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018,
0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022,
0.031
])
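A minimal sketch (hypothetical helper, not from this commit) resolving the name-based `skeleton_info` links above into index edges for visualization:

def skeleton_to_edges(keypoint_info, skeleton_info):
    """Map each link ('name_a', 'name_b') to (id_a, id_b, color)."""
    name_to_id = {v['name']: v['id'] for v in keypoint_info.values()}
    return [(name_to_id[s['link'][0]], name_to_id[s['link'][1]], s['color'])
            for s in skeleton_info.values()]

# For the hand config above this yields 20 edges, starting with
# (0, 1, [255, 128, 0]) for ('wrist', 'thumb1').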

134
configs/_base_/datasets/cofw.py

@@ -0,0 +1,134 @@
dataset_info = dict(
dataset_name='cofw',
paper_info=dict(
author='Burgos-Artizzu, Xavier P and Perona, '
r'Pietro and Doll{\'a}r, Piotr',
title='Robust face landmark estimation under occlusion',
container='Proceedings of the IEEE international '
'conference on computer vision',
year='2013',
homepage='http://www.vision.caltech.edu/xpburgos/ICCV13/',
),
keypoint_info={
0:
dict(name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-1'),
1:
dict(name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-0'),
2:
dict(name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-3'),
3:
dict(name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-2'),
4:
dict(name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-6'),
5:
dict(name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-7'),
6:
dict(name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-4'),
7:
dict(name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-5'),
8:
dict(name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-9'),
9:
dict(name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-8'),
10:
dict(
name='kpt-10',
id=10,
color=[255, 255, 255],
type='',
swap='kpt-11'),
11:
dict(
name='kpt-11',
id=11,
color=[255, 255, 255],
type='',
swap='kpt-10'),
12:
dict(
name='kpt-12',
id=12,
color=[255, 255, 255],
type='',
swap='kpt-14'),
13:
dict(
name='kpt-13',
id=13,
color=[255, 255, 255],
type='',
swap='kpt-15'),
14:
dict(
name='kpt-14',
id=14,
color=[255, 255, 255],
type='',
swap='kpt-12'),
15:
dict(
name='kpt-15',
id=15,
color=[255, 255, 255],
type='',
swap='kpt-13'),
16:
dict(
name='kpt-16',
id=16,
color=[255, 255, 255],
type='',
swap='kpt-17'),
17:
dict(
name='kpt-17',
id=17,
color=[255, 255, 255],
type='',
swap='kpt-16'),
18:
dict(
name='kpt-18',
id=18,
color=[255, 255, 255],
type='',
swap='kpt-19'),
19:
dict(
name='kpt-19',
id=19,
color=[255, 255, 255],
type='',
swap='kpt-18'),
20:
dict(name='kpt-20', id=20, color=[255, 255, 255], type='', swap=''),
21:
dict(name='kpt-21', id=21, color=[255, 255, 255], type='', swap=''),
22:
dict(
name='kpt-22',
id=22,
color=[255, 255, 255],
type='',
swap='kpt-23'),
23:
dict(
name='kpt-23',
id=23,
color=[255, 255, 255],
type='',
swap='kpt-22'),
24:
dict(name='kpt-24', id=24, color=[255, 255, 255], type='', swap=''),
25:
dict(name='kpt-25', id=25, color=[255, 255, 255], type='', swap=''),
26:
dict(name='kpt-26', id=26, color=[255, 255, 255], type='', swap=''),
27:
dict(name='kpt-27', id=27, color=[255, 255, 255], type='', swap=''),
28:
dict(name='kpt-28', id=28, color=[255, 255, 255], type='', swap='')
},
skeleton_info={},
joint_weights=[1.] * 29,
sigmas=[])

147
configs/_base_/datasets/crowdpose.py

@@ -0,0 +1,147 @@
dataset_info = dict(
dataset_name='crowdpose',
paper_info=dict(
author='Li, Jiefeng and Wang, Can and Zhu, Hao and '
'Mao, Yihuan and Fang, Hao-Shu and Lu, Cewu',
title='CrowdPose: Efficient Crowded Scenes Pose Estimation '
'and A New Benchmark',
container='Proceedings of IEEE Conference on Computer '
'Vision and Pattern Recognition (CVPR)',
year='2019',
homepage='https://github.com/Jeff-sjtu/CrowdPose',
),
keypoint_info={
0:
dict(
name='left_shoulder',
id=0,
color=[51, 153, 255],
type='upper',
swap='right_shoulder'),
1:
dict(
name='right_shoulder',
id=1,
color=[51, 153, 255],
type='upper',
swap='left_shoulder'),
2:
dict(
name='left_elbow',
id=2,
color=[51, 153, 255],
type='upper',
swap='right_elbow'),
3:
dict(
name='right_elbow',
id=3,
color=[51, 153, 255],
type='upper',
swap='left_elbow'),
4:
dict(
name='left_wrist',
id=4,
color=[51, 153, 255],
type='upper',
swap='right_wrist'),
5:
dict(
name='right_wrist',
id=5,
color=[0, 255, 0],
type='upper',
swap='left_wrist'),
6:
dict(
name='left_hip',
id=6,
color=[255, 128, 0],
type='lower',
swap='right_hip'),
7:
dict(
name='right_hip',
id=7,
color=[0, 255, 0],
type='lower',
swap='left_hip'),
8:
dict(
name='left_knee',
id=8,
color=[255, 128, 0],
type='lower',
swap='right_knee'),
9:
dict(
name='right_knee',
id=9,
color=[0, 255, 0],
type='lower',
swap='left_knee'),
10:
dict(
name='left_ankle',
id=10,
color=[255, 128, 0],
type='lower',
swap='right_ankle'),
11:
dict(
name='right_ankle',
id=11,
color=[0, 255, 0],
type='lower',
swap='left_ankle'),
12:
dict(
name='top_head', id=12, color=[255, 128, 0], type='upper',
swap=''),
13:
dict(name='neck', id=13, color=[0, 255, 0], type='upper', swap='')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('top_head', 'neck'), id=12, color=[51, 153, 255]),
13:
dict(link=('right_shoulder', 'neck'), id=13, color=[51, 153, 255]),
14:
dict(link=('left_shoulder', 'neck'), id=14, color=[51, 153, 255])
},
joint_weights=[
0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2, 0.2, 0.5
],
sigmas=[
0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087,
0.089, 0.089, 0.079, 0.079
])
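The `sigmas` above are per-keypoint falloff constants for Object Keypoint Similarity. A minimal sketch of the COCO-style OKS they feed into (following the pycocotools formulation; `pred`, `gt`, `vis`, and `area` are assumed inputs, not names from this commit):

import numpy as np

def oks(pred, gt, vis, area, sigmas):
    """pred, gt: (K, 2) keypoint arrays; vis: (K,) visibility flags."""
    var = (2 * np.asarray(sigmas)) ** 2          # per-keypoint variance
    d2 = np.sum((pred - gt) ** 2, axis=1)        # squared pixel distances
    e = d2 / (2 * var * (area + np.spacing(1)))  # normalized error
    return float(np.mean(np.exp(-e[vis > 0]))) if (vis > 0).any() else 0.0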

74
configs/_base_/datasets/deepfashion_full.py

@@ -0,0 +1,74 @@
dataset_info = dict(
dataset_name='deepfashion_full',
paper_info=dict(
author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
'and Wang, Xiaogang and Tang, Xiaoou',
title='DeepFashion: Powering Robust Clothes Recognition '
'and Retrieval with Rich Annotations',
container='Proceedings of IEEE Conference on Computer '
'Vision and Pattern Recognition (CVPR)',
year='2016',
homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
'DeepFashion/LandmarkDetection.html',
),
keypoint_info={
0:
dict(
name='left collar',
id=0,
color=[255, 255, 255],
type='',
swap='right collar'),
1:
dict(
name='right collar',
id=1,
color=[255, 255, 255],
type='',
swap='left collar'),
2:
dict(
name='left sleeve',
id=2,
color=[255, 255, 255],
type='',
swap='right sleeve'),
3:
dict(
name='right sleeve',
id=3,
color=[255, 255, 255],
type='',
swap='left sleeve'),
        4:
        dict(
            name='left waistline',
            id=4,
            color=[255, 255, 255],
            type='',
            swap='right waistline'),
        5:
        dict(
            name='right waistline',
            id=5,
            color=[255, 255, 255],
            type='',
            swap='left waistline'),
        6:
        dict(
            name='left hem',
            id=6,
            color=[255, 255, 255],
            type='',
            swap='right hem'),
        7:
        dict(
            name='right hem',
            id=7,
            color=[255, 255, 255],
            type='',
            swap='left hem'),
},
skeleton_info={},
joint_weights=[1.] * 8,
sigmas=[])
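Every `id` in these configs must equal its dict key, and every non-empty `swap` must point back symmetrically. A minimal checker sketch (hypothetical helper, not part of the committed files) enforcing both invariants:

def check_keypoint_info(keypoint_info):
    by_name = {v['name']: v for v in keypoint_info.values()}
    for key, v in keypoint_info.items():
        assert v['id'] == key, f"id mismatch at key {key}: got {v['id']}"
        if v['swap']:
            partner = by_name.get(v['swap'])
            assert partner is not None, f"unknown swap target {v['swap']}"
            assert partner['swap'] == v['name'], (
                f"swap not symmetric for {v['name']}")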

46
configs/_base_/datasets/deepfashion_lower.py

@@ -0,0 +1,46 @@
dataset_info = dict(
dataset_name='deepfashion_lower',
paper_info=dict(
author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
'and Wang, Xiaogang and Tang, Xiaoou',
title='DeepFashion: Powering Robust Clothes Recognition '
'and Retrieval with Rich Annotations',
container='Proceedings of IEEE Conference on Computer '
'Vision and Pattern Recognition (CVPR)',
year='2016',
homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
'DeepFashion/LandmarkDetection.html',
),
keypoint_info={
0:
dict(
name='left waistline',
id=0,
color=[255, 255, 255],
type='',
swap='right waistline'),
1:
dict(
name='right waistline',
id=1,
color=[255, 255, 255],
type='',
swap='left waistline'),
2:
dict(
name='left hem',
id=2,
color=[255, 255, 255],
type='',
swap='right hem'),
3:
dict(
name='right hem',
id=3,
color=[255, 255, 255],
type='',
swap='left hem'),
},
skeleton_info={},
joint_weights=[1.] * 4,
sigmas=[])

60
configs/_base_/datasets/deepfashion_upper.py

@@ -0,0 +1,60 @@
dataset_info = dict(
dataset_name='deepfashion_upper',
paper_info=dict(
author='Liu, Ziwei and Luo, Ping and Qiu, Shi '
'and Wang, Xiaogang and Tang, Xiaoou',
title='DeepFashion: Powering Robust Clothes Recognition '
'and Retrieval with Rich Annotations',
container='Proceedings of IEEE Conference on Computer '
'Vision and Pattern Recognition (CVPR)',
year='2016',
homepage='http://mmlab.ie.cuhk.edu.hk/projects/'
'DeepFashion/LandmarkDetection.html',
),
keypoint_info={
0:
dict(
name='left collar',
id=0,
color=[255, 255, 255],
type='',
swap='right collar'),
1:
dict(
name='right collar',
id=1,
color=[255, 255, 255],
type='',
swap='left collar'),
2:
dict(
name='left sleeve',
id=2,
color=[255, 255, 255],
type='',
swap='right sleeve'),
3:
dict(
name='right sleeve',
id=3,
color=[255, 255, 255],
type='',
swap='left sleeve'),
4:
dict(
name='left hem',
id=4,
color=[255, 255, 255],
type='',
swap='right hem'),
5:
dict(
name='right hem',
id=5,
color=[255, 255, 255],
type='',
swap='left hem'),
},
skeleton_info={},
joint_weights=[1.] * 6,
sigmas=[])

237
configs/_base_/datasets/fly.py

@@ -0,0 +1,237 @@
dataset_info = dict(
dataset_name='fly',
paper_info=dict(
author='Pereira, Talmo D and Aldarondo, Diego E and '
'Willmore, Lindsay and Kislin, Mikhail and '
'Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W',
title='Fast animal pose estimation using deep neural networks',
container='Nature methods',
year='2019',
homepage='https://github.com/jgraving/DeepPoseKit-Data',
),
keypoint_info={
0:
dict(name='head', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='eyeL', id=1, color=[255, 255, 255], type='', swap='eyeR'),
2:
dict(name='eyeR', id=2, color=[255, 255, 255], type='', swap='eyeL'),
3:
dict(name='neck', id=3, color=[255, 255, 255], type='', swap=''),
4:
dict(name='thorax', id=4, color=[255, 255, 255], type='', swap=''),
5:
dict(name='abdomen', id=5, color=[255, 255, 255], type='', swap=''),
6:
dict(
name='forelegR1',
id=6,
color=[255, 255, 255],
type='',
swap='forelegL1'),
7:
dict(
name='forelegR2',
id=7,
color=[255, 255, 255],
type='',
swap='forelegL2'),
8:
dict(
name='forelegR3',
id=8,
color=[255, 255, 255],
type='',
swap='forelegL3'),
9:
dict(
name='forelegR4',
id=9,
color=[255, 255, 255],
type='',
swap='forelegL4'),
10:
dict(
name='midlegR1',
id=10,
color=[255, 255, 255],
type='',
swap='midlegL1'),
11:
dict(
name='midlegR2',
id=11,
color=[255, 255, 255],
type='',
swap='midlegL2'),
12:
dict(
name='midlegR3',
id=12,
color=[255, 255, 255],
type='',
swap='midlegL3'),
13:
dict(
name='midlegR4',
id=13,
color=[255, 255, 255],
type='',
swap='midlegL4'),
14:
dict(
name='hindlegR1',
id=14,
color=[255, 255, 255],
type='',
swap='hindlegL1'),
15:
dict(
name='hindlegR2',
id=15,
color=[255, 255, 255],
type='',
swap='hindlegL2'),
16:
dict(
name='hindlegR3',
id=16,
color=[255, 255, 255],
type='',
swap='hindlegL3'),
17:
dict(
name='hindlegR4',
id=17,
color=[255, 255, 255],
type='',
swap='hindlegL4'),
18:
dict(
name='forelegL1',
id=18,
color=[255, 255, 255],
type='',
swap='forelegR1'),
19:
dict(
name='forelegL2',
id=19,
color=[255, 255, 255],
type='',
swap='forelegR2'),
20:
dict(
name='forelegL3',
id=20,
color=[255, 255, 255],
type='',
swap='forelegR3'),
21:
dict(
name='forelegL4',
id=21,
color=[255, 255, 255],
type='',
swap='forelegR4'),
22:
dict(
name='midlegL1',
id=22,
color=[255, 255, 255],
type='',
swap='midlegR1'),
23:
dict(
name='midlegL2',
id=23,
color=[255, 255, 255],
type='',
swap='midlegR2'),
24:
dict(
name='midlegL3',
id=24,
color=[255, 255, 255],
type='',
swap='midlegR3'),
25:
dict(
name='midlegL4',
id=25,
color=[255, 255, 255],
type='',
swap='midlegR4'),
26:
dict(
name='hindlegL1',
id=26,
color=[255, 255, 255],
type='',
swap='hindlegR1'),
27:
dict(
name='hindlegL2',
id=27,
color=[255, 255, 255],
type='',
swap='hindlegR2'),
28:
dict(
name='hindlegL3',
id=28,
color=[255, 255, 255],
type='',
swap='hindlegR3'),
29:
dict(
name='hindlegL4',
id=29,
color=[255, 255, 255],
type='',
swap='hindlegR4'),
30:
dict(
name='wingL', id=30, color=[255, 255, 255], type='', swap='wingR'),
31:
dict(
name='wingR', id=31, color=[255, 255, 255], type='', swap='wingL'),
},
skeleton_info={
0: dict(link=('eyeL', 'head'), id=0, color=[255, 255, 255]),
1: dict(link=('eyeR', 'head'), id=1, color=[255, 255, 255]),
2: dict(link=('neck', 'head'), id=2, color=[255, 255, 255]),
3: dict(link=('thorax', 'neck'), id=3, color=[255, 255, 255]),
4: dict(link=('abdomen', 'thorax'), id=4, color=[255, 255, 255]),
5: dict(link=('forelegR2', 'forelegR1'), id=5, color=[255, 255, 255]),
6: dict(link=('forelegR3', 'forelegR2'), id=6, color=[255, 255, 255]),
7: dict(link=('forelegR4', 'forelegR3'), id=7, color=[255, 255, 255]),
8: dict(link=('midlegR2', 'midlegR1'), id=8, color=[255, 255, 255]),
9: dict(link=('midlegR3', 'midlegR2'), id=9, color=[255, 255, 255]),
10: dict(link=('midlegR4', 'midlegR3'), id=10, color=[255, 255, 255]),
11:
dict(link=('hindlegR2', 'hindlegR1'), id=11, color=[255, 255, 255]),
12:
dict(link=('hindlegR3', 'hindlegR2'), id=12, color=[255, 255, 255]),
13:
dict(link=('hindlegR4', 'hindlegR3'), id=13, color=[255, 255, 255]),
14:
dict(link=('forelegL2', 'forelegL1'), id=14, color=[255, 255, 255]),
15:
dict(link=('forelegL3', 'forelegL2'), id=15, color=[255, 255, 255]),
16:
dict(link=('forelegL4', 'forelegL3'), id=16, color=[255, 255, 255]),
17: dict(link=('midlegL2', 'midlegL1'), id=17, color=[255, 255, 255]),
18: dict(link=('midlegL3', 'midlegL2'), id=18, color=[255, 255, 255]),
19: dict(link=('midlegL4', 'midlegL3'), id=19, color=[255, 255, 255]),
20:
dict(link=('hindlegL2', 'hindlegL1'), id=20, color=[255, 255, 255]),
21:
dict(link=('hindlegL3', 'hindlegL2'), id=21, color=[255, 255, 255]),
22:
dict(link=('hindlegL4', 'hindlegL3'), id=22, color=[255, 255, 255]),
23: dict(link=('wingL', 'neck'), id=23, color=[255, 255, 255]),
24: dict(link=('wingR', 'neck'), id=24, color=[255, 255, 255])
},
joint_weights=[1.] * 32,
sigmas=[])

144
configs/_base_/datasets/freihand2d.py

@@ -0,0 +1,144 @@
dataset_info = dict(
dataset_name='freihand',
paper_info=dict(
author='Zimmermann, Christian and Ceylan, Duygu and '
'Yang, Jimei and Russell, Bryan and '
'Argus, Max and Brox, Thomas',
title='Freihand: A dataset for markerless capture of hand pose '
'and shape from single rgb images',
container='Proceedings of the IEEE International '
'Conference on Computer Vision',
year='2019',
homepage='https://lmb.informatik.uni-freiburg.de/projects/freihand/',
),
keypoint_info={
0:
dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
2:
dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
3:
dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
4:
dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
5:
dict(
name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
6:
dict(
name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
7:
dict(
name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
8:
dict(
name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
9:
dict(
name='middle_finger1',
id=9,
color=[102, 178, 255],
type='',
swap=''),
10:
dict(
name='middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap=''),
11:
dict(
name='middle_finger3',
id=11,
color=[102, 178, 255],
type='',
swap=''),
12:
dict(
name='middle_finger4',
id=12,
color=[102, 178, 255],
type='',
swap=''),
13:
dict(
name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
14:
dict(
name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
15:
dict(
name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
16:
dict(
name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
17:
dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
18:
dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
19:
dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
20:
dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
},
skeleton_info={
0:
dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
4:
dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
5:
dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
6:
dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
7:
dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
8:
dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
9:
dict(
link=('middle_finger1', 'middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('middle_finger2', 'middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('middle_finger3', 'middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
13:
dict(
link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
14:
dict(
link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
15:
dict(
link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
16:
dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
17:
dict(
link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
18:
dict(
link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
19:
dict(
link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
},
joint_weights=[1.] * 21,
sigmas=[])

152
configs/_base_/datasets/h36m.py

@@ -0,0 +1,152 @@
dataset_info = dict(
dataset_name='h36m',
paper_info=dict(
author='Ionescu, Catalin and Papava, Dragos and '
'Olaru, Vlad and Sminchisescu, Cristian',
title='Human3.6M: Large Scale Datasets and Predictive '
'Methods for 3D Human Sensing in Natural Environments',
container='IEEE Transactions on Pattern Analysis and '
'Machine Intelligence',
year='2014',
homepage='http://vision.imar.ro/human3.6m/description.php',
),
keypoint_info={
0:
dict(name='root', id=0, color=[51, 153, 255], type='lower', swap=''),
1:
dict(
name='right_hip',
id=1,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
2:
dict(
name='right_knee',
id=2,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
3:
dict(
name='right_foot',
id=3,
color=[255, 128, 0],
type='lower',
swap='left_foot'),
4:
dict(
name='left_hip',
id=4,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
5:
dict(
name='left_knee',
id=5,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
6:
dict(
name='left_foot',
id=6,
color=[0, 255, 0],
type='lower',
swap='right_foot'),
7:
dict(name='spine', id=7, color=[51, 153, 255], type='upper', swap=''),
8:
dict(name='thorax', id=8, color=[51, 153, 255], type='upper', swap=''),
9:
dict(
name='neck_base',
id=9,
color=[51, 153, 255],
type='upper',
swap=''),
10:
dict(name='head', id=10, color=[51, 153, 255], type='upper', swap=''),
11:
dict(
name='left_shoulder',
id=11,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
12:
dict(
name='left_elbow',
id=12,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
13:
dict(
name='left_wrist',
id=13,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
14:
dict(
name='right_shoulder',
id=14,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
15:
dict(
name='right_elbow',
id=15,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
16:
dict(
name='right_wrist',
id=16,
color=[255, 128, 0],
type='upper',
swap='left_wrist')
},
skeleton_info={
0:
dict(link=('root', 'left_hip'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_hip', 'left_knee'), id=1, color=[0, 255, 0]),
2:
dict(link=('left_knee', 'left_foot'), id=2, color=[0, 255, 0]),
3:
dict(link=('root', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('right_hip', 'right_knee'), id=4, color=[255, 128, 0]),
5:
dict(link=('right_knee', 'right_foot'), id=5, color=[255, 128, 0]),
6:
dict(link=('root', 'spine'), id=6, color=[51, 153, 255]),
7:
dict(link=('spine', 'thorax'), id=7, color=[51, 153, 255]),
8:
dict(link=('thorax', 'neck_base'), id=8, color=[51, 153, 255]),
9:
dict(link=('neck_base', 'head'), id=9, color=[51, 153, 255]),
10:
dict(link=('thorax', 'left_shoulder'), id=10, color=[0, 255, 0]),
11:
dict(link=('left_shoulder', 'left_elbow'), id=11, color=[0, 255, 0]),
12:
dict(link=('left_elbow', 'left_wrist'), id=12, color=[0, 255, 0]),
13:
dict(link=('thorax', 'right_shoulder'), id=13, color=[255, 128, 0]),
14:
dict(
link=('right_shoulder', 'right_elbow'), id=14, color=[255, 128,
0]),
15:
dict(link=('right_elbow', 'right_wrist'), id=15, color=[255, 128, 0])
},
joint_weights=[1.] * 17,
sigmas=[],
stats_info=dict(bbox_center=(528., 427.), bbox_scale=400.))
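`stats_info` carries dataset-level statistics; one plausible use (an assumption for illustration, not taken from this commit) is mapping pixel coordinates into a roughly unit range around the dataset-wide bounding-box center:

import numpy as np

def normalize_kpts(kpts, center=(528., 427.), scale=400.):
    """Shift by the dataset bbox center and divide by the bbox scale."""
    return (np.asarray(kpts, dtype=float) - np.asarray(center)) / scale

def denormalize_kpts(kpts, center=(528., 427.), scale=400.):
    """Inverse of normalize_kpts."""
    return np.asarray(kpts, dtype=float) * scale + np.asarray(center)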

1157
configs/_base_/datasets/halpe.py

File diff suppressed because it is too large

201
configs/_base_/datasets/horse10.py

@@ -0,0 +1,201 @@
dataset_info = dict(
dataset_name='horse10',
paper_info=dict(
author='Mathis, Alexander and Biasi, Thomas and '
'Schneider, Steffen and '
'Yuksekgonul, Mert and Rogers, Byron and '
'Bethge, Matthias and '
'Mathis, Mackenzie W',
title='Pretraining boosts out-of-domain robustness '
'for pose estimation',
container='Proceedings of the IEEE/CVF Winter Conference on '
'Applications of Computer Vision',
year='2021',
homepage='http://www.mackenziemathislab.org/horse10',
),
keypoint_info={
0:
dict(name='Nose', id=0, color=[255, 153, 255], type='upper', swap=''),
1:
dict(name='Eye', id=1, color=[255, 153, 255], type='upper', swap=''),
2:
dict(
name='Nearknee',
id=2,
color=[255, 102, 255],
type='upper',
swap=''),
3:
dict(
name='Nearfrontfetlock',
id=3,
color=[255, 102, 255],
type='upper',
swap=''),
4:
dict(
name='Nearfrontfoot',
id=4,
color=[255, 102, 255],
type='upper',
swap=''),
5:
dict(
name='Offknee', id=5, color=[255, 102, 255], type='upper',
swap=''),
6:
dict(
name='Offfrontfetlock',
id=6,
color=[255, 102, 255],
type='upper',
swap=''),
7:
dict(
name='Offfrontfoot',
id=7,
color=[255, 102, 255],
type='upper',
swap=''),
8:
dict(
name='Shoulder',
id=8,
color=[255, 153, 255],
type='upper',
swap=''),
9:
dict(
name='Midshoulder',
id=9,
color=[255, 153, 255],
type='upper',
swap=''),
10:
dict(
name='Elbow', id=10, color=[255, 153, 255], type='upper', swap=''),
11:
dict(
name='Girth', id=11, color=[255, 153, 255], type='upper', swap=''),
12:
dict(
name='Wither', id=12, color=[255, 153, 255], type='upper',
swap=''),
13:
dict(
name='Nearhindhock',
id=13,
color=[255, 51, 255],
type='lower',
swap=''),
14:
dict(
name='Nearhindfetlock',
id=14,
color=[255, 51, 255],
type='lower',
swap=''),
15:
dict(
name='Nearhindfoot',
id=15,
color=[255, 51, 255],
type='lower',
swap=''),
16:
dict(name='Hip', id=16, color=[255, 153, 255], type='lower', swap=''),
17:
dict(
name='Stifle', id=17, color=[255, 153, 255], type='lower',
swap=''),
18:
dict(
name='Offhindhock',
id=18,
color=[255, 51, 255],
type='lower',
swap=''),
19:
dict(
name='Offhindfetlock',
id=19,
color=[255, 51, 255],
type='lower',
swap=''),
20:
dict(
name='Offhindfoot',
id=20,
color=[255, 51, 255],
type='lower',
swap=''),
21:
dict(
name='Ischium',
id=21,
color=[255, 153, 255],
type='lower',
swap='')
},
skeleton_info={
0:
dict(link=('Nose', 'Eye'), id=0, color=[255, 153, 255]),
1:
dict(link=('Eye', 'Wither'), id=1, color=[255, 153, 255]),
2:
dict(link=('Wither', 'Hip'), id=2, color=[255, 153, 255]),
3:
dict(link=('Hip', 'Ischium'), id=3, color=[255, 153, 255]),
4:
dict(link=('Ischium', 'Stifle'), id=4, color=[255, 153, 255]),
5:
dict(link=('Stifle', 'Girth'), id=5, color=[255, 153, 255]),
6:
dict(link=('Girth', 'Elbow'), id=6, color=[255, 153, 255]),
7:
dict(link=('Elbow', 'Shoulder'), id=7, color=[255, 153, 255]),
8:
dict(link=('Shoulder', 'Midshoulder'), id=8, color=[255, 153, 255]),
9:
dict(link=('Midshoulder', 'Wither'), id=9, color=[255, 153, 255]),
10:
dict(
link=('Nearknee', 'Nearfrontfetlock'),
id=10,
color=[255, 102, 255]),
11:
dict(
link=('Nearfrontfetlock', 'Nearfrontfoot'),
id=11,
color=[255, 102, 255]),
12:
dict(
link=('Offknee', 'Offfrontfetlock'), id=12, color=[255, 102, 255]),
13:
dict(
link=('Offfrontfetlock', 'Offfrontfoot'),
id=13,
color=[255, 102, 255]),
14:
dict(
link=('Nearhindhock', 'Nearhindfetlock'),
id=14,
color=[255, 51, 255]),
15:
dict(
link=('Nearhindfetlock', 'Nearhindfoot'),
id=15,
color=[255, 51, 255]),
16:
dict(
link=('Offhindhock', 'Offhindfetlock'),
id=16,
color=[255, 51, 255]),
17:
dict(
link=('Offhindfetlock', 'Offhindfoot'),
id=17,
color=[255, 51, 255])
},
joint_weights=[1.] * 22,
sigmas=[])

142
configs/_base_/datasets/interhand2d.py

@@ -0,0 +1,142 @@
dataset_info = dict(
dataset_name='interhand2d',
paper_info=dict(
author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and '
'Shiratori, Takaaki and Lee, Kyoung Mu',
title='InterHand2.6M: A dataset and baseline for 3D '
'interacting hand pose estimation from a single RGB image',
container='arXiv',
year='2020',
homepage='https://mks0601.github.io/InterHand2.6M/',
),
keypoint_info={
0:
dict(name='thumb4', id=0, color=[255, 128, 0], type='', swap=''),
1:
dict(name='thumb3', id=1, color=[255, 128, 0], type='', swap=''),
2:
dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
3:
dict(name='thumb1', id=3, color=[255, 128, 0], type='', swap=''),
4:
dict(
name='forefinger4', id=4, color=[255, 153, 255], type='', swap=''),
5:
dict(
name='forefinger3', id=5, color=[255, 153, 255], type='', swap=''),
6:
dict(
name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
7:
dict(
name='forefinger1', id=7, color=[255, 153, 255], type='', swap=''),
8:
dict(
name='middle_finger4',
id=8,
color=[102, 178, 255],
type='',
swap=''),
9:
dict(
name='middle_finger3',
id=9,
color=[102, 178, 255],
type='',
swap=''),
10:
dict(
name='middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap=''),
11:
dict(
name='middle_finger1',
id=11,
color=[102, 178, 255],
type='',
swap=''),
12:
dict(
name='ring_finger4', id=12, color=[255, 51, 51], type='', swap=''),
13:
dict(
name='ring_finger3', id=13, color=[255, 51, 51], type='', swap=''),
14:
dict(
name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
15:
dict(
name='ring_finger1', id=15, color=[255, 51, 51], type='', swap=''),
16:
dict(name='pinky_finger4', id=16, color=[0, 255, 0], type='', swap=''),
17:
dict(name='pinky_finger3', id=17, color=[0, 255, 0], type='', swap=''),
18:
dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
19:
dict(name='pinky_finger1', id=19, color=[0, 255, 0], type='', swap=''),
20:
dict(name='wrist', id=20, color=[255, 255, 255], type='', swap='')
},
skeleton_info={
0:
dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
4:
dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
5:
dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
6:
dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
7:
dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
8:
dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
9:
dict(
link=('middle_finger1', 'middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('middle_finger2', 'middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('middle_finger3', 'middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
13:
dict(
link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
14:
dict(
link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
15:
dict(
link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
16:
dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
17:
dict(
link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
18:
dict(
link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
19:
dict(
link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
},
joint_weights=[1.] * 21,
sigmas=[])

487
configs/_base_/datasets/interhand3d.py

@@ -0,0 +1,487 @@
dataset_info = dict(
dataset_name='interhand3d',
paper_info=dict(
author='Moon, Gyeongsik and Yu, Shoou-I and Wen, He and '
'Shiratori, Takaaki and Lee, Kyoung Mu',
title='InterHand2.6M: A dataset and baseline for 3D '
'interacting hand pose estimation from a single RGB image',
container='arXiv',
year='2020',
homepage='https://mks0601.github.io/InterHand2.6M/',
),
keypoint_info={
0:
dict(
name='right_thumb4',
id=0,
color=[255, 128, 0],
type='',
swap='left_thumb4'),
1:
dict(
name='right_thumb3',
id=1,
color=[255, 128, 0],
type='',
swap='left_thumb3'),
2:
dict(
name='right_thumb2',
id=2,
color=[255, 128, 0],
type='',
swap='left_thumb2'),
3:
dict(
name='right_thumb1',
id=3,
color=[255, 128, 0],
type='',
swap='left_thumb1'),
4:
dict(
name='right_forefinger4',
id=4,
color=[255, 153, 255],
type='',
swap='left_forefinger4'),
5:
dict(
name='right_forefinger3',
id=5,
color=[255, 153, 255],
type='',
swap='left_forefinger3'),
6:
dict(
name='right_forefinger2',
id=6,
color=[255, 153, 255],
type='',
swap='left_forefinger2'),
7:
dict(
name='right_forefinger1',
id=7,
color=[255, 153, 255],
type='',
swap='left_forefinger1'),
8:
dict(
name='right_middle_finger4',
id=8,
color=[102, 178, 255],
type='',
swap='left_middle_finger4'),
9:
dict(
name='right_middle_finger3',
id=9,
color=[102, 178, 255],
type='',
swap='left_middle_finger3'),
10:
dict(
name='right_middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap='left_middle_finger2'),
11:
dict(
name='right_middle_finger1',
id=11,
color=[102, 178, 255],
type='',
swap='left_middle_finger1'),
12:
dict(
name='right_ring_finger4',
id=12,
color=[255, 51, 51],
type='',
swap='left_ring_finger4'),
13:
dict(
name='right_ring_finger3',
id=13,
color=[255, 51, 51],
type='',
swap='left_ring_finger3'),
14:
dict(
name='right_ring_finger2',
id=14,
color=[255, 51, 51],
type='',
swap='left_ring_finger2'),
15:
dict(
name='right_ring_finger1',
id=15,
color=[255, 51, 51],
type='',
swap='left_ring_finger1'),
16:
dict(
name='right_pinky_finger4',
id=16,
color=[0, 255, 0],
type='',
swap='left_pinky_finger4'),
17:
dict(
name='right_pinky_finger3',
id=17,
color=[0, 255, 0],
type='',
swap='left_pinky_finger3'),
18:
dict(
name='right_pinky_finger2',
id=18,
color=[0, 255, 0],
type='',
swap='left_pinky_finger2'),
19:
dict(
name='right_pinky_finger1',
id=19,
color=[0, 255, 0],
type='',
swap='left_pinky_finger1'),
20:
dict(
name='right_wrist',
id=20,
color=[255, 255, 255],
type='',
swap='left_wrist'),
21:
dict(
name='left_thumb4',
id=21,
color=[255, 128, 0],
type='',
swap='right_thumb4'),
22:
dict(
name='left_thumb3',
id=22,
color=[255, 128, 0],
type='',
swap='right_thumb3'),
23:
dict(
name='left_thumb2',
id=23,
color=[255, 128, 0],
type='',
swap='right_thumb2'),
24:
dict(
name='left_thumb1',
id=24,
color=[255, 128, 0],
type='',
swap='right_thumb1'),
25:
dict(
name='left_forefinger4',
id=25,
color=[255, 153, 255],
type='',
swap='right_forefinger4'),
26:
dict(
name='left_forefinger3',
id=26,
color=[255, 153, 255],
type='',
swap='right_forefinger3'),
27:
dict(
name='left_forefinger2',
id=27,
color=[255, 153, 255],
type='',
swap='right_forefinger2'),
28:
dict(
name='left_forefinger1',
id=28,
color=[255, 153, 255],
type='',
swap='right_forefinger1'),
29:
dict(
name='left_middle_finger4',
id=29,
color=[102, 178, 255],
type='',
swap='right_middle_finger4'),
30:
dict(
name='left_middle_finger3',
id=30,
color=[102, 178, 255],
type='',
swap='right_middle_finger3'),
31:
dict(
name='left_middle_finger2',
id=31,
color=[102, 178, 255],
type='',
swap='right_middle_finger2'),
32:
dict(
name='left_middle_finger1',
id=32,
color=[102, 178, 255],
type='',
swap='right_middle_finger1'),
33:
dict(
name='left_ring_finger4',
id=33,
color=[255, 51, 51],
type='',
swap='right_ring_finger4'),
34:
dict(
name='left_ring_finger3',
id=34,
color=[255, 51, 51],
type='',
swap='right_ring_finger3'),
35:
dict(
name='left_ring_finger2',
id=35,
color=[255, 51, 51],
type='',
swap='right_ring_finger2'),
36:
dict(
name='left_ring_finger1',
id=36,
color=[255, 51, 51],
type='',
swap='right_ring_finger1'),
37:
dict(
name='left_pinky_finger4',
id=37,
color=[0, 255, 0],
type='',
swap='right_pinky_finger4'),
38:
dict(
name='left_pinky_finger3',
id=38,
color=[0, 255, 0],
type='',
swap='right_pinky_finger3'),
39:
dict(
name='left_pinky_finger2',
id=39,
color=[0, 255, 0],
type='',
swap='right_pinky_finger2'),
40:
dict(
name='left_pinky_finger1',
id=40,
color=[0, 255, 0],
type='',
swap='right_pinky_finger1'),
41:
dict(
name='left_wrist',
id=41,
color=[255, 255, 255],
type='',
swap='right_wrist'),
},
skeleton_info={
0:
dict(link=('right_wrist', 'right_thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('right_thumb1', 'right_thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('right_thumb2', 'right_thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_thumb3', 'right_thumb4'), id=3, color=[255, 128, 0]),
4:
dict(
link=('right_wrist', 'right_forefinger1'),
id=4,
color=[255, 153, 255]),
5:
dict(
link=('right_forefinger1', 'right_forefinger2'),
id=5,
color=[255, 153, 255]),
6:
dict(
link=('right_forefinger2', 'right_forefinger3'),
id=6,
color=[255, 153, 255]),
7:
dict(
link=('right_forefinger3', 'right_forefinger4'),
id=7,
color=[255, 153, 255]),
8:
dict(
link=('right_wrist', 'right_middle_finger1'),
id=8,
color=[102, 178, 255]),
9:
dict(
link=('right_middle_finger1', 'right_middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('right_middle_finger2', 'right_middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('right_middle_finger3', 'right_middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(
link=('right_wrist', 'right_ring_finger1'),
id=12,
color=[255, 51, 51]),
13:
dict(
link=('right_ring_finger1', 'right_ring_finger2'),
id=13,
color=[255, 51, 51]),
14:
dict(
link=('right_ring_finger2', 'right_ring_finger3'),
id=14,
color=[255, 51, 51]),
15:
dict(
link=('right_ring_finger3', 'right_ring_finger4'),
id=15,
color=[255, 51, 51]),
16:
dict(
link=('right_wrist', 'right_pinky_finger1'),
id=16,
color=[0, 255, 0]),
17:
dict(
link=('right_pinky_finger1', 'right_pinky_finger2'),
id=17,
color=[0, 255, 0]),
18:
dict(
link=('right_pinky_finger2', 'right_pinky_finger3'),
id=18,
color=[0, 255, 0]),
19:
dict(
link=('right_pinky_finger3', 'right_pinky_finger4'),
id=19,
color=[0, 255, 0]),
20:
dict(link=('left_wrist', 'left_thumb1'), id=20, color=[255, 128, 0]),
21:
dict(link=('left_thumb1', 'left_thumb2'), id=21, color=[255, 128, 0]),
22:
dict(link=('left_thumb2', 'left_thumb3'), id=22, color=[255, 128, 0]),
23:
dict(link=('left_thumb3', 'left_thumb4'), id=23, color=[255, 128, 0]),
24:
dict(
link=('left_wrist', 'left_forefinger1'),
id=24,
color=[255, 153, 255]),
25:
dict(
link=('left_forefinger1', 'left_forefinger2'),
id=25,
color=[255, 153, 255]),
26:
dict(
link=('left_forefinger2', 'left_forefinger3'),
id=26,
color=[255, 153, 255]),
27:
dict(
link=('left_forefinger3', 'left_forefinger4'),
id=27,
color=[255, 153, 255]),
28:
dict(
link=('left_wrist', 'left_middle_finger1'),
id=28,
color=[102, 178, 255]),
29:
dict(
link=('left_middle_finger1', 'left_middle_finger2'),
id=29,
color=[102, 178, 255]),
30:
dict(
link=('left_middle_finger2', 'left_middle_finger3'),
id=30,
color=[102, 178, 255]),
31:
dict(
link=('left_middle_finger3', 'left_middle_finger4'),
id=31,
color=[102, 178, 255]),
32:
dict(
link=('left_wrist', 'left_ring_finger1'),
id=32,
color=[255, 51, 51]),
33:
dict(
link=('left_ring_finger1', 'left_ring_finger2'),
id=33,
color=[255, 51, 51]),
34:
dict(
link=('left_ring_finger2', 'left_ring_finger3'),
id=34,
color=[255, 51, 51]),
35:
dict(
link=('left_ring_finger3', 'left_ring_finger4'),
id=35,
color=[255, 51, 51]),
36:
dict(
link=('left_wrist', 'left_pinky_finger1'),
id=36,
color=[0, 255, 0]),
37:
dict(
link=('left_pinky_finger1', 'left_pinky_finger2'),
id=37,
color=[0, 255, 0]),
38:
dict(
link=('left_pinky_finger2', 'left_pinky_finger3'),
id=38,
color=[0, 255, 0]),
39:
dict(
link=('left_pinky_finger3', 'left_pinky_finger4'),
id=39,
color=[0, 255, 0]),
},
joint_weights=[1.] * 42,
sigmas=[])
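In the layout above, ids 0-20 cover the right hand and ids 21-41 the left. A minimal sketch (hypothetical helper) that recovers the split from the name prefixes instead of hard-coding the ranges:

def split_hands(keypoint_info):
    right = sorted(v['id'] for v in keypoint_info.values()
                   if v['name'].startswith('right_'))
    left = sorted(v['id'] for v in keypoint_info.values()
                  if v['name'].startswith('left_'))
    return right, left

# For the config above: right == list(range(21)), left == list(range(21, 42)).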

129
configs/_base_/datasets/jhmdb.py

@@ -0,0 +1,129 @@
dataset_info = dict(
dataset_name='jhmdb',
paper_info=dict(
author='H. Jhuang and J. Gall and S. Zuffi and '
'C. Schmid and M. J. Black',
title='Towards understanding action recognition',
container='International Conf. on Computer Vision (ICCV)',
year='2013',
homepage='http://jhmdb.is.tue.mpg.de/dataset',
),
keypoint_info={
0:
dict(name='neck', id=0, color=[255, 128, 0], type='upper', swap=''),
1:
dict(name='belly', id=1, color=[255, 128, 0], type='upper', swap=''),
2:
dict(name='head', id=2, color=[255, 128, 0], type='upper', swap=''),
3:
dict(
name='right_shoulder',
id=3,
color=[0, 255, 0],
type='upper',
swap='left_shoulder'),
4:
dict(
name='left_shoulder',
id=4,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
5:
dict(
name='right_hip',
id=5,
color=[0, 255, 0],
type='lower',
swap='left_hip'),
6:
dict(
name='left_hip',
id=6,
color=[51, 153, 255],
type='lower',
swap='right_hip'),
7:
dict(
name='right_elbow',
id=7,
color=[51, 153, 255],
type='upper',
swap='left_elbow'),
8:
dict(
name='left_elbow',
id=8,
color=[51, 153, 255],
type='upper',
swap='right_elbow'),
9:
dict(
name='right_knee',
id=9,
color=[51, 153, 255],
type='lower',
swap='left_knee'),
10:
dict(
name='left_knee',
id=10,
color=[255, 128, 0],
type='lower',
swap='right_knee'),
11:
dict(
name='right_wrist',
id=11,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
12:
dict(
name='left_wrist',
id=12,
color=[255, 128, 0],
type='upper',
swap='right_wrist'),
13:
dict(
name='right_ankle',
id=13,
color=[0, 255, 0],
type='lower',
swap='left_ankle'),
14:
dict(
name='left_ankle',
id=14,
color=[0, 255, 0],
type='lower',
swap='right_ankle')
},
skeleton_info={
0: dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
1: dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
2: dict(link=('right_hip', 'belly'), id=2, color=[255, 128, 0]),
3: dict(link=('belly', 'left_hip'), id=3, color=[0, 255, 0]),
4: dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
5: dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
6: dict(link=('belly', 'neck'), id=6, color=[51, 153, 255]),
7: dict(link=('neck', 'head'), id=7, color=[51, 153, 255]),
8: dict(link=('neck', 'right_shoulder'), id=8, color=[255, 128, 0]),
9: dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('right_elbow', 'right_wrist'), id=10, color=[255, 128, 0]),
11: dict(link=('neck', 'left_shoulder'), id=11, color=[0, 255, 0]),
12:
dict(link=('left_shoulder', 'left_elbow'), id=12, color=[0, 255, 0]),
13: dict(link=('left_elbow', 'left_wrist'), id=13, color=[0, 255, 0])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5, 1.5, 1.5
],
# Adapted from COCO dataset.
sigmas=[
0.025, 0.107, 0.025, 0.079, 0.079, 0.107, 0.107, 0.072, 0.072, 0.087,
0.087, 0.062, 0.062, 0.089, 0.089
])
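The non-uniform `joint_weights` above up-weight the harder, more distal joints (wrists and ankles at 1.5, elbows and knees at 1.2). A minimal sketch of how such weights typically enter a per-joint heatmap MSE (shapes are assumptions for illustration):

import numpy as np

def weighted_heatmap_mse(pred, target, joint_weights):
    """pred, target: (K, H, W) heatmaps; joint_weights: length-K sequence."""
    w = np.asarray(joint_weights, dtype=float)[:, None, None]
    return float(np.mean(w * (pred - target) ** 2))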

263
configs/_base_/datasets/locust.py

@@ -0,0 +1,263 @@
dataset_info = dict(
dataset_name='locust',
paper_info=dict(
author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
'Couzin, Iain D',
title='DeepPoseKit, a software toolkit for fast and robust '
'animal pose estimation using deep learning',
container='Elife',
year='2019',
homepage='https://github.com/jgraving/DeepPoseKit-Data',
),
keypoint_info={
0:
dict(name='head', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='neck', id=1, color=[255, 255, 255], type='', swap=''),
2:
dict(name='thorax', id=2, color=[255, 255, 255], type='', swap=''),
3:
dict(name='abdomen1', id=3, color=[255, 255, 255], type='', swap=''),
4:
dict(name='abdomen2', id=4, color=[255, 255, 255], type='', swap=''),
5:
dict(
name='anttipL',
id=5,
color=[255, 255, 255],
type='',
swap='anttipR'),
6:
dict(
name='antbaseL',
id=6,
color=[255, 255, 255],
type='',
swap='antbaseR'),
7:
dict(name='eyeL', id=7, color=[255, 255, 255], type='', swap='eyeR'),
8:
dict(
name='forelegL1',
id=8,
color=[255, 255, 255],
type='',
swap='forelegR1'),
9:
dict(
name='forelegL2',
id=9,
color=[255, 255, 255],
type='',
swap='forelegR2'),
10:
dict(
name='forelegL3',
id=10,
color=[255, 255, 255],
type='',
swap='forelegR3'),
11:
dict(
name='forelegL4',
id=11,
color=[255, 255, 255],
type='',
swap='forelegR4'),
12:
dict(
name='midlegL1',
id=12,
color=[255, 255, 255],
type='',
swap='midlegR1'),
13:
dict(
name='midlegL2',
id=13,
color=[255, 255, 255],
type='',
swap='midlegR2'),
14:
dict(
name='midlegL3',
id=14,
color=[255, 255, 255],
type='',
swap='midlegR3'),
15:
dict(
name='midlegL4',
id=15,
color=[255, 255, 255],
type='',
swap='midlegR4'),
16:
dict(
name='hindlegL1',
id=16,
color=[255, 255, 255],
type='',
swap='hindlegR1'),
17:
dict(
name='hindlegL2',
id=17,
color=[255, 255, 255],
type='',
swap='hindlegR2'),
18:
dict(
name='hindlegL3',
id=18,
color=[255, 255, 255],
type='',
swap='hindlegR3'),
19:
dict(
name='hindlegL4',
id=19,
color=[255, 255, 255],
type='',
swap='hindlegR4'),
20:
dict(
name='anttipR',
id=20,
color=[255, 255, 255],
type='',
swap='anttipL'),
21:
dict(
name='antbaseR',
id=21,
color=[255, 255, 255],
type='',
swap='antbaseL'),
22:
dict(name='eyeR', id=22, color=[255, 255, 255], type='', swap='eyeL'),
23:
dict(
name='forelegR1',
id=23,
color=[255, 255, 255],
type='',
swap='forelegL1'),
24:
dict(
name='forelegR2',
id=24,
color=[255, 255, 255],
type='',
swap='forelegL2'),
25:
dict(
name='forelegR3',
id=25,
color=[255, 255, 255],
type='',
swap='forelegL3'),
26:
dict(
name='forelegR4',
id=26,
color=[255, 255, 255],
type='',
swap='forelegL4'),
27:
dict(
name='midlegR1',
id=27,
color=[255, 255, 255],
type='',
swap='midlegL1'),
28:
dict(
name='midlegR2',
id=28,
color=[255, 255, 255],
type='',
swap='midlegL2'),
29:
dict(
name='midlegR3',
id=29,
color=[255, 255, 255],
type='',
swap='midlegL3'),
30:
dict(
name='midlegR4',
id=30,
color=[255, 255, 255],
type='',
swap='midlegL4'),
31:
dict(
name='hindlegR1',
id=31,
color=[255, 255, 255],
type='',
swap='hindlegL1'),
32:
dict(
name='hindlegR2',
id=32,
color=[255, 255, 255],
type='',
swap='hindlegL2'),
33:
dict(
name='hindlegR3',
id=33,
color=[255, 255, 255],
type='',
swap='hindlegL3'),
34:
dict(
name='hindlegR4',
id=34,
color=[255, 255, 255],
type='',
swap='hindlegL4')
},
skeleton_info={
0: dict(link=('neck', 'head'), id=0, color=[255, 255, 255]),
1: dict(link=('thorax', 'neck'), id=1, color=[255, 255, 255]),
2: dict(link=('abdomen1', 'thorax'), id=2, color=[255, 255, 255]),
3: dict(link=('abdomen2', 'abdomen1'), id=3, color=[255, 255, 255]),
4: dict(link=('antbaseL', 'anttipL'), id=4, color=[255, 255, 255]),
5: dict(link=('eyeL', 'antbaseL'), id=5, color=[255, 255, 255]),
6: dict(link=('forelegL2', 'forelegL1'), id=6, color=[255, 255, 255]),
7: dict(link=('forelegL3', 'forelegL2'), id=7, color=[255, 255, 255]),
8: dict(link=('forelegL4', 'forelegL3'), id=8, color=[255, 255, 255]),
9: dict(link=('midlegL2', 'midlegL1'), id=9, color=[255, 255, 255]),
10: dict(link=('midlegL3', 'midlegL2'), id=10, color=[255, 255, 255]),
11: dict(link=('midlegL4', 'midlegL3'), id=11, color=[255, 255, 255]),
12:
dict(link=('hindlegL2', 'hindlegL1'), id=12, color=[255, 255, 255]),
13:
dict(link=('hindlegL3', 'hindlegL2'), id=13, color=[255, 255, 255]),
14:
dict(link=('hindlegL4', 'hindlegL3'), id=14, color=[255, 255, 255]),
15: dict(link=('antbaseR', 'anttipR'), id=15, color=[255, 255, 255]),
16: dict(link=('eyeR', 'antbaseR'), id=16, color=[255, 255, 255]),
17:
dict(link=('forelegR2', 'forelegR1'), id=17, color=[255, 255, 255]),
18:
dict(link=('forelegR3', 'forelegR2'), id=18, color=[255, 255, 255]),
19:
dict(link=('forelegR4', 'forelegR3'), id=19, color=[255, 255, 255]),
20: dict(link=('midlegR2', 'midlegR1'), id=20, color=[255, 255, 255]),
21: dict(link=('midlegR3', 'midlegR2'), id=21, color=[255, 255, 255]),
22: dict(link=('midlegR4', 'midlegR3'), id=22, color=[255, 255, 255]),
23:
dict(link=('hindlegR2', 'hindlegR1'), id=23, color=[255, 255, 255]),
24:
dict(link=('hindlegR3', 'hindlegR2'), id=24, color=[255, 255, 255]),
25:
dict(link=('hindlegR4', 'hindlegR3'), id=25, color=[255, 255, 255])
},
joint_weights=[1.] * 35,
sigmas=[])

183
configs/_base_/datasets/macaque.py

@@ -0,0 +1,183 @@
dataset_info = dict(
dataset_name='macaque',
paper_info=dict(
author='Labuguen, Rollyn and Matsumoto, Jumpei and '
'Negrete, Salvador and Nishimaru, Hiroshi and '
'Nishijo, Hisao and Takada, Masahiko and '
'Go, Yasuhiro and Inoue, Ken-ichi and Shibata, Tomohiro',
title='MacaquePose: A novel "in the wild" macaque monkey pose dataset '
'for markerless motion capture',
container='bioRxiv',
year='2020',
homepage='http://www.pri.kyoto-u.ac.jp/datasets/'
'macaquepose/index.html',
),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
14:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
15:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
16:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
17:
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
18:
dict(
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])

156
configs/_base_/datasets/mhp.py

@@ -0,0 +1,156 @@
dataset_info = dict(
dataset_name='mhp',
paper_info=dict(
author='Zhao, Jian and Li, Jianshu and Cheng, Yu and '
'Sim, Terence and Yan, Shuicheng and Feng, Jiashi',
title='Understanding humans in crowded scenes: '
'Deep nested adversarial learning and a '
'new benchmark for multi-human parsing',
container='Proceedings of the 26th ACM '
'international conference on Multimedia',
year='2018',
homepage='https://lv-mhp.github.io/dataset',
),
keypoint_info={
0:
dict(
name='right_ankle',
id=0,
color=[255, 128, 0],
type='lower',
swap='left_ankle'),
1:
dict(
name='right_knee',
id=1,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
2:
dict(
name='right_hip',
id=2,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
3:
dict(
name='left_hip',
id=3,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
4:
dict(
name='left_knee',
id=4,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
5:
dict(
name='left_ankle',
id=5,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
6:
dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
7:
dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
8:
dict(
name='upper_neck',
id=8,
color=[51, 153, 255],
type='upper',
swap=''),
9:
dict(
name='head_top', id=9, color=[51, 153, 255], type='upper',
swap=''),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='right_elbow',
id=11,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
12:
dict(
name='right_shoulder',
id=12,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
13:
dict(
name='left_shoulder',
id=13,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
14:
dict(
name='left_elbow',
id=14,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
15:
dict(
name='left_wrist',
id=15,
color=[0, 255, 0],
type='upper',
swap='right_wrist')
},
skeleton_info={
0:
dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
1:
dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
2:
dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
3:
dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
4:
dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
5:
dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
6:
dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
7:
dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
8:
dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
9:
dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
10:
dict(
link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
13:
dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
14:
dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
},
joint_weights=[
1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
],
# Adapted from COCO dataset.
sigmas=[
0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
0.062, 0.072, 0.179, 0.179, 0.072, 0.062
])
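
# Editor's sketch (not part of the committed config): the sigmas above are
# adapted from COCO and enter the Object Keypoint Similarity (OKS) metric at
# evaluation time. A minimal numpy version of the standard OKS computation;
# argument names are illustrative.
import numpy as np

def oks(pred, gt, visible, area, sigmas):
    """pred, gt: (K, 2) keypoints; visible: (K,) flags; area: object area."""
    visible = np.asarray(visible)
    k_vars = (2 * np.asarray(sigmas)) ** 2
    dist2 = np.sum((np.asarray(pred) - np.asarray(gt)) ** 2, axis=1)
    # per-keypoint error, normalized by object scale and keypoint falloff
    per_kpt = dist2 / (2 * area * k_vars + np.spacing(1))
    mask = visible > 0
    return np.exp(-per_kpt[mask]).mean() if mask.any() else 0.0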

132
configs/_base_/datasets/mpi_inf_3dhp.py

@@ -0,0 +1,132 @@
dataset_info = dict(
dataset_name='mpi_inf_3dhp',
paper_info=dict(
author='Mehta, Dushyant and Rhodin, Helge and Casas, Dan and '
'Fua, Pascal and Sotnychenko, Oleksandr and Xu, Weipeng and '
'Theobalt, Christian',
title='Monocular 3D Human Pose Estimation In The Wild Using Improved '
'CNN Supervision',
container='2017 international conference on 3D vision (3DV)',
year='2017',
homepage='http://gvv.mpi-inf.mpg.de/3dhp-dataset',
),
keypoint_info={
0:
dict(
name='head_top', id=0, color=[51, 153, 255], type='upper',
swap=''),
1:
dict(name='neck', id=1, color=[51, 153, 255], type='upper', swap=''),
2:
dict(
name='right_shoulder',
id=2,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
3:
dict(
name='right_elbow',
id=3,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
4:
dict(
name='right_wrist',
id=4,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='left_elbow',
id=6,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
7:
dict(
name='left_wrist',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
8:
dict(
name='right_hip',
id=8,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
9:
dict(
name='right_knee',
id=9,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
10:
dict(
name='right_ankle',
id=10,
color=[255, 128, 0],
type='lower',
swap='left_ankle'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='left_knee',
id=12,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
13:
dict(
name='left_ankle',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
14:
dict(name='root', id=14, color=[51, 153, 255], type='lower', swap=''),
15:
dict(name='spine', id=15, color=[51, 153, 255], type='upper', swap=''),
16:
dict(name='head', id=16, color=[51, 153, 255], type='upper', swap='')
},
skeleton_info={
0: dict(link=('neck', 'right_shoulder'), id=0, color=[255, 128, 0]),
1: dict(
link=('right_shoulder', 'right_elbow'), id=1, color=[255, 128, 0]),
2:
dict(link=('right_elbow', 'right_wrist'), id=2, color=[255, 128, 0]),
3: dict(link=('neck', 'left_shoulder'), id=3, color=[0, 255, 0]),
4: dict(link=('left_shoulder', 'left_elbow'), id=4, color=[0, 255, 0]),
5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
6: dict(link=('root', 'right_hip'), id=6, color=[255, 128, 0]),
7: dict(link=('right_hip', 'right_knee'), id=7, color=[255, 128, 0]),
8: dict(link=('right_knee', 'right_ankle'), id=8, color=[255, 128, 0]),
9: dict(link=('root', 'left_hip'), id=9, color=[0, 255, 0]),
10: dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]),
11: dict(link=('left_knee', 'left_ankle'), id=11, color=[0, 255, 0]),
12: dict(link=('head_top', 'head'), id=12, color=[51, 153, 255]),
13: dict(link=('head', 'neck'), id=13, color=[51, 153, 255]),
14: dict(link=('neck', 'spine'), id=14, color=[51, 153, 255]),
15: dict(link=('spine', 'root'), id=15, color=[51, 153, 255])
},
joint_weights=[1.] * 17,
sigmas=[])

155
configs/_base_/datasets/mpii.py

@@ -0,0 +1,155 @@
dataset_info = dict(
dataset_name='mpii',
paper_info=dict(
author='Andriluka, Mykhaylo and Pishchulin, Leonid and '
'Gehler, Peter and Schiele, Bernt',
title='2D Human Pose Estimation: New Benchmark and '
'State of the Art Analysis',
container='IEEE Conference on Computer Vision and '
'Pattern Recognition (CVPR)',
year='2014',
homepage='http://human-pose.mpi-inf.mpg.de/',
),
keypoint_info={
0:
dict(
name='right_ankle',
id=0,
color=[255, 128, 0],
type='lower',
swap='left_ankle'),
1:
dict(
name='right_knee',
id=1,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
2:
dict(
name='right_hip',
id=2,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
3:
dict(
name='left_hip',
id=3,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
4:
dict(
name='left_knee',
id=4,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
5:
dict(
name='left_ankle',
id=5,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
6:
dict(name='pelvis', id=6, color=[51, 153, 255], type='lower', swap=''),
7:
dict(name='thorax', id=7, color=[51, 153, 255], type='upper', swap=''),
8:
dict(
name='upper_neck',
id=8,
color=[51, 153, 255],
type='upper',
swap=''),
9:
dict(
name='head_top', id=9, color=[51, 153, 255], type='upper',
swap=''),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='right_elbow',
id=11,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
12:
dict(
name='right_shoulder',
id=12,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
13:
dict(
name='left_shoulder',
id=13,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
14:
dict(
name='left_elbow',
id=14,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
15:
dict(
name='left_wrist',
id=15,
color=[0, 255, 0],
type='upper',
swap='right_wrist')
},
skeleton_info={
0:
dict(link=('right_ankle', 'right_knee'), id=0, color=[255, 128, 0]),
1:
dict(link=('right_knee', 'right_hip'), id=1, color=[255, 128, 0]),
2:
dict(link=('right_hip', 'pelvis'), id=2, color=[255, 128, 0]),
3:
dict(link=('pelvis', 'left_hip'), id=3, color=[0, 255, 0]),
4:
dict(link=('left_hip', 'left_knee'), id=4, color=[0, 255, 0]),
5:
dict(link=('left_knee', 'left_ankle'), id=5, color=[0, 255, 0]),
6:
dict(link=('pelvis', 'thorax'), id=6, color=[51, 153, 255]),
7:
dict(link=('thorax', 'upper_neck'), id=7, color=[51, 153, 255]),
8:
dict(link=('upper_neck', 'head_top'), id=8, color=[51, 153, 255]),
9:
dict(link=('upper_neck', 'right_shoulder'), id=9, color=[255, 128, 0]),
10:
dict(
link=('right_shoulder', 'right_elbow'), id=10, color=[255, 128,
0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('upper_neck', 'left_shoulder'), id=12, color=[0, 255, 0]),
13:
dict(link=('left_shoulder', 'left_elbow'), id=13, color=[0, 255, 0]),
14:
dict(link=('left_elbow', 'left_wrist'), id=14, color=[0, 255, 0])
},
joint_weights=[
1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
],
# Adapted from COCO dataset.
sigmas=[
0.089, 0.083, 0.107, 0.107, 0.083, 0.089, 0.026, 0.026, 0.026, 0.026,
0.062, 0.072, 0.179, 0.179, 0.072, 0.062
])

380
configs/_base_/datasets/mpii_trb.py

@@ -0,0 +1,380 @@
dataset_info = dict(
dataset_name='mpii_trb',
paper_info=dict(
author='Duan, Haodong and Lin, Kwan-Yee and Jin, Sheng and '
'Liu, Wentao and Qian, Chen and Ouyang, Wanli',
title='TRB: A Novel Triplet Representation for '
'Understanding 2D Human Body',
container='Proceedings of the IEEE International '
'Conference on Computer Vision',
year='2019',
homepage='https://github.com/kennymckormick/'
'Triplet-Representation-of-human-Body',
),
keypoint_info={
0:
dict(
name='left_shoulder',
id=0,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
1:
dict(
name='right_shoulder',
id=1,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
2:
dict(
name='left_elbow',
id=2,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
3:
dict(
name='right_elbow',
id=3,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
4:
dict(
name='left_wrist',
id=4,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
5:
dict(
name='right_wrist',
id=5,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
6:
dict(
name='left_hip',
id=6,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
7:
dict(
name='right_hip',
id=7,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
8:
dict(
name='left_knee',
id=8,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
9:
dict(
name='right_knee',
id=9,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
10:
dict(
name='left_ankle',
id=10,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
11:
dict(
name='right_ankle',
id=11,
color=[255, 128, 0],
type='lower',
swap='left_ankle'),
12:
dict(name='head', id=12, color=[51, 153, 255], type='upper', swap=''),
13:
dict(name='neck', id=13, color=[51, 153, 255], type='upper', swap=''),
14:
dict(
name='right_neck',
id=14,
color=[255, 255, 255],
type='upper',
swap='left_neck'),
15:
dict(
name='left_neck',
id=15,
color=[255, 255, 255],
type='upper',
swap='right_neck'),
16:
dict(
name='medial_right_shoulder',
id=16,
color=[255, 255, 255],
type='upper',
swap='medial_left_shoulder'),
17:
dict(
name='lateral_right_shoulder',
id=17,
color=[255, 255, 255],
type='upper',
swap='lateral_left_shoulder'),
18:
dict(
name='medial_right_bow',
id=18,
color=[255, 255, 255],
type='upper',
swap='medial_left_bow'),
19:
dict(
name='lateral_right_bow',
id=19,
color=[255, 255, 255],
type='upper',
swap='lateral_left_bow'),
20:
dict(
name='medial_right_wrist',
id=20,
color=[255, 255, 255],
type='upper',
swap='medial_left_wrist'),
21:
dict(
name='lateral_right_wrist',
id=21,
color=[255, 255, 255],
type='upper',
swap='lateral_left_wrist'),
22:
dict(
name='medial_left_shoulder',
id=22,
color=[255, 255, 255],
type='upper',
swap='medial_right_shoulder'),
23:
dict(
name='lateral_left_shoulder',
id=23,
color=[255, 255, 255],
type='upper',
swap='lateral_right_shoulder'),
24:
dict(
name='medial_left_bow',
id=24,
color=[255, 255, 255],
type='upper',
swap='medial_right_bow'),
25:
dict(
name='lateral_left_bow',
id=25,
color=[255, 255, 255],
type='upper',
swap='lateral_right_bow'),
26:
dict(
name='medial_left_wrist',
id=26,
color=[255, 255, 255],
type='upper',
swap='medial_right_wrist'),
27:
dict(
name='lateral_left_wrist',
id=27,
color=[255, 255, 255],
type='upper',
swap='lateral_right_wrist'),
28:
dict(
name='medial_right_hip',
id=28,
color=[255, 255, 255],
type='lower',
swap='medial_left_hip'),
29:
dict(
name='lateral_right_hip',
id=29,
color=[255, 255, 255],
type='lower',
swap='lateral_left_hip'),
30:
dict(
name='medial_right_knee',
id=30,
color=[255, 255, 255],
type='lower',
swap='medial_left_knee'),
31:
dict(
name='lateral_right_knee',
id=31,
color=[255, 255, 255],
type='lower',
swap='lateral_left_knee'),
32:
dict(
name='medial_right_ankle',
id=32,
color=[255, 255, 255],
type='lower',
swap='medial_left_ankle'),
33:
dict(
name='lateral_right_ankle',
id=33,
color=[255, 255, 255],
type='lower',
swap='lateral_left_ankle'),
34:
dict(
name='medial_left_hip',
id=34,
color=[255, 255, 255],
type='lower',
swap='medial_right_hip'),
35:
dict(
name='lateral_left_hip',
id=35,
color=[255, 255, 255],
type='lower',
swap='lateral_right_hip'),
36:
dict(
name='medial_left_knee',
id=36,
color=[255, 255, 255],
type='lower',
swap='medial_right_knee'),
37:
dict(
name='lateral_left_knee',
id=37,
color=[255, 255, 255],
type='lower',
swap='lateral_right_knee'),
38:
dict(
name='medial_left_ankle',
id=38,
color=[255, 255, 255],
type='lower',
swap='medial_right_ankle'),
39:
dict(
name='lateral_left_ankle',
id=39,
color=[255, 255, 255],
type='lower',
swap='lateral_right_ankle'),
},
skeleton_info={
0:
dict(link=('head', 'neck'), id=0, color=[51, 153, 255]),
1:
dict(link=('neck', 'left_shoulder'), id=1, color=[51, 153, 255]),
2:
dict(link=('neck', 'right_shoulder'), id=2, color=[51, 153, 255]),
3:
dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]),
4:
dict(
link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]),
5:
dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
6:
dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]),
7:
dict(link=('left_shoulder', 'left_hip'), id=7, color=[51, 153, 255]),
8:
dict(link=('right_shoulder', 'right_hip'), id=8, color=[51, 153, 255]),
9:
dict(link=('left_hip', 'right_hip'), id=9, color=[51, 153, 255]),
10:
dict(link=('left_hip', 'left_knee'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_hip', 'right_knee'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_knee', 'left_ankle'), id=12, color=[0, 255, 0]),
13:
dict(link=('right_knee', 'right_ankle'), id=13, color=[255, 128, 0]),
14:
dict(link=('right_neck', 'left_neck'), id=14, color=[255, 255, 255]),
15:
dict(
link=('medial_right_shoulder', 'lateral_right_shoulder'),
id=15,
color=[255, 255, 255]),
16:
dict(
link=('medial_right_bow', 'lateral_right_bow'),
id=16,
color=[255, 255, 255]),
17:
dict(
link=('medial_right_wrist', 'lateral_right_wrist'),
id=17,
color=[255, 255, 255]),
18:
dict(
link=('medial_left_shoulder', 'lateral_left_shoulder'),
id=18,
color=[255, 255, 255]),
19:
dict(
link=('medial_left_bow', 'lateral_left_bow'),
id=19,
color=[255, 255, 255]),
20:
dict(
link=('medial_left_wrist', 'lateral_left_wrist'),
id=20,
color=[255, 255, 255]),
21:
dict(
link=('medial_right_hip', 'lateral_right_hip'),
id=21,
color=[255, 255, 255]),
22:
dict(
link=('medial_right_knee', 'lateral_right_knee'),
id=22,
color=[255, 255, 255]),
23:
dict(
link=('medial_right_ankle', 'lateral_right_ankle'),
id=23,
color=[255, 255, 255]),
24:
dict(
link=('medial_left_hip', 'lateral_left_hip'),
id=24,
color=[255, 255, 255]),
25:
dict(
link=('medial_left_knee', 'lateral_left_knee'),
id=25,
color=[255, 255, 255]),
26:
dict(
link=('medial_left_ankle', 'lateral_left_ankle'),
id=26,
color=[255, 255, 255])
},
joint_weights=[1.] * 40,
sigmas=[])

181
configs/_base_/datasets/ochuman.py

@@ -0,0 +1,181 @@
dataset_info = dict(
dataset_name='ochuman',
paper_info=dict(
author='Zhang, Song-Hai and Li, Ruilong and Dong, Xin and '
'Rosin, Paul and Cai, Zixi and Han, Xi and '
'Yang, Dingcheng and Huang, Haozhi and Hu, Shi-Min',
title='Pose2seg: Detection free human instance segmentation',
container='Proceedings of the IEEE conference on computer '
'vision and pattern recognition',
year='2019',
homepage='https://github.com/liruilong940607/OCHumanApi',
),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='left_eye',
id=1,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
2:
dict(
name='right_eye',
id=2,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('left_eye', 'right_eye'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'left_eye'), id=13, color=[51, 153, 255]),
14:
dict(link=('nose', 'right_eye'), id=14, color=[51, 153, 255]),
15:
dict(link=('left_eye', 'left_ear'), id=15, color=[51, 153, 255]),
16:
dict(link=('right_eye', 'right_ear'), id=16, color=[51, 153, 255]),
17:
dict(link=('left_ear', 'left_shoulder'), id=17, color=[51, 153, 255]),
18:
dict(
link=('right_ear', 'right_shoulder'), id=18, color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])

142
configs/_base_/datasets/onehand10k.py

@@ -0,0 +1,142 @@
dataset_info = dict(
dataset_name='onehand10k',
paper_info=dict(
author='Wang, Yangang and Peng, Cong and Liu, Yebin',
title='Mask-pose cascaded cnn for 2d hand pose estimation '
'from single color image',
container='IEEE Transactions on Circuits and Systems '
'for Video Technology',
year='2018',
homepage='https://www.yangangwang.com/papers/WANG-MCC-2018-10.html',
),
keypoint_info={
0:
dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
2:
dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
3:
dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
4:
dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
5:
dict(
name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
6:
dict(
name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
7:
dict(
name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
8:
dict(
name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
9:
dict(
name='middle_finger1',
id=9,
color=[102, 178, 255],
type='',
swap=''),
10:
dict(
name='middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap=''),
11:
dict(
name='middle_finger3',
id=11,
color=[102, 178, 255],
type='',
swap=''),
12:
dict(
name='middle_finger4',
id=12,
color=[102, 178, 255],
type='',
swap=''),
13:
dict(
name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
14:
dict(
name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
15:
dict(
name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
16:
dict(
name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
17:
dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
18:
dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
19:
dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
20:
dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
},
skeleton_info={
0:
dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
4:
dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
5:
dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
6:
dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
7:
dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
8:
dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
9:
dict(
link=('middle_finger1', 'middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('middle_finger2', 'middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('middle_finger3', 'middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
13:
dict(
link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
14:
dict(
link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
15:
dict(
link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
16:
dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
17:
dict(
link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
18:
dict(
link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
19:
dict(
link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
},
joint_weights=[1.] * 21,
sigmas=[])

160
configs/_base_/datasets/panoptic_body3d.py

@@ -0,0 +1,160 @@
dataset_info = dict(
dataset_name='panoptic_pose_3d',
paper_info=dict(
author='Joo, Hanbyul and Simon, Tomas and Li, Xulong '
'and Liu, Hao and Tan, Lei and Gui, Lin and Banerjee, Sean '
'and Godisart, Timothy and Nabbe, Bart and Matthews, Iain '
'and Kanade, Takeo and Nobuhara, Shohei and Sheikh, Yaser',
title='Panoptic Studio: A Massively Multiview System '
'for Interaction Motion Capture',
container='IEEE Transactions on Pattern Analysis'
' and Machine Intelligence',
year='2017',
homepage='http://domedb.perception.cs.cmu.edu',
),
keypoint_info={
0:
dict(name='neck', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(name='nose', id=1, color=[51, 153, 255], type='upper', swap=''),
2:
dict(name='mid_hip', id=2, color=[0, 255, 0], type='lower', swap=''),
3:
dict(
name='left_shoulder',
id=3,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
4:
dict(
name='left_elbow',
id=4,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
5:
dict(
name='left_wrist',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
6:
dict(
name='left_hip',
id=6,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
7:
dict(
name='left_knee',
id=7,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
8:
dict(
name='left_ankle',
id=8,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
9:
dict(
name='right_shoulder',
id=9,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
10:
dict(
name='right_elbow',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
11:
dict(
name='right_wrist',
id=11,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='right_knee',
id=13,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
14:
dict(
name='right_ankle',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_ankle'),
15:
dict(
name='left_eye',
id=15,
color=[51, 153, 255],
type='upper',
swap='right_eye'),
16:
dict(
name='left_ear',
id=16,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
17:
dict(
name='right_eye',
id=17,
color=[51, 153, 255],
type='upper',
swap='left_eye'),
18:
dict(
name='right_ear',
id=18,
color=[51, 153, 255],
type='upper',
swap='left_ear')
},
skeleton_info={
0: dict(link=('nose', 'neck'), id=0, color=[51, 153, 255]),
1: dict(link=('neck', 'left_shoulder'), id=1, color=[0, 255, 0]),
2: dict(link=('neck', 'right_shoulder'), id=2, color=[255, 128, 0]),
3: dict(link=('left_shoulder', 'left_elbow'), id=3, color=[0, 255, 0]),
4: dict(
link=('right_shoulder', 'right_elbow'), id=4, color=[255, 128, 0]),
5: dict(link=('left_elbow', 'left_wrist'), id=5, color=[0, 255, 0]),
6:
dict(link=('right_elbow', 'right_wrist'), id=6, color=[255, 128, 0]),
7: dict(link=('left_ankle', 'left_knee'), id=7, color=[0, 255, 0]),
8: dict(link=('left_knee', 'left_hip'), id=8, color=[0, 255, 0]),
9: dict(link=('right_ankle', 'right_knee'), id=9, color=[255, 128, 0]),
10: dict(link=('right_knee', 'right_hip'), id=10, color=[255, 128, 0]),
11: dict(link=('mid_hip', 'left_hip'), id=11, color=[0, 255, 0]),
12: dict(link=('mid_hip', 'right_hip'), id=12, color=[255, 128, 0]),
13: dict(link=('mid_hip', 'neck'), id=13, color=[51, 153, 255]),
},
joint_weights=[
1.0, 1.0, 1.0, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2, 1.5, 1.0, 1.2,
1.5, 1.0, 1.0, 1.0, 1.0
],
sigmas=[
0.026, 0.026, 0.107, 0.079, 0.072, 0.062, 0.107, 0.087, 0.089, 0.079,
0.072, 0.062, 0.107, 0.087, 0.089, 0.025, 0.035, 0.025, 0.035
])

143
configs/_base_/datasets/panoptic_hand2d.py

@@ -0,0 +1,143 @@
dataset_info = dict(
dataset_name='panoptic_hand2d',
paper_info=dict(
author='Simon, Tomas and Joo, Hanbyul and '
'Matthews, Iain and Sheikh, Yaser',
title='Hand keypoint detection in single images using '
'multiview bootstrapping',
container='Proceedings of the IEEE conference on '
'Computer Vision and Pattern Recognition',
year='2017',
homepage='http://domedb.perception.cs.cmu.edu/handdb.html',
),
keypoint_info={
0:
dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
2:
dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
3:
dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
4:
dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
5:
dict(
name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
6:
dict(
name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
7:
dict(
name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
8:
dict(
name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
9:
dict(
name='middle_finger1',
id=9,
color=[102, 178, 255],
type='',
swap=''),
10:
dict(
name='middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap=''),
11:
dict(
name='middle_finger3',
id=11,
color=[102, 178, 255],
type='',
swap=''),
12:
dict(
name='middle_finger4',
id=12,
color=[102, 178, 255],
type='',
swap=''),
13:
dict(
name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
14:
dict(
name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
15:
dict(
name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
16:
dict(
name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
17:
dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
18:
dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
19:
dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
20:
dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
},
skeleton_info={
0:
dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
4:
dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
5:
dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
6:
dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
7:
dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
8:
dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
9:
dict(
link=('middle_finger1', 'middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('middle_finger2', 'middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('middle_finger3', 'middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
13:
dict(
link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
14:
dict(
link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
15:
dict(
link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
16:
dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
17:
dict(
link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
18:
dict(
link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
19:
dict(
link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
},
joint_weights=[1.] * 21,
sigmas=[])

176
configs/_base_/datasets/posetrack18.py

@@ -0,0 +1,176 @@
dataset_info = dict(
dataset_name='posetrack18',
paper_info=dict(
author='Andriluka, Mykhaylo and Iqbal, Umar and '
'Insafutdinov, Eldar and Pishchulin, Leonid and '
'Milan, Anton and Gall, Juergen and Schiele, Bernt',
title='Posetrack: A benchmark for human pose estimation and tracking',
container='Proceedings of the IEEE Conference on '
'Computer Vision and Pattern Recognition',
year='2018',
homepage='https://posetrack.net/users/download.php',
),
keypoint_info={
0:
dict(name='nose', id=0, color=[51, 153, 255], type='upper', swap=''),
1:
dict(
name='head_bottom',
id=1,
color=[51, 153, 255],
type='upper',
swap=''),
2:
dict(
name='head_top', id=2, color=[51, 153, 255], type='upper',
swap=''),
3:
dict(
name='left_ear',
id=3,
color=[51, 153, 255],
type='upper',
swap='right_ear'),
4:
dict(
name='right_ear',
id=4,
color=[51, 153, 255],
type='upper',
swap='left_ear'),
5:
dict(
name='left_shoulder',
id=5,
color=[0, 255, 0],
type='upper',
swap='right_shoulder'),
6:
dict(
name='right_shoulder',
id=6,
color=[255, 128, 0],
type='upper',
swap='left_shoulder'),
7:
dict(
name='left_elbow',
id=7,
color=[0, 255, 0],
type='upper',
swap='right_elbow'),
8:
dict(
name='right_elbow',
id=8,
color=[255, 128, 0],
type='upper',
swap='left_elbow'),
9:
dict(
name='left_wrist',
id=9,
color=[0, 255, 0],
type='upper',
swap='right_wrist'),
10:
dict(
name='right_wrist',
id=10,
color=[255, 128, 0],
type='upper',
swap='left_wrist'),
11:
dict(
name='left_hip',
id=11,
color=[0, 255, 0],
type='lower',
swap='right_hip'),
12:
dict(
name='right_hip',
id=12,
color=[255, 128, 0],
type='lower',
swap='left_hip'),
13:
dict(
name='left_knee',
id=13,
color=[0, 255, 0],
type='lower',
swap='right_knee'),
14:
dict(
name='right_knee',
id=14,
color=[255, 128, 0],
type='lower',
swap='left_knee'),
15:
dict(
name='left_ankle',
id=15,
color=[0, 255, 0],
type='lower',
swap='right_ankle'),
16:
dict(
name='right_ankle',
id=16,
color=[255, 128, 0],
type='lower',
swap='left_ankle')
},
skeleton_info={
0:
dict(link=('left_ankle', 'left_knee'), id=0, color=[0, 255, 0]),
1:
dict(link=('left_knee', 'left_hip'), id=1, color=[0, 255, 0]),
2:
dict(link=('right_ankle', 'right_knee'), id=2, color=[255, 128, 0]),
3:
dict(link=('right_knee', 'right_hip'), id=3, color=[255, 128, 0]),
4:
dict(link=('left_hip', 'right_hip'), id=4, color=[51, 153, 255]),
5:
dict(link=('left_shoulder', 'left_hip'), id=5, color=[51, 153, 255]),
6:
dict(link=('right_shoulder', 'right_hip'), id=6, color=[51, 153, 255]),
7:
dict(
link=('left_shoulder', 'right_shoulder'),
id=7,
color=[51, 153, 255]),
8:
dict(link=('left_shoulder', 'left_elbow'), id=8, color=[0, 255, 0]),
9:
dict(
link=('right_shoulder', 'right_elbow'), id=9, color=[255, 128, 0]),
10:
dict(link=('left_elbow', 'left_wrist'), id=10, color=[0, 255, 0]),
11:
dict(link=('right_elbow', 'right_wrist'), id=11, color=[255, 128, 0]),
12:
dict(link=('nose', 'head_bottom'), id=12, color=[51, 153, 255]),
13:
dict(link=('nose', 'head_top'), id=13, color=[51, 153, 255]),
14:
dict(
link=('head_bottom', 'left_shoulder'), id=14, color=[51, 153,
255]),
15:
dict(
link=('head_bottom', 'right_shoulder'),
id=15,
color=[51, 153, 255])
},
joint_weights=[
1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5,
1.5
],
sigmas=[
0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062,
0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
])

141
configs/_base_/datasets/rhd2d.py

@@ -0,0 +1,141 @@
dataset_info = dict(
dataset_name='rhd2d',
paper_info=dict(
author='Christian Zimmermann and Thomas Brox',
title='Learning to Estimate 3D Hand Pose from Single RGB Images',
container='arXiv',
year='2017',
homepage='https://lmb.informatik.uni-freiburg.de/resources/'
'datasets/RenderedHandposeDataset.en.html',
),
keypoint_info={
0:
dict(name='wrist', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='thumb1', id=1, color=[255, 128, 0], type='', swap=''),
2:
dict(name='thumb2', id=2, color=[255, 128, 0], type='', swap=''),
3:
dict(name='thumb3', id=3, color=[255, 128, 0], type='', swap=''),
4:
dict(name='thumb4', id=4, color=[255, 128, 0], type='', swap=''),
5:
dict(
name='forefinger1', id=5, color=[255, 153, 255], type='', swap=''),
6:
dict(
name='forefinger2', id=6, color=[255, 153, 255], type='', swap=''),
7:
dict(
name='forefinger3', id=7, color=[255, 153, 255], type='', swap=''),
8:
dict(
name='forefinger4', id=8, color=[255, 153, 255], type='', swap=''),
9:
dict(
name='middle_finger1',
id=9,
color=[102, 178, 255],
type='',
swap=''),
10:
dict(
name='middle_finger2',
id=10,
color=[102, 178, 255],
type='',
swap=''),
11:
dict(
name='middle_finger3',
id=11,
color=[102, 178, 255],
type='',
swap=''),
12:
dict(
name='middle_finger4',
id=12,
color=[102, 178, 255],
type='',
swap=''),
13:
dict(
name='ring_finger1', id=13, color=[255, 51, 51], type='', swap=''),
14:
dict(
name='ring_finger2', id=14, color=[255, 51, 51], type='', swap=''),
15:
dict(
name='ring_finger3', id=15, color=[255, 51, 51], type='', swap=''),
16:
dict(
name='ring_finger4', id=16, color=[255, 51, 51], type='', swap=''),
17:
dict(name='pinky_finger1', id=17, color=[0, 255, 0], type='', swap=''),
18:
dict(name='pinky_finger2', id=18, color=[0, 255, 0], type='', swap=''),
19:
dict(name='pinky_finger3', id=19, color=[0, 255, 0], type='', swap=''),
20:
dict(name='pinky_finger4', id=20, color=[0, 255, 0], type='', swap='')
},
skeleton_info={
0:
dict(link=('wrist', 'thumb1'), id=0, color=[255, 128, 0]),
1:
dict(link=('thumb1', 'thumb2'), id=1, color=[255, 128, 0]),
2:
dict(link=('thumb2', 'thumb3'), id=2, color=[255, 128, 0]),
3:
dict(link=('thumb3', 'thumb4'), id=3, color=[255, 128, 0]),
4:
dict(link=('wrist', 'forefinger1'), id=4, color=[255, 153, 255]),
5:
dict(link=('forefinger1', 'forefinger2'), id=5, color=[255, 153, 255]),
6:
dict(link=('forefinger2', 'forefinger3'), id=6, color=[255, 153, 255]),
7:
dict(link=('forefinger3', 'forefinger4'), id=7, color=[255, 153, 255]),
8:
dict(link=('wrist', 'middle_finger1'), id=8, color=[102, 178, 255]),
9:
dict(
link=('middle_finger1', 'middle_finger2'),
id=9,
color=[102, 178, 255]),
10:
dict(
link=('middle_finger2', 'middle_finger3'),
id=10,
color=[102, 178, 255]),
11:
dict(
link=('middle_finger3', 'middle_finger4'),
id=11,
color=[102, 178, 255]),
12:
dict(link=('wrist', 'ring_finger1'), id=12, color=[255, 51, 51]),
13:
dict(
link=('ring_finger1', 'ring_finger2'), id=13, color=[255, 51, 51]),
14:
dict(
link=('ring_finger2', 'ring_finger3'), id=14, color=[255, 51, 51]),
15:
dict(
link=('ring_finger3', 'ring_finger4'), id=15, color=[255, 51, 51]),
16:
dict(link=('wrist', 'pinky_finger1'), id=16, color=[0, 255, 0]),
17:
dict(
link=('pinky_finger1', 'pinky_finger2'), id=17, color=[0, 255, 0]),
18:
dict(
link=('pinky_finger2', 'pinky_finger3'), id=18, color=[0, 255, 0]),
19:
dict(
link=('pinky_finger3', 'pinky_finger4'), id=19, color=[0, 255, 0])
},
joint_weights=[1.] * 21,
sigmas=[])

582
configs/_base_/datasets/wflw.py

@@ -0,0 +1,582 @@
dataset_info = dict(
dataset_name='wflw',
paper_info=dict(
author='Wu, Wayne and Qian, Chen and Yang, Shuo and Wang, '
'Quan and Cai, Yici and Zhou, Qiang',
title='Look at boundary: A boundary-aware face alignment algorithm',
container='Proceedings of the IEEE conference on computer '
'vision and pattern recognition',
year='2018',
homepage='https://wywu.github.io/projects/LAB/WFLW.html',
),
keypoint_info={
0:
dict(
name='kpt-0', id=0, color=[255, 255, 255], type='', swap='kpt-32'),
1:
dict(
name='kpt-1', id=1, color=[255, 255, 255], type='', swap='kpt-31'),
2:
dict(
name='kpt-2', id=2, color=[255, 255, 255], type='', swap='kpt-30'),
3:
dict(
name='kpt-3', id=3, color=[255, 255, 255], type='', swap='kpt-29'),
4:
dict(
name='kpt-4', id=4, color=[255, 255, 255], type='', swap='kpt-28'),
5:
dict(
name='kpt-5', id=5, color=[255, 255, 255], type='', swap='kpt-27'),
6:
dict(
name='kpt-6', id=6, color=[255, 255, 255], type='', swap='kpt-26'),
7:
dict(
name='kpt-7', id=7, color=[255, 255, 255], type='', swap='kpt-25'),
8:
dict(
name='kpt-8', id=8, color=[255, 255, 255], type='', swap='kpt-24'),
9:
dict(
name='kpt-9', id=9, color=[255, 255, 255], type='', swap='kpt-23'),
10:
dict(
name='kpt-10',
id=10,
color=[255, 255, 255],
type='',
swap='kpt-22'),
11:
dict(
name='kpt-11',
id=11,
color=[255, 255, 255],
type='',
swap='kpt-21'),
12:
dict(
name='kpt-12',
id=12,
color=[255, 255, 255],
type='',
swap='kpt-20'),
13:
dict(
name='kpt-13',
id=13,
color=[255, 255, 255],
type='',
swap='kpt-19'),
14:
dict(
name='kpt-14',
id=14,
color=[255, 255, 255],
type='',
swap='kpt-18'),
15:
dict(
name='kpt-15',
id=15,
color=[255, 255, 255],
type='',
swap='kpt-17'),
16:
dict(name='kpt-16', id=16, color=[255, 255, 255], type='', swap=''),
17:
dict(
name='kpt-17',
id=17,
color=[255, 255, 255],
type='',
swap='kpt-15'),
18:
dict(
name='kpt-18',
id=18,
color=[255, 255, 255],
type='',
swap='kpt-14'),
19:
dict(
name='kpt-19',
id=19,
color=[255, 255, 255],
type='',
swap='kpt-13'),
20:
dict(
name='kpt-20',
id=20,
color=[255, 255, 255],
type='',
swap='kpt-12'),
21:
dict(
name='kpt-21',
id=21,
color=[255, 255, 255],
type='',
swap='kpt-11'),
22:
dict(
name='kpt-22',
id=22,
color=[255, 255, 255],
type='',
swap='kpt-10'),
23:
dict(
name='kpt-23', id=23, color=[255, 255, 255], type='',
swap='kpt-9'),
24:
dict(
name='kpt-24', id=24, color=[255, 255, 255], type='',
swap='kpt-8'),
25:
dict(
name='kpt-25', id=25, color=[255, 255, 255], type='',
swap='kpt-7'),
26:
dict(
name='kpt-26', id=26, color=[255, 255, 255], type='',
swap='kpt-6'),
27:
dict(
name='kpt-27', id=27, color=[255, 255, 255], type='',
swap='kpt-5'),
28:
dict(
name='kpt-28', id=28, color=[255, 255, 255], type='',
swap='kpt-4'),
29:
dict(
name='kpt-29', id=29, color=[255, 255, 255], type='',
swap='kpt-3'),
30:
dict(
name='kpt-30', id=30, color=[255, 255, 255], type='',
swap='kpt-2'),
31:
dict(
name='kpt-31', id=31, color=[255, 255, 255], type='',
swap='kpt-1'),
32:
dict(
name='kpt-32', id=32, color=[255, 255, 255], type='',
swap='kpt-0'),
33:
dict(
name='kpt-33',
id=33,
color=[255, 255, 255],
type='',
swap='kpt-46'),
34:
dict(
name='kpt-34',
id=34,
color=[255, 255, 255],
type='',
swap='kpt-45'),
35:
dict(
name='kpt-35',
id=35,
color=[255, 255, 255],
type='',
swap='kpt-44'),
36:
dict(
name='kpt-36',
id=36,
color=[255, 255, 255],
type='',
swap='kpt-43'),
37:
dict(
name='kpt-37',
id=37,
color=[255, 255, 255],
type='',
swap='kpt-42'),
38:
dict(
name='kpt-38',
id=38,
color=[255, 255, 255],
type='',
swap='kpt-50'),
39:
dict(
name='kpt-39',
id=39,
color=[255, 255, 255],
type='',
swap='kpt-49'),
40:
dict(
name='kpt-40',
id=40,
color=[255, 255, 255],
type='',
swap='kpt-48'),
41:
dict(
name='kpt-41',
id=41,
color=[255, 255, 255],
type='',
swap='kpt-47'),
42:
dict(
name='kpt-42',
id=42,
color=[255, 255, 255],
type='',
swap='kpt-37'),
43:
dict(
name='kpt-43',
id=43,
color=[255, 255, 255],
type='',
swap='kpt-36'),
44:
dict(
name='kpt-44',
id=44,
color=[255, 255, 255],
type='',
swap='kpt-35'),
45:
dict(
name='kpt-45',
id=45,
color=[255, 255, 255],
type='',
swap='kpt-34'),
46:
dict(
name='kpt-46',
id=46,
color=[255, 255, 255],
type='',
swap='kpt-33'),
47:
dict(
name='kpt-47',
id=47,
color=[255, 255, 255],
type='',
swap='kpt-41'),
48:
dict(
name='kpt-48',
id=48,
color=[255, 255, 255],
type='',
swap='kpt-40'),
49:
dict(
name='kpt-49',
id=49,
color=[255, 255, 255],
type='',
swap='kpt-39'),
50:
dict(
name='kpt-50',
id=50,
color=[255, 255, 255],
type='',
swap='kpt-38'),
51:
dict(name='kpt-51', id=51, color=[255, 255, 255], type='', swap=''),
52:
dict(name='kpt-52', id=52, color=[255, 255, 255], type='', swap=''),
53:
dict(name='kpt-53', id=53, color=[255, 255, 255], type='', swap=''),
54:
dict(name='kpt-54', id=54, color=[255, 255, 255], type='', swap=''),
55:
dict(
name='kpt-55',
id=55,
color=[255, 255, 255],
type='',
swap='kpt-59'),
56:
dict(
name='kpt-56',
id=56,
color=[255, 255, 255],
type='',
swap='kpt-58'),
57:
dict(name='kpt-57', id=57, color=[255, 255, 255], type='', swap=''),
58:
dict(
name='kpt-58',
id=58,
color=[255, 255, 255],
type='',
swap='kpt-56'),
59:
dict(
name='kpt-59',
id=59,
color=[255, 255, 255],
type='',
swap='kpt-55'),
60:
dict(
name='kpt-60',
id=60,
color=[255, 255, 255],
type='',
swap='kpt-72'),
61:
dict(
name='kpt-61',
id=61,
color=[255, 255, 255],
type='',
swap='kpt-71'),
62:
dict(
name='kpt-62',
id=62,
color=[255, 255, 255],
type='',
swap='kpt-70'),
63:
dict(
name='kpt-63',
id=63,
color=[255, 255, 255],
type='',
swap='kpt-69'),
64:
dict(
name='kpt-64',
id=64,
color=[255, 255, 255],
type='',
swap='kpt-68'),
65:
dict(
name='kpt-65',
id=65,
color=[255, 255, 255],
type='',
swap='kpt-75'),
66:
dict(
name='kpt-66',
id=66,
color=[255, 255, 255],
type='',
swap='kpt-74'),
67:
dict(
name='kpt-67',
id=67,
color=[255, 255, 255],
type='',
swap='kpt-73'),
68:
dict(
name='kpt-68',
id=68,
color=[255, 255, 255],
type='',
swap='kpt-64'),
69:
dict(
name='kpt-69',
id=69,
color=[255, 255, 255],
type='',
swap='kpt-63'),
70:
dict(
name='kpt-70',
id=70,
color=[255, 255, 255],
type='',
swap='kpt-62'),
71:
dict(
name='kpt-71',
id=71,
color=[255, 255, 255],
type='',
swap='kpt-61'),
72:
dict(
name='kpt-72',
id=72,
color=[255, 255, 255],
type='',
swap='kpt-60'),
73:
dict(
name='kpt-73',
id=73,
color=[255, 255, 255],
type='',
swap='kpt-67'),
74:
dict(
name='kpt-74',
id=74,
color=[255, 255, 255],
type='',
swap='kpt-66'),
75:
dict(
name='kpt-75',
id=75,
color=[255, 255, 255],
type='',
swap='kpt-65'),
76:
dict(
name='kpt-76',
id=76,
color=[255, 255, 255],
type='',
swap='kpt-82'),
77:
dict(
name='kpt-77',
id=77,
color=[255, 255, 255],
type='',
swap='kpt-81'),
78:
dict(
name='kpt-78',
id=78,
color=[255, 255, 255],
type='',
swap='kpt-80'),
79:
dict(name='kpt-79', id=79, color=[255, 255, 255], type='', swap=''),
80:
dict(
name='kpt-80',
id=80,
color=[255, 255, 255],
type='',
swap='kpt-78'),
81:
dict(
name='kpt-81',
id=81,
color=[255, 255, 255],
type='',
swap='kpt-77'),
82:
dict(
name='kpt-82',
id=82,
color=[255, 255, 255],
type='',
swap='kpt-76'),
83:
dict(
name='kpt-83',
id=83,
color=[255, 255, 255],
type='',
swap='kpt-87'),
84:
dict(
name='kpt-84',
id=84,
color=[255, 255, 255],
type='',
swap='kpt-86'),
85:
dict(name='kpt-85', id=85, color=[255, 255, 255], type='', swap=''),
86:
dict(
name='kpt-86',
id=86,
color=[255, 255, 255],
type='',
swap='kpt-84'),
87:
dict(
name='kpt-87',
id=87,
color=[255, 255, 255],
type='',
swap='kpt-83'),
88:
dict(
name='kpt-88',
id=88,
color=[255, 255, 255],
type='',
swap='kpt-92'),
89:
dict(
name='kpt-89',
id=89,
color=[255, 255, 255],
type='',
swap='kpt-91'),
90:
dict(name='kpt-90', id=90, color=[255, 255, 255], type='', swap=''),
91:
dict(
name='kpt-91',
id=91,
color=[255, 255, 255],
type='',
swap='kpt-89'),
92:
dict(
name='kpt-92',
id=92,
color=[255, 255, 255],
type='',
swap='kpt-88'),
93:
dict(
name='kpt-93',
id=93,
color=[255, 255, 255],
type='',
swap='kpt-95'),
94:
dict(name='kpt-94', id=94, color=[255, 255, 255], type='', swap=''),
95:
dict(
name='kpt-95',
id=95,
color=[255, 255, 255],
type='',
swap='kpt-93'),
96:
dict(
name='kpt-96',
id=96,
color=[255, 255, 255],
type='',
swap='kpt-97'),
97:
dict(
name='kpt-97',
id=97,
color=[255, 255, 255],
type='',
swap='kpt-96')
},
skeleton_info={},
joint_weights=[1.] * 98,
sigmas=[])
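
# Editor's sketch (not part of the committed config): the `swap` fields above
# are what horizontal-flip augmentation consumes, and `skeleton_info` is empty
# because WFLW defines no limb links to draw. A hypothetical helper that turns
# the swap names into index pairs:
def build_flip_pairs(keypoint_info):
    name_to_id = {kpt['name']: i for i, kpt in keypoint_info.items()}
    pairs = set()
    for i, kpt in keypoint_info.items():
        if kpt['swap']:  # an empty swap means the point lies on the midline
            j = name_to_id[kpt['swap']]
            pairs.add((min(i, j), max(i, j)))
    return sorted(pairs)

# build_flip_pairs(dataset_info['keypoint_info'])[:2] -> [(0, 32), (1, 31)],
# i.e. the outermost mirrored jawline points.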

64
configs/_base_/datasets/zebra.py

@@ -0,0 +1,64 @@
dataset_info = dict(
dataset_name='zebra',
paper_info=dict(
author='Graving, Jacob M and Chae, Daniel and Naik, Hemal and '
'Li, Liang and Koger, Benjamin and Costelloe, Blair R and '
'Couzin, Iain D',
title='DeepPoseKit, a software toolkit for fast and robust '
'animal pose estimation using deep learning',
container='Elife',
year='2019',
homepage='https://github.com/jgraving/DeepPoseKit-Data',
),
keypoint_info={
0:
dict(name='snout', id=0, color=[255, 255, 255], type='', swap=''),
1:
dict(name='head', id=1, color=[255, 255, 255], type='', swap=''),
2:
dict(name='neck', id=2, color=[255, 255, 255], type='', swap=''),
3:
dict(
name='forelegL1',
id=3,
color=[255, 255, 255],
type='',
swap='forelegR1'),
4:
dict(
name='forelegR1',
id=4,
color=[255, 255, 255],
type='',
swap='forelegL1'),
5:
dict(
name='hindlegL1',
id=5,
color=[255, 255, 255],
type='',
swap='hindlegR1'),
6:
dict(
name='hindlegR1',
id=6,
color=[255, 255, 255],
type='',
swap='hindlegL1'),
7:
dict(name='tailbase', id=7, color=[255, 255, 255], type='', swap=''),
8:
dict(name='tailtip', id=8, color=[255, 255, 255], type='', swap='')
},
skeleton_info={
0: dict(link=('head', 'snout'), id=0, color=[255, 255, 255]),
1: dict(link=('neck', 'head'), id=1, color=[255, 255, 255]),
2: dict(link=('forelegL1', 'neck'), id=2, color=[255, 255, 255]),
3: dict(link=('forelegR1', 'neck'), id=3, color=[255, 255, 255]),
4: dict(link=('hindlegL1', 'tailbase'), id=4, color=[255, 255, 255]),
5: dict(link=('hindlegR1', 'tailbase'), id=5, color=[255, 255, 255]),
6: dict(link=('tailbase', 'neck'), id=6, color=[255, 255, 255]),
7: dict(link=('tailtip', 'tailbase'), id=7, color=[255, 255, 255])
},
joint_weights=[1.] * 9,
sigmas=[])

19
configs/_base_/default_runtime.py

@@ -0,0 +1,19 @@
checkpoint_config = dict(interval=10)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
log_level = 'INFO'
load_from = None
resume_from = None
dist_params = dict(backend='nccl')
workflow = [('train', 1)]
# disable OpenCV multithreading to avoid overloading the system
opencv_num_threads = 0
# set the multi-process start method to `fork` to speed up training
mp_start_method = 'fork'
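
# Editor's sketch (not part of the committed file): this runtime config is not
# run directly; child configs list it in `_base_` and override individual
# keys. Loading a composed child config, assuming the mmcv Config API:
from mmcv import Config

cfg = Config.fromfile(
    'configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
    'animalpose/hrnet_w32_animalpose_256x256.py')
print(cfg.log_level)            # 'INFO', inherited from this file
print(cfg.log_config.interval)  # 1, overridden by the child config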

0
configs/_base_/filters/gausian_filter.py

18
configs/animal/2d_kpt_sview_rgb_img/README.md

@@ -0,0 +1,18 @@
# 2D Animal Keypoint Detection
2D animal keypoint detection (animal pose estimation) aims to detect the keypoints of different species, including rats,
dogs, macaques, and cheetahs. It enables detailed behavioral analysis for applications in neuroscience, medicine, and ecology.
## Data preparation
Please follow [DATA Preparation](/docs/en/tasks/2d_animal_keypoint.md) to prepare data.
## Demo
Please follow [DEMO](/demo/docs/2d_animal_demo.md) to generate fancy demos.
<img src="https://user-images.githubusercontent.com/11788150/114201893-4446ec00-9989-11eb-808b-5718c47c7b23.gif" height="140px" alt><br>
<img src="https://user-images.githubusercontent.com/11788150/114205282-b5d46980-998c-11eb-9d6b-85ba47f81252.gif" height="140px" alt><br>
<img src="https://user-images.githubusercontent.com/11788150/114023530-944c8280-98a5-11eb-86b0-5f6d3e232af0.gif" height="140px" alt><br>

7
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/README.md

@@ -0,0 +1,7 @@
# Top-down heatmap-based pose estimation
Top-down methods divide the task into two stages: object detection and pose estimation.
They perform object detection first, then estimate the pose of each detected object within its bounding box.
Instead of regressing keypoint coordinates directly, the pose estimator produces heatmaps that represent the
likelihood of each location being a keypoint, as sketched below.
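At inference time, decoding a keypoint from such a heatmap essentially reduces to taking each channel's peak and rescaling it to input coordinates. A minimal numpy sketch (illustrative only; the actual post-processing also applies flip testing and sub-pixel refinement):
```python
import numpy as np

def decode_heatmaps(heatmaps, input_size):
    """heatmaps: (K, H, W) likelihood maps; input_size: (img_h, img_w)."""
    k, h, w = heatmaps.shape
    flat = heatmaps.reshape(k, -1)
    idx = flat.argmax(axis=1)
    ys, xs = np.unravel_index(idx, (h, w))
    scores = flat.max(axis=1)
    # rescale heatmap coords (e.g. 64x64) back to the input crop (e.g. 256x256)
    coords = np.stack([xs * input_size[1] / w, ys * input_size[0] / h], axis=1)
    return coords, scores
```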

40
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.md

@@ -0,0 +1,40 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html">Animal-Pose (ICCV'2019)</a></summary>
```bibtex
@InProceedings{Cao_2019_ICCV,
author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
title = {Cross-Domain Adaptation for Animal Pose Estimation},
booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
month = {October},
year = {2019}
}
```
</details>
Results on AnimalPose validation set (1117 instances)
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py) | 256x256 | 0.736 | 0.959 | 0.832 | 0.775 | 0.966 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256_20210426.log.json) |
| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py) | 256x256 | 0.737 | 0.959 | 0.823 | 0.778 | 0.962 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256_20210426.log.json) |
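The checkpoints above can be loaded for inference through the top-down API; a minimal sketch, assuming the MMPose 0.x `init_pose_model` / `inference_top_down_pose_model` helpers, with a hypothetical local image `demo.jpg` and an illustrative full-image box:
```python
from mmpose.apis import inference_top_down_pose_model, init_pose_model

config = ('configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'animalpose/hrnet_w32_animalpose_256x256.py')
checkpoint = ('https://download.openmmlab.com/mmpose/animal/hrnet/'
              'hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth')

model = init_pose_model(config, checkpoint, device='cpu')
# boxes normally come from an animal detector; here a single xywh box
pose_results, _ = inference_top_down_pose_model(
    model, 'demo.jpg', person_results=[{'bbox': [0, 0, 512, 512]}])
```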

40
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_animalpose.yml

@@ -0,0 +1,40 @@
Collections:
- Name: HRNet
Paper:
Title: Deep high-resolution representation learning for human pose estimation
URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py
In Collection: HRNet
Metadata:
Architecture: &id001
- HRNet
Training Data: Animal-Pose
Name: topdown_heatmap_hrnet_w32_animalpose_256x256
Results:
- Dataset: Animal-Pose
Metrics:
AP: 0.736
AP@0.5: 0.959
AP@0.75: 0.832
AR: 0.775
AR@0.5: 0.966
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_animalpose_256x256-1aa7f075_20210426.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: Animal-Pose
Name: topdown_heatmap_hrnet_w48_animalpose_256x256
Results:
- Dataset: Animal-Pose
Metrics:
AP: 0.737
AP@0.5: 0.959
AP@0.75: 0.823
AR: 0.778
AR@0.5: 0.962
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_animalpose_256x256-34644726_20210426.pth

172
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w32_animalpose_256x256.py

@@ -0,0 +1,172 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/animalpose.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=20,
dataset_joints=20,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/animalpose'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_train.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

172
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/hrnet_w48_animalpose_256x256.py

@@ -0,0 +1,172 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/animalpose.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=20,
dataset_joints=20,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=48,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/animalpose'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_train.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
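All of these configs share one schedule: linear warmup over the first 500 iterations starting from 0.001 x lr, then step decay at epochs 170 and 200 across 210 total epochs. A standalone sketch of how that policy plays out, assuming mmcv's default step gamma of 0.1:

```python
# Sketch of the lr_config above (gamma=0.1 is assumed, mmcv's default
# for the 'step' policy).
BASE_LR = 5e-4

def lr_at(epoch, cur_iter, warmup_iters=500, warmup_ratio=0.001,
          steps=(170, 200), gamma=0.1):
    """Learning rate at a given epoch / global iteration."""
    if cur_iter < warmup_iters:
        # Linear warmup from warmup_ratio * lr up to the regular lr.
        k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
        return BASE_LR * (1 - k)
    # Step decay: multiply by gamma once per milestone already passed.
    return BASE_LR * gamma ** sum(epoch >= s for s in steps)

print(lr_at(0, 0))          # 5e-07  (warmup start)
print(lr_at(10, 1000))      # 0.0005 (after warmup, before any step)
print(lr_at(180, 10**9))    # 5e-05  (after the epoch-170 step)
print(lr_at(205, 10**9))    # 5e-06  (after the epoch-200 step)
```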

141
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py

@@ -0,0 +1,141 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/animalpose.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=20,
dataset_joints=20,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/animalpose'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_train.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

141
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py

@@ -0,0 +1,141 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/animalpose.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=20,
dataset_joints=20,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/animalpose'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_train.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

141
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py

@@ -0,0 +1,141 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/animalpose.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=20,
dataset_joints=20,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/animalpose'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_train.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalPoseDataset',
ann_file=f'{data_root}/annotations/animalpose_val.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
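`TopDownGenerateTarget` with `sigma=2` renders each keypoint as an unnormalized 2D Gaussian on the 64x64 grid given by `heatmap_size`. A minimal numpy sketch of that encoding — the standard formulation, not the library code itself:

```python
# Sketch: encode one keypoint as a Gaussian heatmap, as TopDownGenerateTarget
# does conceptually (sigma=2 on the 64x64 grid configured above).
import numpy as np

def keypoint_to_heatmap(x, y, size=64, sigma=2.0):
    """Unnormalized Gaussian centered at (x, y) in heatmap coordinates."""
    xs = np.arange(size)
    ys = np.arange(size)[:, None]
    return np.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))

# A keypoint at pixel (128, 96) of the 256x256 crop maps onto the
# 64x64 heatmap with a stride of 4.
hm = keypoint_to_heatmap(128 / 4, 96 / 4)
print(hm.shape, hm.max())  # (64, 64) 1.0
```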

41
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.md

@@ -0,0 +1,41 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ICCV_2019/html/Cao_Cross-Domain_Adaptation_for_Animal_Pose_Estimation_ICCV_2019_paper.html">Animal-Pose (ICCV'2019)</a></summary>
```bibtex
@InProceedings{Cao_2019_ICCV,
author = {Cao, Jinkun and Tang, Hongyang and Fang, Hao-Shu and Shen, Xiaoyong and Lu, Cewu and Tai, Yu-Wing},
title = {Cross-Domain Adaptation for Animal Pose Estimation},
booktitle = {The IEEE International Conference on Computer Vision (ICCV)},
month = {October},
year = {2019}
}
```
</details>
Results on AnimalPose validation set (1117 instances)
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py) | 256x256 | 0.688 | 0.945 | 0.772 | 0.733 | 0.952 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256_20210426.log.json) |
| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py) | 256x256 | 0.696 | 0.948 | 0.785 | 0.737 | 0.954 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256_20210426.log.json) |
| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py) | 256x256 | 0.709 | 0.948 | 0.797 | 0.749 | 0.951 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256_20210426.log.json) |
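To try one of these released checkpoints on a single image, the mmpose 0.x top-down inference helpers can be used roughly as below. This is a sketch: the image path and bounding box are placeholders, and in real use the config's `dataset_info` should be passed so flip pairs are correct for AnimalPose.

```python
# Sketch: run a released AnimalPose checkpoint on one cropped instance
# (mmpose 0.x API; 'example.jpg' and the bbox are illustrative placeholders).
from mmpose.apis import init_pose_model, inference_top_down_pose_model

config = ('configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'animalpose/res50_animalpose_256x256.py')
checkpoint = ('https://download.openmmlab.com/mmpose/animal/resnet/'
              'res50_animalpose_256x256-e1f30bff_20210426.pth')

model = init_pose_model(config, checkpoint, device='cpu')
person_results = [{'bbox': [0, 0, 256, 256]}]  # xyxy box around the animal
results, _ = inference_top_down_pose_model(
    model, 'example.jpg', person_results, format='xyxy')
print(results[0]['keypoints'].shape)  # (20, 3): x, y, score per joint
```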

56
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/resnet_animalpose.yml

@@ -0,0 +1,56 @@
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res50_animalpose_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: Animal-Pose
Name: topdown_heatmap_res50_animalpose_256x256
Results:
- Dataset: Animal-Pose
Metrics:
AP: 0.688
AP@0.5: 0.945
AP@0.75: 0.772
AR: 0.733
AR@0.5: 0.952
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_animalpose_256x256-e1f30bff_20210426.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res101_animalpose_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Animal-Pose
Name: topdown_heatmap_res101_animalpose_256x256
Results:
- Dataset: Animal-Pose
Metrics:
AP: 0.696
AP@0.5: 0.948
AP@0.75: 0.785
AR: 0.737
AR@0.5: 0.954
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_animalpose_256x256-85563f4a_20210426.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/animalpose/res152_animalpose_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Animal-Pose
Name: topdown_heatmap_res152_animalpose_256x256
Results:
- Dataset: Animal-Pose
Metrics:
AP: 0.709
AP@0.5: 0.948
AP@0.75: 0.797
AR: 0.749
AR@0.5: 0.951
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_animalpose_256x256-a0a7506c_20210426.pth
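These .yml files follow the OpenMMLab model-index schema, so they are straightforward to consume programmatically. A small sketch that lists each model's name, AP, and checkpoint URL, assuming PyYAML is available (the `&id001`/`*id001` anchors resolve automatically):

```python
# Sketch: read a model-index file like resnet_animalpose.yml and list
# each entry's name, AP, and checkpoint URL (requires PyYAML).
import yaml

with open('configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'animalpose/resnet_animalpose.yml') as f:
    index = yaml.safe_load(f)

for model in index['Models']:
    ap = model['Results'][0]['Metrics']['AP']
    print(f"{model['Name']}: AP={ap}  ->  {model['Weights']}")
```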

41
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.md

@@ -0,0 +1,41 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary>
```bibtex
@misc{yu2021ap10k,
title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
year={2021},
eprint={2108.12617},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
</details>
Results on AP-10K validation set
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py) | 256x256 | 0.738 | 0.958 | 0.808 | 0.592 | 0.743 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.log.json) |
| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py) | 256x256 | 0.744 | 0.959 | 0.807 | 0.589 | 0.748 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.log.json) |
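`TopDownGetRandomScaleRotation` with `rot_factor=40` and `scale_factor=0.5` jitters the crop before the affine warp: scale varies within roughly ±50%, rotation is drawn around ±40° (clipped at twice that), and rotation is only applied part of the time. A sketch of that sampling, mirroring the mmpose 0.x behaviour as we understand it:

```python
# Sketch of the sampling behind TopDownGetRandomScaleRotation
# (rot_factor=40, scale_factor=0.5; the 0.6 rotation probability matches
# the mmpose 0.x default as we understand it).
import numpy as np

def sample_scale_rotation(rf=40, sf=0.5, rot_prob=0.6, rng=np.random):
    scale = np.clip(rng.randn() * sf + 1, 1 - sf, 1 + sf)
    rot = np.clip(rng.randn() * rf, -2 * rf, 2 * rf)
    if rng.rand() > rot_prob:
        rot = 0.0  # most of the time rotate; otherwise keep upright
    return scale, rot

print(sample_scale_rotation())  # e.g. (1.23, -17.8)
```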

40
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_ap10k.yml

@@ -0,0 +1,40 @@
Collections:
- Name: HRNet
Paper:
Title: Deep high-resolution representation learning for human pose estimation
URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py
In Collection: HRNet
Metadata:
Architecture: &id001
- HRNet
Training Data: AP-10K
Name: topdown_heatmap_hrnet_w32_ap10k_256x256
Results:
- Dataset: AP-10K
Metrics:
AP: 0.738
AP@0.5: 0.958
AP@0.75: 0.808
APL: 0.743
APM: 0.592
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_ap10k_256x256-18aac840_20211029.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: AP-10K
Name: topdown_heatmap_hrnet_w48_ap10k_256x256
Results:
- Dataset: AP-10K
Metrics:
AP: 0.744
AP@0.5: 0.959
AP@0.75: 0.807
APL: 0.748
APM: 0.589
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_ap10k_256x256-d95ab412_20211029.pth

172
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w32_ap10k_256x256.py

@@ -0,0 +1,172 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/ap10k.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/ap10k'
data = dict(
samples_per_gpu=64,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
)
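With `flip_test=True`, inference runs on the image and its horizontal mirror; the mirrored heatmaps are flipped back, left/right channels are swapped according to the dataset's `flip_pairs`, and the two predictions are averaged (`shift_heatmap=True` additionally shifts the flipped map one pixel to the right). A numpy sketch of the core idea, with made-up flip pairs rather than AP-10K's real ones:

```python
# Sketch: average original and flipped-back heatmaps, swapping left/right
# channels via flip_pairs (the pairs below are illustrative, not AP-10K's).
import numpy as np

def merge_flip_test(heatmaps, heatmaps_flipped, flip_pairs, shift=True):
    """heatmaps, heatmaps_flipped: (K, H, W) from image and its mirror."""
    back = heatmaps_flipped[:, :, ::-1].copy()  # undo the horizontal flip
    for a, b in flip_pairs:                     # swap left/right joints
        back[[a, b]] = back[[b, a]]
    if shift:                                   # shift_heatmap=True
        back[:, :, 1:] = back[:, :, :-1]
    return (heatmaps + back) * 0.5

hm = np.random.rand(4, 64, 64).astype(np.float32)
hm_flip = np.random.rand(4, 64, 64).astype(np.float32)
print(merge_flip_test(hm, hm_flip, flip_pairs=[(0, 1), (2, 3)]).shape)
```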

172
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/hrnet_w48_ap10k_256x256.py

@@ -0,0 +1,172 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/ap10k.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=48,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/ap10k'
data = dict(
samples_per_gpu=64,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
)

141
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py

@@ -0,0 +1,141 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/ap10k.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/ap10k'
data = dict(
samples_per_gpu=64,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
)

141
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py

@@ -0,0 +1,141 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/ap10k.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=17,
dataset_joints=17,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/ap10k'
data = dict(
samples_per_gpu=64,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-train-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-val-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalAP10KDataset',
ann_file=f'{data_root}/annotations/ap10k-test-split1.json',
img_prefix=f'{data_root}/data/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
)
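The `NormalizeTensor` step uses the standard ImageNet statistics, matching the `torchvision://` pretrained backbones these configs start from. The equivalent operation in plain PyTorch:

```python
# Sketch: the NormalizeTensor step in plain PyTorch (ImageNet statistics,
# matching the torchvision://resnet50 pretraining above).
import torch

mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

img = torch.rand(3, 256, 256)   # a ToTensor'd crop in [0, 1]
img_norm = (img - mean) / std   # channel-wise standardized crop
print(img_norm.shape)           # torch.Size([3, 256, 256])
```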

41
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.md

@@ -0,0 +1,41 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/2108.12617">AP-10K (NeurIPS'2021)</a></summary>
```bibtex
@misc{yu2021ap10k,
title={AP-10K: A Benchmark for Animal Pose Estimation in the Wild},
author={Hang Yu and Yufei Xu and Jing Zhang and Wei Zhao and Ziyu Guan and Dacheng Tao},
year={2021},
eprint={2108.12617},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
</details>
Results on AP-10K validation set
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AP<sup>M</sup> | AP<sup>L</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py) | 256x256 | 0.699 | 0.940 | 0.760 | 0.570 | 0.703 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.log.json) |
| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py) | 256x256 | 0.698 | 0.943 | 0.754 | 0.543 | 0.702 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.log.json) |

40
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/resnet_ap10k.yml

@@ -0,0 +1,40 @@
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res50_ap10k_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: AP-10K
Name: topdown_heatmap_res50_ap10k_256x256
Results:
- Dataset: AP-10K
Metrics:
AP: 0.699
AP@0.5: 0.94
AP@0.75: 0.76
APL: 0.703
APM: 0.57
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_ap10k_256x256-35760eb8_20211029.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/ap10k/res101_ap10k_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: AP-10K
Name: topdown_heatmap_res101_ap10k_256x256
Results:
- Dataset: AP-10K
Metrics:
AP: 0.698
AP@0.5: 0.943
AP@0.75: 0.754
APL: 0.702
APM: 0.543
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_ap10k_256x256-9edfafb9_20211029.pth

40
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.md

@@ -0,0 +1,40 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1906.05586">ATRW (ACM MM'2020)</a></summary>
```bibtex
@inproceedings{li2020atrw,
title={ATRW: A Benchmark for Amur Tiger Re-identification in the Wild},
author={Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
booktitle={Proceedings of the 28th ACM International Conference on Multimedia},
pages={2590--2598},
year={2020}
}
```
</details>
Results on ATRW validation set
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py) | 256x256 | 0.912 | 0.973 | 0.959 | 0.938 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256-f027f09a_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256_20210414.log.json) |
| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py) | 256x256 | 0.911 | 0.972 | 0.946 | 0.937 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256-ac088892_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256_20210414.log.json) |

40
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_atrw.yml

@@ -0,0 +1,40 @@
Collections:
- Name: HRNet
Paper:
Title: Deep high-resolution representation learning for human pose estimation
URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py
In Collection: HRNet
Metadata:
Architecture: &id001
- HRNet
Training Data: ATRW
Name: topdown_heatmap_hrnet_w32_atrw_256x256
Results:
- Dataset: ATRW
Metrics:
AP: 0.912
AP@0.5: 0.973
AP@0.75: 0.959
AR: 0.938
AR@0.5: 0.985
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_atrw_256x256-f027f09a_20210414.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: ATRW
Name: topdown_heatmap_hrnet_w48_atrw_256x256
Results:
- Dataset: ATRW
Metrics:
AP: 0.911
AP@0.5: 0.972
AP@0.75: 0.946
AR: 0.937
AR@0.5: 0.985
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_atrw_256x256-ac088892_20210414.pth
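All of these models predict on a 64x64 heatmap for a 256x256 input, so decoded coordinates are scaled back to the crop by the stride of 4. A minimal argmax decoder sketch — the library additionally applies sub-pixel refinement and the `modulate_kernel` post-processing, which this omits:

```python
# Sketch: naive argmax decoding from (K, 64, 64) heatmaps back to the
# 256x256 input crop (stride 4); omits mmpose's sub-pixel refinement.
import numpy as np

def decode_heatmaps(heatmaps, stride=4):
    K, H, W = heatmaps.shape
    flat = heatmaps.reshape(K, -1)
    idx = flat.argmax(axis=1)
    xs, ys = idx % W, idx // W
    scores = flat[np.arange(K), idx]
    return np.stack([xs * stride, ys * stride, scores], axis=1)

kpts = decode_heatmaps(np.random.rand(15, 64, 64))  # 15 joints, as in ATRW
print(kpts.shape)  # (15, 3): x, y, confidence in input-crop pixels
```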

170
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w32_atrw_256x256.py

@@ -0,0 +1,170 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/atrw.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=15,
dataset_joints=15,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/atrw'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_train.json',
img_prefix=f'{data_root}/images/train/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

170
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/hrnet_w48_atrw_256x256.py

@@ -0,0 +1,170 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/atrw.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=15,
dataset_joints=15,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=48,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/atrw'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_train.json',
img_prefix=f'{data_root}/images/train/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

139
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py

@@ -0,0 +1,139 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/atrw.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=15,
dataset_joints=15,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/atrw'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_train.json',
img_prefix=f'{data_root}/images/train/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

139
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py

@@ -0,0 +1,139 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/atrw.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=15,
dataset_joints=15,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/atrw'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_train.json',
img_prefix=f'{data_root}/images/train/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

139
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py

@@ -0,0 +1,139 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/atrw.py'
]
evaluation = dict(interval=10, metric='mAP', save_best='AP')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=15,
dataset_joints=15,
dataset_channel=[
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14],
],
inference_channel=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'],
soft_nms=False,
nms_thr=1.0,
oks_thr=0.9,
vis_thr=0.2,
use_gt_bbox=True,
det_bbox_thr=0.0,
bbox_file='',
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownHalfBodyTransform',
num_joints_half_body=8,
prob_half_body=0.3),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/atrw'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_train.json',
img_prefix=f'{data_root}/images/train/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalATRWDataset',
ann_file=f'{data_root}/annotations/keypoint_val.json',
img_prefix=f'{data_root}/images/val/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
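Every pipeline in these configs normalizes with the ImageNet channel statistics, which matches the `torchvision://` pretrained backbones. For reference, a PyTorch sketch of the arithmetic the `NormalizeTensor` step performs (only the per-channel computation; the real transform is mmpose's pipeline class):

```python
# Sketch of the NormalizeTensor arithmetic on a CHW float tensor.
# This mirrors the per-channel (x - mean) / std computation with the
# ImageNet statistics used throughout these configs; it is not the
# mmpose transform itself.
import torch

mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

img = torch.rand(3, 256, 256)  # stand-in for a loaded 256x256 crop
normalized = (img - mean) / std
```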

41
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.md

@@ -0,0 +1,41 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://arxiv.org/abs/1906.05586">ATRW (ACM MM'2020)</a></summary>
```bibtex
@inproceedings{li2020atrw,
title={ATRW: A Benchmark for Amur Tiger Re-identification in the Wild},
author={Li, Shuyuan and Li, Jianguo and Tang, Hanlin and Qian, Rui and Lin, Weiyao},
booktitle={Proceedings of the 28th ACM International Conference on Multimedia},
pages={2590--2598},
year={2020}
}
```
</details>
Results on ATRW validation set
| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py) | 256x256 | 0.900 | 0.973 | 0.932 | 0.929 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_atrw_256x256-546c4594_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_atrw_256x256_20210414.log.json) |
| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py) | 256x256 | 0.898 | 0.973 | 0.936 | 0.927 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_atrw_256x256-da93f371_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_atrw_256x256_20210414.log.json) |
| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py) | 256x256 | 0.896 | 0.973 | 0.931 | 0.927 | 0.985 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_atrw_256x256-2bb8e162_20210414.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_atrw_256x256_20210414.log.json) |
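The AP/AR columns are COCO-style keypoint metrics built on object keypoint similarity (OKS). As a reminder, the standard COCO formulation (not something specific to this repo) is

$$\mathrm{OKS} = \frac{\sum_i \exp\bigl(-d_i^2 / 2s^2k_i^2\bigr)\,\delta(v_i > 0)}{\sum_i \delta(v_i > 0)}$$

where $d_i$ is the Euclidean error of keypoint $i$, $s$ the object scale, $k_i$ a per-keypoint falloff constant, and $v_i$ the visibility flag. AP<sup>50</sup> is average precision at OKS threshold 0.5. Note that `oks_thr=0.9` in the configs above controls OKS-based NMS during evaluation, not the metric threshold.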

56
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/resnet_atrw.yml

@@ -0,0 +1,56 @@
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res50_atrw_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: ATRW
Name: topdown_heatmap_res50_atrw_256x256
Results:
- Dataset: ATRW
Metrics:
AP: 0.9
AP@0.5: 0.973
AP@0.75: 0.932
AR: 0.929
AR@0.5: 0.985
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_atrw_256x256-546c4594_20210414.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res101_atrw_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: ATRW
Name: topdown_heatmap_res101_atrw_256x256
Results:
- Dataset: ATRW
Metrics:
AP: 0.898
AP@0.5: 0.973
AP@0.75: 0.936
AR: 0.927
AR@0.5: 0.985
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_atrw_256x256-da93f371_20210414.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/atrw/res152_atrw_256x256.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: ATRW
Name: topdown_heatmap_res152_atrw_256x256
Results:
- Dataset: ATRW
Metrics:
AP: 0.896
AP@0.5: 0.973
AP@0.75: 0.931
AR: 0.927
AR@0.5: 0.985
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_atrw_256x256-2bb8e162_20210414.pth
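The .yml files are OpenMMLab model-index metadata. A small sketch of reading one with PyYAML to enumerate checkpoints, assuming only the schema visible above (a top-level `Models` list whose entries carry `Name` and `Weights`):

```python
# Sketch: list checkpoint URLs from a model-index YAML such as
# resnet_atrw.yml above. Assumes PyYAML is installed and the schema
# matches the file shown (Models -> Name/Weights).
import yaml

with open('configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/'
          'atrw/resnet_atrw.yml') as f:
    index = yaml.safe_load(f)

for model in index['Models']:
    print(model['Name'], '->', model['Weights'])
```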

130
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py

@@ -0,0 +1,130 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/fly.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=32,
dataset_joints=32,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[192, 192],
heatmap_size=[48, 48],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/fly'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
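Note that the fly configs raise `rot_factor` to 90 (versus 40 in the ATRW and Horse-10 configs), which makes sense for top-view flies with no canonical upright orientation. A hedged sketch of how `TopDownGetRandomScaleRotation`-style sampling typically works; the clipping scheme and the 0.6 rotation probability reflect my reading of mmpose 0.x and should be treated as assumptions:

```python
# Hedged sketch of TopDownGetRandomScaleRotation-style sampling.
# mmpose 0.x (to my reading) draws a Gaussian scale clipped to
# [1 - sf, 1 + sf] and a Gaussian rotation clipped to [-2*rf, 2*rf],
# applying the rotation with probability ~0.6; treat details as assumptions.
import numpy as np

def sample_scale_rotation(rot_factor=90, scale_factor=0.3, rot_prob=0.6):
    s = np.clip(np.random.randn() * scale_factor + 1,
                1 - scale_factor, 1 + scale_factor)
    r = np.clip(np.random.randn() * rot_factor,
                -2 * rot_factor, 2 * rot_factor)
    if np.random.rand() > rot_prob:
        r = 0.0
    return s, r
```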

130
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py

@@ -0,0 +1,130 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/fly.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=32,
dataset_joints=32,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[192, 192],
heatmap_size=[48, 48],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/fly'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
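Unlike the res50 and res101 fly configs, res152 halves `samples_per_gpu` to 32 (presumably for GPU memory) while keeping `lr=5e-4`, so the effective batch size per optimizer step differs unless the GPU count compensates. The arithmetic, with the GPU count as an explicit assumption since the commit does not record it:

```python
# Effective batch size arithmetic. num_gpus is an assumption (the commit
# does not record the training hardware), not a documented value.
num_gpus = 8
samples_per_gpu = 32            # this res152 config; res50/res101 use 64
effective_batch = num_gpus * samples_per_gpu
print(effective_batch)          # 256 here vs. 512 for the 64-sample configs
```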

130
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py

@@ -0,0 +1,130 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/fly.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=32,
dataset_joints=32,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[192, 192],
heatmap_size=[48, 48],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/fly'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalFlyDataset',
ann_file=f'{data_root}/annotations/fly_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

44
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.md

@@ -0,0 +1,44 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://www.nature.com/articles/s41592-018-0234-5">Vinegar Fly (Nature Methods'2019)</a></summary>
```bibtex
@article{pereira2019fast,
title={Fast animal pose estimation using deep neural networks},
author={Pereira, Talmo D and Aldarondo, Diego E and Willmore, Lindsay and Kislin, Mikhail and Wang, Samuel S-H and Murthy, Mala and Shaevitz, Joshua W},
journal={Nature methods},
volume={16},
number={1},
pages={117--125},
year={2019},
publisher={Nature Publishing Group}
}
```
</details>
Results on Vinegar Fly test set
| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
| :-------- | :--------: | :------: | :------: | :------: |:------: |:------: |
|[pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py) | 192x192 | 0.996 | 0.910 | 2.00 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_fly_192x192-5d0ee2d9_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_fly_192x192_20210407.log.json) |
|[pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py) | 192x192 | 0.996 | 0.912 | 1.95 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_fly_192x192-41a7a6cc_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_fly_192x192_20210407.log.json) |
|[pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py) | 192x192 | 0.997 | 0.917 | 1.78 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192-fcafbd5a_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192_20210407.log.json) |
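PCK@0.2, AUC and EPE are pixel-space keypoint metrics rather than COCO AP: EPE is the mean end-point error in pixels, PCK@alpha counts a keypoint as correct when its error is within alpha of a normalizing length, and AUC is the area under the PCK curve over a range of thresholds. A NumPy illustration of the first two definitions (my own sketch, not mmpose's evaluator):

```python
# Illustration of EPE and PCK@alpha for one image. pred and gt are
# (K, 2) arrays of keypoint coordinates; norm is the normalizing length
# (e.g. a bbox dimension). This restates the definitions, nothing more.
import numpy as np

def epe(pred, gt):
    return np.linalg.norm(pred - gt, axis=-1).mean()

def pck(pred, gt, norm, alpha=0.2):
    dist = np.linalg.norm(pred - gt, axis=-1)
    return (dist <= alpha * norm).mean()
```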

50
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/resnet_fly.yml

@@ -0,0 +1,50 @@
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res50_fly_192x192.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: Vinegar Fly
Name: topdown_heatmap_res50_fly_192x192
Results:
- Dataset: Vinegar Fly
Metrics:
AUC: 0.91
EPE: 2.0
PCK@0.2: 0.996
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_fly_192x192-5d0ee2d9_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res101_fly_192x192.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Vinegar Fly
Name: topdown_heatmap_res101_fly_192x192
Results:
- Dataset: Vinegar Fly
Metrics:
AUC: 0.912
EPE: 1.95
PCK@0.2: 0.996
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_fly_192x192-41a7a6cc_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/fly/res152_fly_192x192.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Vinegar Fly
Name: topdown_heatmap_res152_fly_192x192
Results:
- Dataset: Vinegar Fly
Metrics:
AUC: 0.917
EPE: 1.78
PCK@0.2: 0.997
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_fly_192x192-fcafbd5a_20210407.pth

44
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.md

@@ -0,0 +1,44 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html">HRNet (CVPR'2019)</a></summary>
```bibtex
@inproceedings{sun2019deep,
title={Deep high-resolution representation learning for human pose estimation},
author={Sun, Ke and Xiao, Bin and Liu, Dong and Wang, Jingdong},
booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition},
pages={5693--5703},
year={2019}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://openaccess.thecvf.com/content/WACV2021/html/Mathis_Pretraining_Boosts_Out-of-Domain_Robustness_for_Pose_Estimation_WACV_2021_paper.html">Horse-10 (WACV'2021)</a></summary>
```bibtex
@inproceedings{mathis2021pretraining,
title={Pretraining boosts out-of-domain robustness for pose estimation},
author={Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
pages={1859--1868},
year={2021}
}
```
</details>
Results on Horse-10 test set
|Set | Arch | Input Size | PCK@0.3 | NME | ckpt | log |
| :--- | :---: | :--------: | :------: | :------: |:------: |:------: |
|split1| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py) | 256x256 | 0.951 | 0.122 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split1-401d901a_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split1_20210405.log.json) |
|split2| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py) | 256x256 | 0.949 | 0.116 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split2-04840523_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split2_20210405.log.json) |
|split3| [pose_hrnet_w32](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py) | 256x256 | 0.939 | 0.153 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split3-4db47400_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split3_20210405.log.json) |
|split1| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py) | 256x256 | 0.973 | 0.095 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split1-3c950d3b_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split1_20210405.log.json) |
|split2| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py) | 256x256 | 0.969 | 0.101 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split2-8ef72b5d_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split2_20210405.log.json) |
|split3| [pose_hrnet_w48](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py) | 256x256 | 0.961 | 0.128 | [ckpt](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split3-0232ec47_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split3_20210405.log.json) |
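Here PCK@0.3 is reported alongside NME, the normalized mean error, where lower is better:

$$\mathrm{NME} = \frac{1}{K}\sum_{i=1}^{K}\frac{\lVert p_i - g_i\rVert_2}{d}$$

with $p_i$ and $g_i$ the predicted and ground-truth keypoints and $d$ a dataset-specific normalization length (for Horse-10, to my reading, derived from the annotated animal size).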

86
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_horse10.yml

@@ -0,0 +1,86 @@
Collections:
- Name: HRNet
Paper:
Title: Deep high-resolution representation learning for human pose estimation
URL: http://openaccess.thecvf.com/content_CVPR_2019/html/Sun_Deep_High-Resolution_Representation_Learning_for_Human_Pose_Estimation_CVPR_2019_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/backbones/hrnet.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py
In Collection: HRNet
Metadata:
Architecture: &id001
- HRNet
Training Data: Horse-10
Name: topdown_heatmap_hrnet_w32_horse10_256x256-split1
Results:
- Dataset: Horse-10
Metrics:
NME: 0.122
PCK@0.3: 0.951
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split1-401d901a_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_hrnet_w32_horse10_256x256-split2
Results:
- Dataset: Horse-10
Metrics:
NME: 0.116
PCK@0.3: 0.949
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split2-04840523_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_hrnet_w32_horse10_256x256-split3
Results:
- Dataset: Horse-10
Metrics:
NME: 0.153
PCK@0.3: 0.939
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w32_horse10_256x256_split3-4db47400_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_hrnet_w48_horse10_256x256-split1
Results:
- Dataset: Horse-10
Metrics:
NME: 0.095
PCK@0.3: 0.973
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split1-3c950d3b_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_hrnet_w48_horse10_256x256-split2
Results:
- Dataset: Horse-10
Metrics:
NME: 0.101
PCK@0.3: 0.969
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split2-8ef72b5d_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py
In Collection: HRNet
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_hrnet_w48_horse10_256x256-split3
Results:
- Dataset: Horse-10
Metrics:
NME: 0.128
PCK@0.3: 0.961
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/hrnet/hrnet_w48_horse10_256x256_split3-0232ec47_20210405.pth

164
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split1.py

@@ -0,0 +1,164 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
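With `num_deconv_layers=0` and `final_conv_kernel=1`, the keypoint head on top of HRNet-W32 reduces to a single 1x1 convolution from the 32-channel high-resolution branch to the 22 output heatmaps. A PyTorch sketch of an equivalent module (a stand-in for shape reasoning, not mmpose's `TopdownHeatmapSimpleHead` itself):

```python
# TopdownHeatmapSimpleHead with num_deconv_layers=0 and
# final_conv_kernel=1 is functionally a 1x1 conv; this stand-in just
# demonstrates the shapes, it is not the mmpose class.
import torch
from torch import nn

head = nn.Conv2d(in_channels=32, out_channels=22, kernel_size=1)

feats = torch.randn(1, 32, 64, 64)  # HRNet-W32 high-res branch at 64x64
heatmaps = head(feats)              # -> (1, 22, 64, 64), the heatmap_size
```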

164
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split2.py

@@ -0,0 +1,164 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

164
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w32_horse10_256x256-split3.py

@@ -0,0 +1,164 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w32-36af842e.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=32,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

164
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split1.py

@@ -0,0 +1,164 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=48,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

164
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split2.py

@@ -0,0 +1,164 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=48,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

164
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/hrnet_w48_horse10_256x256-split3.py

@@ -0,0 +1,164 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='https://download.openmmlab.com/mmpose/'
'pretrain_models/hrnet_w48-8ef0771d.pth',
backbone=dict(
type='HRNet',
in_channels=3,
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(48, 96)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(48, 96, 192)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(48, 96, 192, 384))),
),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=48,
out_channels=channel_cfg['num_output_channels'],
num_deconv_layers=0,
extra=dict(final_conv_kernel=1, ),
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
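Every training pipeline in this commit builds targets with `TopDownGenerateTarget` and `sigma=2`: one heatmap per joint with a Gaussian bump at the ground-truth location on the 64x64 grid. A sketch of that target in its standard unnormalized form (mmpose restricts the Gaussian to a local window for speed, which is omitted here):

```python
# Sketch of a sigma=2 Gaussian heatmap target for one joint at (cx, cy)
# on a 64x64 grid. Standard unnormalized formulation; the windowing
# optimization in mmpose's implementation is deliberately omitted.
import numpy as np

def gaussian_target(cx, cy, size=64, sigma=2.0):
    xs = np.arange(size)
    ys = xs[:, None]
    return np.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))

target = gaussian_target(20.5, 33.0)  # peak ~1 at the joint location
```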

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
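All of these configs evaluate with `flip_test=True`: heatmaps from the original and the horizontally flipped crop are merged before decoding, and `shift_heatmap=True` shifts the flipped map one pixel right, the usual top-down alignment trick. A hedged sketch of the merge; the joint remapping relies on the dataset's `flip_pairs`, and the details reflect the common recipe rather than this exact commit:

```python
# Hedged sketch of heatmap flip-test merging for (N, K, H, W) arrays.
# flip_pairs lists (left, right) joint index pairs from dataset_info;
# the one-pixel right shift mirrors the shift_heatmap=True behavior.
import numpy as np

def flip_test_merge(heatmaps, heatmaps_flipped, flip_pairs, shift=True):
    back = heatmaps_flipped[..., ::-1].copy()  # undo the horizontal flip
    for left, right in flip_pairs:             # swap mirrored joints
        back[:, [left, right]] = back[:, [right, left]]
    if shift:
        back[..., 1:] = back[..., :-1]         # shift one pixel right
    return (heatmaps + back) / 2.0             # average the two passes
```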

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
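The `lr_config` above amounts to a linear warmup over the first 500 iterations followed by a step decay at epochs 170 and 200. A small sketch of the resulting epoch-level schedule; the 10x decay factor is mmcv's default `gamma` for `policy='step'` and is an assumption here, since the config does not set it explicitly:

```python
# Epoch-level view of the step schedule (warmup iterations ignored).
def lr_at_epoch(epoch, base_lr=5e-4, steps=(170, 200), gamma=0.1):
    lr = base_lr
    for s in steps:
        if epoch >= s:
            lr *= gamma  # gamma=0.1 assumed (mmcv default for 'step')
    return lr

for e in (0, 169, 170, 199, 200, 209):
    print(f'epoch {e:3d}: lr = {lr_at_epoch(e):.1e}')
# epoch 0..169: 5.0e-04, epoch 170..199: 5.0e-05, epoch 200..209: 5.0e-06
```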

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
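`TopDownGenerateTarget` with `sigma=2` renders each annotated keypoint as an unnormalized 2D Gaussian on the 64x64 heatmap grid (stride 4 relative to the 256x256 crop). A minimal numpy sketch of the idea, not the exact mmpose implementation:

```python
import numpy as np

def gaussian_heatmap(x, y, size=(64, 64), sigma=2.0):
    """Unnormalized Gaussian centred at heatmap coords (x, y)."""
    h, w = size
    xs = np.arange(w)[None, :]
    ys = np.arange(h)[:, None]
    return np.exp(-((xs - x) ** 2 + (ys - y) ** 2) / (2 * sigma ** 2))

# A keypoint at crop coords (128, 96) maps to the 64x64 heatmap with
# stride 4, i.e. heatmap coords (32, 24).
hm = gaussian_heatmap(128 / 4, 96 / 4)
print(hm.shape, np.unravel_index(hm.argmax(), hm.shape))  # (64, 64) (24, 32)
```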

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
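`flip_test=True` in `test_cfg` averages predictions from the original and a horizontally flipped crop: the flipped heatmaps are mirrored back, left/right channels are swapped via `flip_pairs`, and `shift_heatmap=True` shifts the result one pixel right to compensate for the mirror's offset. A hedged numpy sketch of that merge (the flip pairs below are illustrative, not Horse-10's actual pairs):

```python
import numpy as np

def flip_test_merge(heatmap, heatmap_flipped, flip_pairs, shift=True):
    """Average original and flipped-back heatmaps of shape (K, H, W)."""
    back = heatmap_flipped[:, :, ::-1].copy()  # undo the horizontal flip
    for a, b in flip_pairs:                    # swap left/right channels
        back[[a, b]] = back[[b, a]]
    if shift:                                  # shift_heatmap=True
        back[:, :, 1:] = back[:, :, :-1]
    return 0.5 * (heatmap + back)

K, H, W = 22, 64, 64
merged = flip_test_merge(np.random.rand(K, H, W), np.random.rand(K, H, W),
                         flip_pairs=[(3, 6), (4, 7)])  # illustrative only
print(merged.shape)  # (22, 64, 64)
```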

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
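`channel_cfg` decouples the head's `num_output_channels=22` heatmaps from the subset actually scored: as written, `inference_channel` keeps 21 of the 22 indices. The selection itself is plain array indexing:

```python
import numpy as np

# inference_channel as listed in the config above.
inference_channel = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
                     15, 16, 17, 18, 19, 21]

heatmaps = np.random.rand(22, 64, 64)   # raw head output (K, H, W)
selected = heatmaps[inference_channel]  # channels kept for evaluation
print(selected.shape)                   # (21, 64, 64)
```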

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split1.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
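With `image_size=[256, 256]` and `heatmap_size=[64, 64]` the effective stride is 4, so decoding is essentially a per-channel argmax scaled back to crop coordinates. A simplified sketch; mmpose additionally applies sub-pixel refinement and maps through the inverse of the crop affine, both omitted here:

```python
import numpy as np

def decode_heatmaps(heatmaps, stride=4):
    """Argmax-decode (K, H, W) heatmaps to crop-space keypoints."""
    K, H, W = heatmaps.shape
    flat = heatmaps.reshape(K, -1)
    idx = flat.argmax(axis=1)
    xs, ys = idx % W, idx // W
    scores = flat.max(axis=1)
    # +0.5 maps a heatmap cell to the centre of its stride-sized patch
    # (one common convention; mmpose's exact mapping differs slightly).
    kpts = np.stack([(xs + 0.5) * stride, (ys + 0.5) * stride], axis=1)
    return kpts, scores

kpts, scores = decode_heatmaps(np.random.rand(22, 64, 64))
print(kpts.shape, scores.shape)  # (22, 2) (22,)
```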

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split2.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
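`NormalizeTensor` applies the standard ImageNet statistics channel-wise to a float CHW tensor in [0, 1]; reversing it is handy when visualizing pipeline outputs. A short torch sketch:

```python
import torch

# The NormalizeTensor step above, applied to a float CHW image in [0, 1].
mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

img = torch.rand(3, 256, 256)        # stand-in for a loaded 256x256 crop
img_norm = (img - mean) / std        # what the model consumes
img_back = img_norm * std + mean     # undo for visualization
assert torch.allclose(img, img_back, atol=1e-6)
```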

133
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py

@@ -0,0 +1,133 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/horse10.py'
]
evaluation = dict(interval=10, metric='PCK', save_best='PCK')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=22,
dataset_joints=22,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 21
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
21
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[256, 256],
heatmap_size=[64, 64],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=40, scale_factor=0.5),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'bbox_score', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=[
'image_file', 'center', 'scale', 'rotation', 'bbox_score',
'flip_pairs'
]),
]
test_pipeline = val_pipeline
data_root = 'data/horse10'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-train-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalHorse10Dataset',
ann_file=f'{data_root}/annotations/horse10-test-split3.json',
img_prefix=f'{data_root}/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

47
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.md

@@ -0,0 +1,47 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://openaccess.thecvf.com/content/WACV2021/html/Mathis_Pretraining_Boosts_Out-of-Domain_Robustness_for_Pose_Estimation_WACV_2021_paper.html">Horse-10 (WACV'2021)</a></summary>
```bibtex
@inproceedings{mathis2021pretraining,
title={Pretraining boosts out-of-domain robustness for pose estimation},
author={Mathis, Alexander and Biasi, Thomas and Schneider, Steffen and Yuksekgonul, Mert and Rogers, Byron and Bethge, Matthias and Mathis, Mackenzie W},
booktitle={Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
pages={1859--1868},
year={2021}
}
```
</details>
Results on Horse-10 test set
|Set | Arch | Input Size | PCK@0.3 | NME | ckpt | log |
| :--- | :---: | :--------: | :------: | :------: |:------: |:------: |
|split1| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py) | 256x256 | 0.956 | 0.113 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1-3a3dc37e_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1_20210405.log.json) |
|split2| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py) | 256x256 | 0.954 | 0.111 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split2-65e2a508_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split2_20210405.log.json) |
|split3| [pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py) | 256x256 | 0.946 | 0.129 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split3-9637d4eb_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split3_20210405.log.json) |
|split1| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py) | 256x256 | 0.958 | 0.115 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split1-1b7c259c_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split1_20210405.log.json) |
|split2| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py) | 256x256 | 0.955 | 0.115 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split2-30e2fa87_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split2_20210405.log.json) |
|split3| [pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py) | 256x256 | 0.946 | 0.126 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split3-2eea5bb1_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split3_20210405.log.json) |
|split1| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py) | 256x256 | 0.969 | 0.105 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split1-7e81fe2d_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split1_20210405.log.json) |
|split2| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py) | 256x256 | 0.970 | 0.103 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split2-3b3404a3_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split2_20210405.log.json) |
|split3| [pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py) | 256x256 | 0.957 | 0.131 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split3-c957dac5_20210405.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split3_20210405.log.json) |
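On Horse-10, PCK@0.3 counts a keypoint as correct when the prediction falls within 0.3 of a reference length of the ground truth. A generic numpy sketch; the normalizing length (the original paper uses the nose-to-eye distance) is an assumption of this sketch, not something the table above specifies:

```python
import numpy as np

def pck(pred, gt, visible, norm_len, thr=0.3):
    """Fraction of visible keypoints within thr * norm_len of the GT.

    pred, gt: (K, 2) arrays; visible: (K,) bool; norm_len: scalar length.
    """
    dist = np.linalg.norm(pred - gt, axis=1)
    ok = (dist <= thr * norm_len) & visible
    return ok.sum() / max(visible.sum(), 1)

gt = np.random.rand(22, 2) * 256
pred = gt + np.random.randn(22, 2) * 5.0
print(pck(pred, gt, np.ones(22, dtype=bool), norm_len=60.0))
```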

125
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/resnet_horse10.yml

@@ -0,0 +1,125 @@
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split1.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: Horse-10
Name: topdown_heatmap_res50_horse10_256x256-split1
Results:
- Dataset: Horse-10
Metrics:
NME: 0.113
PCK@0.3: 0.956
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split1-3a3dc37e_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split2.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res50_horse10_256x256-split2
Results:
- Dataset: Horse-10
Metrics:
NME: 0.111
PCK@0.3: 0.954
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split2-65e2a508_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res50_horse10_256x256-split3.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res50_horse10_256x256-split3
Results:
- Dataset: Horse-10
Metrics:
NME: 0.129
PCK@0.3: 0.946
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_horse10_256x256_split3-9637d4eb_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split1.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res101_horse10_256x256-split1
Results:
- Dataset: Horse-10
Metrics:
NME: 0.115
PCK@0.3: 0.958
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split1-1b7c259c_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split2.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res101_horse10_256x256-split2
Results:
- Dataset: Horse-10
Metrics:
NME: 0.115
PCK@0.3: 0.955
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split2-30e2fa87_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res101_horse10_256x256-split3.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res101_horse10_256x256-split3
Results:
- Dataset: Horse-10
Metrics:
NME: 0.126
PCK@0.3: 0.946
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_horse10_256x256_split3-2eea5bb1_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split1.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res152_horse10_256x256-split1
Results:
- Dataset: Horse-10
Metrics:
NME: 0.105
PCK@0.3: 0.969
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split1-7e81fe2d_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split2.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res152_horse10_256x256-split2
Results:
- Dataset: Horse-10
Metrics:
NME: 0.103
PCK@0.3: 0.97
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split2-3b3404a3_20210405.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/res152_horse10_256x256-split3.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Horse-10
Name: topdown_heatmap_res152_horse10_256x256-split3
Results:
- Dataset: Horse-10
Metrics:
NME: 0.131
PCK@0.3: 0.957
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_horse10_256x256_split3-c957dac5_20210405.pth
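The `.yml` metafile above follows the machine-readable model-index format, so results can be queried programmatically. A sketch that picks the strongest checkpoint in the file, assuming PyYAML is available:

```python
import yaml  # assumes PyYAML is installed

with open('configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/horse10/'
          'resnet_horse10.yml') as f:
    meta = yaml.safe_load(f)

# Highest PCK@0.3 entry across all backbones/splits in this metafile.
best = max(meta['Models'],
           key=lambda m: m['Results'][0]['Metrics']['PCK@0.3'])
print(best['Name'], best['Results'][0]['Metrics'], best['Weights'])
```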

130
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py

@@ -0,0 +1,130 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/locust.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=35,
dataset_joints=35,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet101',
backbone=dict(type='ResNet', depth=101),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[160, 160],
heatmap_size=[40, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/locust'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)
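Note the stronger augmentation here than in the Horse-10 configs: `rot_factor=90` with `scale_factor=0.3` instead of 40/0.5. A sketch of how `TopDownGetRandomScaleRotation` samples these factors, mirroring mmpose's clipped-Gaussian scheme; the 0.6 rotation probability is mmpose's default and, like the exact clipping bounds, an assumption of this sketch:

```python
import numpy as np

def random_scale_rotation(rot_factor=90, scale_factor=0.3, rot_prob=0.6,
                          rng=np.random):
    """Sample (scale, rotation) as in TopDownGetRandomScaleRotation."""
    s = np.clip(rng.randn() * scale_factor + 1,
                1 - scale_factor, 1 + scale_factor)
    r = np.clip(rng.randn() * rot_factor, -2 * rot_factor, 2 * rot_factor)
    r = r if rng.rand() <= rot_prob else 0.0  # rotate only some of the time
    return s, r

print([random_scale_rotation() for _ in range(3)])
```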

130
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py

@@ -0,0 +1,130 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/locust.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=35,
dataset_joints=35,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet152',
backbone=dict(type='ResNet', depth=152),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[160, 160],
heatmap_size=[40, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/locust'
data = dict(
samples_per_gpu=32,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

130
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py

@@ -0,0 +1,130 @@
_base_ = [
'../../../../_base_/default_runtime.py',
'../../../../_base_/datasets/locust.py'
]
evaluation = dict(interval=10, metric=['PCK', 'AUC', 'EPE'], save_best='AUC')
optimizer = dict(
type='Adam',
lr=5e-4,
)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[170, 200])
total_epochs = 210
log_config = dict(
interval=1,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
channel_cfg = dict(
num_output_channels=35,
dataset_joints=35,
dataset_channel=[
[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
],
],
inference_channel=[
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34
])
# model settings
model = dict(
type='TopDown',
pretrained='torchvision://resnet50',
backbone=dict(type='ResNet', depth=50),
keypoint_head=dict(
type='TopdownHeatmapSimpleHead',
in_channels=2048,
out_channels=channel_cfg['num_output_channels'],
loss_keypoint=dict(type='JointsMSELoss', use_target_weight=True)),
train_cfg=dict(),
test_cfg=dict(
flip_test=True,
post_process='default',
shift_heatmap=True,
modulate_kernel=11))
data_cfg = dict(
image_size=[160, 160],
heatmap_size=[40, 40],
num_output_channels=channel_cfg['num_output_channels'],
num_joints=channel_cfg['dataset_joints'],
dataset_channel=channel_cfg['dataset_channel'],
inference_channel=channel_cfg['inference_channel'])
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownRandomFlip', flip_prob=0.5),
dict(
type='TopDownGetRandomScaleRotation', rot_factor=90, scale_factor=0.3),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(type='TopDownGenerateTarget', sigma=2),
dict(
type='Collect',
keys=['img', 'target', 'target_weight'],
meta_keys=[
'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale',
'rotation', 'flip_pairs'
]),
]
val_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='TopDownAffine'),
dict(type='ToTensor'),
dict(
type='NormalizeTensor',
mean=[0.485, 0.456, 0.406],
std=[0.229, 0.224, 0.225]),
dict(
type='Collect',
keys=['img'],
meta_keys=['image_file', 'center', 'scale', 'rotation', 'flip_pairs']),
]
test_pipeline = val_pipeline
data_root = 'data/locust'
data = dict(
samples_per_gpu=64,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=32),
test_dataloader=dict(samples_per_gpu=32),
train=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_train.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=train_pipeline,
dataset_info={{_base_.dataset_info}}),
val=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=val_pipeline,
dataset_info={{_base_.dataset_info}}),
test=dict(
type='AnimalLocustDataset',
ann_file=f'{data_root}/annotations/locust_test.json',
img_prefix=f'{data_root}/images/',
data_cfg=data_cfg,
pipeline=test_pipeline,
dataset_info={{_base_.dataset_info}}),
)

43
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.md

@@ -0,0 +1,43 @@
<!-- [ALGORITHM] -->
<details>
<summary align="right"><a href="http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html">SimpleBaseline2D (ECCV'2018)</a></summary>
```bibtex
@inproceedings{xiao2018simple,
title={Simple baselines for human pose estimation and tracking},
author={Xiao, Bin and Wu, Haiping and Wei, Yichen},
booktitle={Proceedings of the European conference on computer vision (ECCV)},
pages={466--481},
year={2018}
}
```
</details>
<!-- [DATASET] -->
<details>
<summary align="right"><a href="https://elifesciences.org/articles/47994">Desert Locust (Elife'2019)</a></summary>
```bibtex
@article{graving2019deepposekit,
title={DeepPoseKit, a software toolkit for fast and robust animal pose estimation using deep learning},
author={Graving, Jacob M and Chae, Daniel and Naik, Hemal and Li, Liang and Koger, Benjamin and Costelloe, Blair R and Couzin, Iain D},
journal={Elife},
volume={8},
pages={e47994},
year={2019},
publisher={eLife Sciences Publications Limited}
}
```
</details>
Results on Desert Locust test set
| Arch | Input Size | PCK@0.2 | AUC | EPE | ckpt | log |
| :-------- | :--------: | :------: | :------: | :------: |:------: |:------: |
|[pose_resnet_50](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py) | 160x160 | 0.999 | 0.899 | 2.27 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160_20210407.log.json) |
|[pose_resnet_101](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py) | 160x160 | 0.999 | 0.907 | 2.03 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160_20210407.log.json) |
|[pose_resnet_152](/configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py) | 160x160 | 1.000 | 0.926 | 1.48 | [ckpt](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth) | [log](https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160_20210407.log.json) |
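The locust configs report PCK/AUC/EPE rather than PCK/NME: EPE is the mean Euclidean pixel error, and AUC averages PCK over a sweep of normalized thresholds. A generic numpy sketch; mmpose's `keypoint_auc` defaults (e.g. its 30 px normalization) may differ in detail:

```python
import numpy as np

def epe(pred, gt):
    """Mean end-point error in pixels over (K, 2) keypoints."""
    return np.linalg.norm(pred - gt, axis=1).mean()

def auc(pred, gt, norm_len, max_thr=1.0, steps=20):
    """Area under the PCK-vs-threshold curve for thresholds in (0, max_thr]."""
    dist = np.linalg.norm(pred - gt, axis=1) / norm_len
    thrs = np.linspace(max_thr / steps, max_thr, steps)
    return np.mean([(dist <= t).mean() for t in thrs])

gt = np.random.rand(35, 2) * 160
pred = gt + np.random.randn(35, 2) * 2.0
print(f'EPE={epe(pred, gt):.2f}px  AUC={auc(pred, gt, norm_len=30.0):.3f}')
```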

50
configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/resnet_locust.yml

@@ -0,0 +1,50 @@
Collections:
- Name: SimpleBaseline2D
Paper:
Title: Simple baselines for human pose estimation and tracking
URL: http://openaccess.thecvf.com/content_ECCV_2018/html/Bin_Xiao_Simple_Baselines_for_ECCV_2018_paper.html
README: https://github.com/open-mmlab/mmpose/blob/master/docs/en/papers/algorithms/simplebaseline2d.md
Models:
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res50_locust_160x160.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: &id001
- SimpleBaseline2D
Training Data: Desert Locust
Name: topdown_heatmap_res50_locust_160x160
Results:
- Dataset: Desert Locust
Metrics:
AUC: 0.899
EPE: 2.27
PCK@0.2: 0.999
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res50_locust_160x160-9efca22b_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res101_locust_160x160.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Desert Locust
Name: topdown_heatmap_res101_locust_160x160
Results:
- Dataset: Desert Locust
Metrics:
AUC: 0.907
EPE: 2.03
PCK@0.2: 0.999
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res101_locust_160x160-d77986b3_20210407.pth
- Config: configs/animal/2d_kpt_sview_rgb_img/topdown_heatmap/locust/res152_locust_160x160.py
In Collection: SimpleBaseline2D
Metadata:
Architecture: *id001
Training Data: Desert Locust
Name: topdown_heatmap_res152_locust_160x160
Results:
- Dataset: Desert Locust
Metrics:
AUC: 0.926
EPE: 1.48
PCK@0.2: 1.0
Task: Animal 2D Keypoint
Weights: https://download.openmmlab.com/mmpose/animal/resnet/res152_locust_160x160-4ea9b372_20210407.pth

Some files were not shown because too many files changed in this diff
