# Copyright (c) OpenMMLab. All rights reserved.
import json
from dataclasses import dataclass
from typing import List, Tuple, Union

import cv2
import numpy as np

from mmpose.datasets.dataset_info import DatasetInfo
from ..utils import load_image_from_disk_or_url
from .builder import NODES
from .frame_drawing_node import FrameDrawingNode


@dataclass
class DynamicInfo:
    pos_curr: Tuple[int, int] = (0, 0)
    pos_step: Tuple[int, int] = (0, 0)
    step_curr: int = 0


@NODES.register_module()
class XDwenDwenNode(FrameDrawingNode):
    """An effect drawing node that captures the face of a cat or dog and
    blends it into a Bing-Dwen-Dwen (the mascot of the 2022 Beijing Winter
    Olympics).

    Parameters:
        name (str, optional): The node name (also thread name).
        frame_buffer (str): The name of the input buffer.
        output_buffer (str | list): The name(s) of the output buffer(s).
        mode_key (str | int): A hot key to switch the background image.
        resource_file (str): The annotation file of resource images, which
            should be in Labelbee format and contain both facial keypoint
            and region annotations.
        out_shape (tuple): The shape of the output frame in (width, height).
            Default: (480, 480).
        rigid_transform (bool): Whether to map the face onto the background
            with a rigid (similarity) transform rather than a full affine
            transform. Default: True.
    """

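    # The resource image is over-scaled by ``dynamic_scale`` so that an
    # ``out_shape`` crop window can wander inside it; the window drifts
    # toward a new random offset every ``dynamic_max_step`` frames.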
    dynamic_scale = 0.15
    dynamic_max_step = 15

    def __init__(
        self,
        name: str,
        frame_buffer: str,
        output_buffer: Union[str, List[str]],
        mode_key: Union[str, int],
        resource_file: str,
        out_shape: Tuple[int, int] = (480, 480),
        rigid_transform: bool = True,
    ):
        super().__init__(name, frame_buffer, output_buffer, enable=True)

        self.mode_key = mode_key
        self.mode_index = 0
        self.out_shape = out_shape
        self.rigid = rigid_transform

        self.latest_pred = None

        self.dynamic_info = DynamicInfo()

        self.register_event(
            self.mode_key, is_keyboard=True, handler_func=self.switch_mode)

        self._init_resource(resource_file)

    def _init_resource(self, resource_file):

        # The resource_file is a JSON file that contains the facial
        # keypoint and mask annotation information of the resource images.
        # The annotations should follow the Labelbee standard format.
        # See https://github.com/open-mmlab/labelbee-client for details.
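        # For illustration only: each entry is expected to provide an image
        # ``url`` and a ``result`` field holding a JSON string that, once
        # decoded, looks roughly like this (field values are placeholders
        # inferred from the checks below, not taken from a real file):
        #
        #   {
        #     "step_1": {"toolName": "pointTool",
        #                "result": [{"x": 100, "y": 80, "order": 1}, ...]},
        #     "step_2": {"toolName": "polygonTool",
        #                "result": [{"pointList": [{"x": 60, "y": 40}, ...]}]}
        #   }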
        with open(resource_file) as f:
            anns = json.load(f)
        resource_infos = []

        for ann in anns:
            # Load image
            img = load_image_from_disk_or_url(ann['url'])
            # Load result
            rst = json.loads(ann['result'])

            # Check facial keypoint information
            assert rst['step_1']['toolName'] == 'pointTool'
            assert len(rst['step_1']['result']) == 3

            keypoints = sorted(
                rst['step_1']['result'], key=lambda x: x['order'])
            keypoints = np.array([[pt['x'], pt['y']] for pt in keypoints])

            # Check facial mask
            assert rst['step_2']['toolName'] == 'polygonTool'
            assert len(rst['step_2']['result']) == 1
            assert len(rst['step_2']['result'][0]['pointList']) > 2

            mask_pts = np.array(
                [[pt['x'], pt['y']]
                 for pt in rst['step_2']['result'][0]['pointList']])

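            # Resize the resource image so that an ``out_shape`` crop plus
            # the ``dynamic_scale`` margin fits inside it; the extra margin
            # is the room in which the drifting crop window moves (see
            # ``_reset_dynamic`` and ``draw``).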
            mul = 1.0 + self.dynamic_scale

            w_scale = self.out_shape[0] / img.shape[1] * mul
            h_scale = self.out_shape[1] / img.shape[0] * mul

            img = cv2.resize(
                img,
                dsize=None,
                fx=w_scale,
                fy=h_scale,
                interpolation=cv2.INTER_CUBIC)

            keypoints *= [w_scale, h_scale]
            mask_pts *= [w_scale, h_scale]

            mask = cv2.fillPoly(
                np.zeros(img.shape[:2], dtype=np.uint8),
                [mask_pts.astype(np.int32)],
                color=1)

            res = {
                'img': img,
                'keypoints': keypoints,
                'mask': mask,
            }
            resource_infos.append(res)

        self.resource_infos = resource_infos

        self._reset_dynamic()

    def switch_mode(self):
        self.mode_index = (self.mode_index + 1) % len(self.resource_infos)

    def _reset_dynamic(self):
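        # Pick a new random crop offset inside the extra margin and split the
        # distance from the current offset into ``dynamic_max_step`` equal
        # steps, so the background drifts smoothly toward it over the next
        # frames.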
        x_tar = np.random.randint(int(self.out_shape[0] * self.dynamic_scale))
        y_tar = np.random.randint(int(self.out_shape[1] * self.dynamic_scale))

        x_step = (x_tar -
                  self.dynamic_info.pos_curr[0]) / self.dynamic_max_step
        y_step = (y_tar -
                  self.dynamic_info.pos_curr[1]) / self.dynamic_max_step

        self.dynamic_info.pos_step = (x_step, y_step)
        self.dynamic_info.step_curr = 0

    def draw(self, frame_msg):
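        # Overall flow: take the face keypoints (eyes and nose) from the pose
        # results, warp the camera frame so they align with the annotated
        # keypoints of the resource image, paste it under the face-region
        # mask, then crop a slowly drifting window to animate the background.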
        full_pose_results = frame_msg.get_pose_results()

        pred = None
        if full_pose_results:
            for pose_results in full_pose_results:
                if not pose_results['preds']:
                    continue

                pred = pose_results['preds'][0].copy()
                pred['dataset'] = DatasetInfo(pose_results['model_cfg'].data.
                                              test.dataset_info).dataset_name

                self.latest_pred = pred
                break

        # Use the latest pose result if there is none available in
        # the current frame.
        if pred is None:
            pred = self.latest_pred

        # Get the background image and facial annotations
        res = self.resource_infos[self.mode_index]
        img = frame_msg.get_image()
        canvas = res['img'].copy()
        mask = res['mask']
        kpts_tar = res['keypoints']

        if pred is not None:
            if pred['dataset'] == 'ap10k':
                # left eye: 0, right eye: 1, nose: 2
                kpts_src = pred['keypoints'][[0, 1, 2], :2]
            elif pred['dataset'] == 'coco_wholebody':
                # left eye: 1, right eye: 2, nose: 0
                kpts_src = pred['keypoints'][[1, 2, 0], :2]
            else:
                raise ValueError('Cannot obtain face landmark information '
                                 f'from dataset: {pred["dataset"]}')

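            # Warp the camera frame so the detected eyes/nose land on the
            # annotated keypoints, then copy the warped pixels into the
            # mascot image only where the face-region mask is non-zero.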
            trans_mat = self._get_transform(kpts_src, kpts_tar)

            # Note: cv2.warpAffine expects dsize as (width, height)
            warp = cv2.warpAffine(img, trans_mat, dsize=canvas.shape[1::-1])
            cv2.copyTo(warp, mask, canvas)

        # Add random movement to the background
        xc, yc = self.dynamic_info.pos_curr
        xs, ys = self.dynamic_info.pos_step
        w, h = self.out_shape

        x = min(max(int(xc), 0), canvas.shape[1] - w + 1)
        y = min(max(int(yc), 0), canvas.shape[0] - h + 1)

        canvas = canvas[y:y + h, x:x + w]

        self.dynamic_info.pos_curr = (xc + xs, yc + ys)
        self.dynamic_info.step_curr += 1

        if self.dynamic_info.step_curr == self.dynamic_max_step:
            self._reset_dynamic()

        return canvas

    def _get_transform(self, kpts_src, kpts_tar):
        if self.rigid:
            # rigid transform
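            # Least-squares similarity transform (rotation, uniform scale and
            # translation, no reflection) mapping ``kpts_src`` to
            # ``kpts_tar``. With unknowns M = [a, b, tx, ty] and the 2x3
            # matrix [[a, b, tx], [-b, a, ty]], stacking the x- and
            # y-equations of all points gives the linear system X @ M = U,
            # solved below via the pseudo-inverse.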
            n = kpts_src.shape[0]
            X = np.zeros((n * 2, 4), dtype=np.float32)
            U = np.zeros((n * 2, 1), dtype=np.float32)
            X[:n, :2] = kpts_src
            X[:n, 2] = 1
            X[n:, 0] = kpts_src[:, 1]
            X[n:, 1] = -kpts_src[:, 0]
            X[n:, 3] = 1

            U[:n, 0] = kpts_tar[:, 0]
            U[n:, 0] = kpts_tar[:, 1]

            M = np.linalg.pinv(X).dot(U).flatten()

            trans_mat = np.array([[M[0], M[1], M[2]], [-M[1], M[0], M[3]]],
                                 dtype=np.float32)

        else:
            # normal affine transform
            # adaptive horizontal flipping
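            # If the relative left/right eye-to-nose distances disagree
            # between the source and target triplets, swap the source eyes
            # (adaptive horizontal flip) so the fitted affine transform does
            # not mirror the face.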
            if (np.linalg.norm(kpts_tar[0] - kpts_tar[2]) -
                    np.linalg.norm(kpts_tar[1] - kpts_tar[2])) * (
                        np.linalg.norm(kpts_src[0] - kpts_src[2]) -
                        np.linalg.norm(kpts_src[1] - kpts_src[2])) < 0:
                kpts_src = kpts_src[[1, 0, 2], :]
            trans_mat, _ = cv2.estimateAffine2D(
                kpts_src.astype(np.float32), kpts_tar.astype(np.float32))

        return trans_mat