import torch
import torch.nn as nn
# from model.module.trans import Transformer as Transformer_s
# from model.module.trans_hypothesis import Transformer
import numpy as np
from einops import rearrange
from collections import OrderedDict
from torch.nn import functional as F
from torch.nn import init
import scipy.sparse as sp

from timm.models.layers import DropPath


class Model(nn.Module):
    def __init__(self, args):
        super().__init__()

        layers, channel, d_hid, length = args.layers, args.channel, args.d_hid, args.frames
        self.num_joints_in, self.num_joints_out = args.n_joints, args.out_joints
        args.d_hid = 256  # note: the hidden width is hard-coded here and overrides args.d_hid
        isTrainning = args.train

        # dimension transfer: lift 2D joint coordinates to the hidden width
        self.pose_emb = nn.Linear(2, args.d_hid, bias=False)
        self.gelu = nn.GELU()

        # self.flow_emb = nn.Linear(2, args.d_hid, bias=False)
        # self.gelu = nn.GELU()

        self.mlpmixer = MlpMixer(6, args.frames, 17, args.d_hid, isTrainning)

        self.pose_lift = nn.Linear(args.d_hid, 3, bias=False)

        # self.sequence_pos_encoder = PositionalEncoding(args.d_hid, 0.1)

        # self.tem_pool = nn.AdaptiveAvgPool1d(1)
        # self.lpm = LearnedPosMap2D(args.frames, 18)

    def forward(self, x):
        # x: [B, 2, T, J, M]; keep the first person and move to [B, T, J, 2]
        x = x[:, :, :, :, 0].permute(0, 2, 3, 1).contiguous()
        # x = x.view(x.shape[0], x.shape[1], x.shape[2], -1)  # b, t, j, 2

        b, t, j, c = x.shape

        # g = torch.zeros([b, t, 1, c]).cuda()
        # x = torch.cat((x, g), -2)

        x = self.pose_emb(x)
        x = self.gelu(x)

        # x = x.reshape(b, t, j, c)

        x = self.mlpmixer(x)

        x = self.pose_lift(x)

        return x

def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

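
# Example (illustrative sketch, not used by the model itself): row-normalizing a
# small adjacency matrix so that every non-empty row sums to 1; all-zero rows
# stay zero because their inverse degree is clamped to 0 above.
#
#   adj = sp.coo_matrix(np.array([[0., 1., 1.],
#                                 [1., 0., 0.],
#                                 [0., 0., 0.]]))
#   normalize(adj).toarray()   # rows: [0, 0.5, 0.5], [1, 0, 0], [0, 0, 0]
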

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is deprecated; torch.sparse_coo_tensor is the equivalent constructor
    return torch.sparse_coo_tensor(indices, values, shape)

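
# Example (illustrative sketch): the conversion keeps only the non-zero entries
# of the scipy matrix as a float32 COO tensor.
#
#   eye = sparse_mx_to_torch_sparse_tensor(sp.eye(3))
#   eye.to_dense()   # 3x3 identity matrix, dtype torch.float32
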

def adj_mx_from_edges(num_pts, edges, sparse=False):
    edges = np.array(edges, dtype=np.int32)
    data, i, j = np.ones(edges.shape[0]), edges[:, 0], edges[:, 1]
    adj_mx = sp.coo_matrix((data, (i, j)), shape=(num_pts, num_pts), dtype=np.float32)

    # build symmetric adjacency matrix
    adj_mx = adj_mx + adj_mx.T.multiply(adj_mx.T > adj_mx) - adj_mx.multiply(adj_mx.T > adj_mx)
    # adj_mx = normalize(adj_mx + sp.eye(adj_mx.shape[0]))
    if sparse:
        adj_mx = sparse_mx_to_torch_sparse_tensor(adj_mx)
    else:
        adj_mx = torch.tensor(adj_mx.todense(), dtype=torch.float)
    # return the per-node degree (row sum) rather than the full adjacency matrix
    return adj_mx.sum(-1)

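
# Example (illustrative sketch): with the 17-joint skeleton defined in cross_att
# below, this returns the per-joint degree vector (row sums of the symmetric
# adjacency matrix), which is what cross_att stores as `self.graph`.
#
#   edges = [[0, 1], [1, 2], [2, 3], [0, 4], [4, 5], [5, 6], [0, 7], [7, 8],
#            [8, 9], [9, 10], [8, 11], [11, 12], [12, 13], [8, 14], [14, 15],
#            [15, 16]]
#   deg = adj_mx_from_edges(17, edges)   # tensor of shape [17]
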

class ChebConv(nn.Module):
    """
    The ChebNet convolution operation.
    :param in_c: int, number of input channels.
    :param out_c: int, number of output channels.
    :param K: int, the order of the Chebyshev polynomial.
    """

    def __init__(self, in_c, out_c, K, bias=True, normalize=True):
        super(ChebConv, self).__init__()
        self.normalize = normalize

        self.weight = nn.Parameter(torch.Tensor(K + 1, 1, in_c, out_c))  # [K+1, 1, in_c, out_c]
        init.xavier_normal_(self.weight)

        if bias:
            self.bias = nn.Parameter(torch.Tensor(1, 1, out_c))
            init.zeros_(self.bias)
        else:
            self.register_parameter("bias", None)

        self.K = K + 1

    def forward(self, inputs, graph):
        """
        :param inputs: the input data, [B, N, C]
        :param graph: the graph structure, [N, N]
        :return: convolution result, [B, N, D]
        """
        L = ChebConv.get_laplacian(graph, self.normalize)  # [N, N]
        mul_L = self.cheb_polynomial(L).unsqueeze(1)  # [K, 1, N, N]

        result = torch.matmul(mul_L, inputs)  # [K, B, N, C]
        result = torch.matmul(result, self.weight)  # [K, B, N, D]
        result = torch.sum(result, dim=0) + self.bias  # [B, N, D]

        return result

    def cheb_polynomial(self, laplacian):
        """
        Compute the Chebyshev polynomials of the graph Laplacian, using the
        recurrence T_k(L) = 2 * L * T_{k-1}(L) - T_{k-2}(L).
        :param laplacian: the graph laplacian, [N, N].
        :return: the multi-order Chebyshev laplacian, [K, N, N].
        """
        N = laplacian.size(0)  # [N, N]
        multi_order_laplacian = torch.zeros([self.K, N, N], device=laplacian.device, dtype=torch.float)  # [K, N, N]
        multi_order_laplacian[0] = torch.eye(N, device=laplacian.device, dtype=torch.float)

        if self.K == 1:
            return multi_order_laplacian

        multi_order_laplacian[1] = laplacian
        for k in range(2, self.K):
            multi_order_laplacian[k] = 2 * torch.mm(laplacian, multi_order_laplacian[k - 1]) - \
                                       multi_order_laplacian[k - 2]

        return multi_order_laplacian

    @staticmethod
    def get_laplacian(graph, normalize):
        """
        Return the Laplacian of the graph.
        :param graph: the graph structure without self loops, [N, N].
        :param normalize: whether to use the normalized Laplacian
            L = I - D^{-1/2} A D^{-1/2}.
        :return: graph laplacian.
        """
        if normalize:
            D = torch.diag(torch.sum(graph, dim=-1) ** (-1 / 2))
            L = torch.eye(graph.size(0), device=graph.device, dtype=graph.dtype) - torch.mm(torch.mm(D, graph), D)
        else:
            D = torch.diag(torch.sum(graph, dim=-1))
            L = D - graph
        return L
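
# Example (illustrative sketch; the layer is only referenced from a commented-out
# `self.gate_gs` line in cross_att below, so this usage is an assumption, not the
# original training setup):
#
#   conv = ChebConv(in_c=2, out_c=8, K=2)
#   graph = torch.tensor([[0., 1., 0.],
#                         [1., 0., 1.],
#                         [0., 1., 0.]])        # symmetric adjacency, no self loops
#   out = conv(torch.rand(4, 3, 2), graph)      # -> [4, 3, 8]
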

class cross_att(nn.Module):
    def __init__(self, d_time, d_joint, d_coor, isTrainning=False, head=4):
        super().__init__()

        self.qkv = nn.Linear(d_coor, d_coor * 3)
        self.head = head
        self.layer_norm = nn.LayerNorm(d_coor)
        # self.lpm_st_1 = LearnedPosMap2D(d_time, d_joint, gamma=4)
        self.scale = d_coor ** -0.5
        self.proj = nn.Linear(d_coor, d_coor)
        self.d_time = d_time
        self.d_joint = d_joint

        # self.gate_s = MSLSP(d_time, d_joint, d_coor // 2)
        # self.gate_t = MSLSP(d_time, d_joint, d_coor // 2)
        self.gate_t = nn.Conv2d(d_coor // 2, d_coor // 2, kernel_size=3, stride=1, padding=1, groups=d_coor // 2)
        self.gate_s = nn.Conv2d(d_coor // 2, d_coor // 2, kernel_size=3, stride=1, padding=1, groups=d_coor // 2)

        # self.gate_gs = ChebConv(d_coor // 2, d_coor // 2, K=2)
        # self.scf = nn.Parameter(0.0001 * torch.Tensor(1, 1, d_coor // 8))
        # init.xavier_normal_(self.scf)

        self.body_edges = torch.tensor([[0, 1], [1, 2], [2, 3],
                                        [0, 4], [4, 5], [5, 6],
                                        [0, 7], [7, 8], [8, 9], [9, 10],
                                        [8, 11], [11, 12], [12, 13],
                                        [8, 14], [14, 15], [15, 16]], dtype=torch.long)
        # [0,17],[1,17],[2,17],[3,17],[4,17],[5,17],[6,17],[7,17],[8,17],[9,17],
        # [10,17],[11,17],[12,17],[13,17],[14,17],[15,17],[16,17]
        # self.conv_2 = nn.Conv2d(d_coor, d_coor, kernel_size=5, stride=1, padding=2, groups=d_coor)
        self.graph = adj_mx_from_edges(d_joint, self.body_edges).long().cuda()
        self.emb = nn.Embedding(20, d_coor // 8, padding_idx=0)
        self.part = torch.tensor([0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 0, 0, 0]).long().cuda()

        # self.lpm_s = LearnedPosMap2D(d_time, d_joint)
        # self.lpm_t = LearnedPosMap2D(d_time, d_joint)

        self.drop = DropPath(0.5)

    def forward(self, input):
        b, t, s, c = input.shape

        # input = input + self.lpm_st_1(input)
        h = input
        x = self.layer_norm(input)
        qkv = self.qkv(x)

        qkv = qkv.reshape(b, t, s, c, 3).permute(4, 0, 1, 2, 3)  # 3, b, t, s, c

        # split the channels: one half attends over joints, the other over time
        qkv_s, qkv_t = qkv.chunk(2, 4)

        q_s, k_s, v_s = qkv_s[0], qkv_s[1], qkv_s[2]  # b, t, s, c//2
        q_t, k_t, v_t = qkv_t[0], qkv_t[1], qkv_t[2]  # b, t, s, c//2

        q_s = rearrange(q_s, 'b t s (h c) -> (b h t) s c', h=self.head)
        k_s = rearrange(k_s, 'b t s (h c) -> (b h t) c s', h=self.head)

        q_t = rearrange(q_t, 'b t s (h c) -> (b h s) t c', h=self.head)
        k_t = rearrange(k_t, 'b t s (h c) -> (b h s) c t', h=self.head)

        att_s = (q_s @ k_s) * self.scale  # (b*h*t), s, s
        att_t = (q_t @ k_t) * self.scale  # (b*h*s), t, t

        att_s = att_s.softmax(-1)
        att_t = att_t.softmax(-1)

        v_s = rearrange(v_s, 'b t s c -> b c t s')
        v_t = rearrange(v_t, 'b t s c -> b c t s')

        # local enhancement via depth-wise convolutions over the value maps
        lep_s = self.gate_s(v_s)
        lep_t = self.gate_t(v_t)
        v_s = rearrange(v_s, 'b c t s -> (b t) s c')
        # sep_s = self.gate_gs(v_s, self.graph)
        # learned per-body-part embedding, shared across batch and time
        sep_s = self.emb(self.part).unsqueeze(0)

        # sep_s = rearrange(sep_s, '(b t) s (h c) -> (b h t) s c', t=t, h=self.head)

        lep_s = rearrange(lep_s, 'b (h c) t s -> (b h t) s c', h=self.head)
        lep_t = rearrange(lep_t, 'b (h c) t s -> (b h s) t c', h=self.head)

        v_s = rearrange(v_s, '(b t) s (h c) -> (b h t) s c', t=t, h=self.head)
        # v_s = rearrange(v_s, 'b (h c) t s -> (b h t) s c', h=self.head)
        v_t = rearrange(v_t, 'b (h c) t s -> (b h s) t c', h=self.head)

        # v = torch.cat((v1, v2), -1)

        x_s = att_s @ v_s + lep_s + 0.0001 * self.drop(sep_s)  # (b*h*t), s, c//(2h)
        x_t = att_t @ v_t + lep_t  # (b*h*s), t, c//(2h)

        x_s = rearrange(x_s, '(b h t) s c -> b h t s c', h=self.head, t=t)
        x_t = rearrange(x_t, '(b h s) t c -> b h t s c', h=self.head, s=s)

        x = torch.cat((x_s, x_t), -1)
        x = rearrange(x, 'b h t s c -> b t s (h c)')

        x = self.proj(x)
        x = x + h
        return x
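
# Example (illustrative sketch; CUDA is assumed because the block pins its
# joint-index buffers to the GPU): cross_att is shape-preserving over
# [B, T, J, C] inputs, with joint attention and temporal attention each
# operating on half of the channels.
#
#   blk = cross_att(d_time=27, d_joint=17, d_coor=256).cuda()
#   y = blk(torch.rand(2, 27, 17, 256).cuda())   # -> [2, 27, 17, 256]
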

class MLP_3D(nn.Module):
    def __init__(self, d_time, d_joint, d_coor, isTrainning=False):
        super().__init__()

        self.d_time = d_time
        self.d_joint = d_joint
        self.d_coor = d_coor

        self.layer_norm1 = nn.LayerNorm(self.d_coor)
        self.layer_norm2 = nn.LayerNorm(self.d_coor)

        self.mlp1 = Mlp(self.d_coor, self.d_coor * 4, self.d_coor)

        self.cross_att = cross_att(d_time, d_joint, d_coor, isTrainning)
        self.drop = DropPath(0.0)

    def forward(self, input):
        b, t, s, c = input.shape

        x = self.cross_att(input)
        x = x + self.drop(self.mlp1(self.layer_norm1(x)))

        return x

class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.1):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features, bias=False)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features, bias=False)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x

class Mlp_C(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.0):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features, bias=False)
        self.act = act_layer()
        self.drop = nn.Dropout(drop)
        self.fc2 = nn.Linear(hidden_features, out_features, bias=False)
        self.sig = nn.Sigmoid()
        self.avg = nn.AdaptiveAvgPool2d((1, 1))

    def forward(self, x):
        b, t, s, c = x.shape
        # gate = self.avg(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)
        gate = self.fc1(x)
        gate = self.act(gate)
        gate = self.drop(gate)
        gate = self.fc2(gate)
        gate = self.sig(gate)
        # gate = gate.expand(b, t, s, c)
        x = x * gate
        return x

class MlpMixer(nn.Module):
    def __init__(self, num_block, d_time, d_joint, d_coor, isTrainning=False):
        super(MlpMixer, self).__init__()

        self.num_block = num_block
        self.d_time = d_time
        self.d_joint = d_joint
        self.d_coor = d_coor

        self.mixerblocks = nn.ModuleList([
            MLP_3D(self.d_time, self.d_joint, self.d_coor, isTrainning)
            for _ in range(self.num_block)
        ])

    def forward(self, input):
        # stacked blocks
        for i in range(self.num_block):
            input = self.mixerblocks[i](input)

        return input

if __name__ == "__main__":
    from argparse import Namespace

    # Minimal smoke test. The argument values below are assumptions chosen to
    # match the shapes used elsewhere in this file (17 joints, 351 frames, a
    # hidden width of 256); adjust them to your own configuration. CUDA is
    # assumed because cross_att pins its joint-index buffers to the GPU.
    args = Namespace(layers=6, channel=256, d_hid=256, frames=351,
                     n_joints=17, out_joints=17, train=False)

    net = Model(args).cuda()
    inputs = torch.rand(1, 2, args.frames, args.n_joints, 1).cuda()  # [B, 2, T, J, M]

    output = net(inputs)
    print(output.size())

    from thop import profile

    flops, params = profile(net, inputs=(inputs,))
    print(flops)
    print(params)

    """
    for name, param in net.named_parameters():
        if param.requires_grad:
            print(name, ':', param.size())
    """