GCN代码超详解析Two-stream adaptive graph convolutional network for Skeleton-Based Action Recognition（二）

代码地址：https://github.com/lshiwjx/2s-AGCN
本篇解析代码为[2s-AGCN/model/aagcn.py]

import math

import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable


def import_class(name):
    """Resolve a dotted path such as ``'pkg.module.Class'`` to the object it names.

    The first component is imported with ``__import__``; every following
    component is looked up with ``getattr`` on the previous result.
    """
    components = name.split('.')
    mod = __import__(components[0])
    for comp in components[1:]:
        mod = getattr(mod, comp)
    return mod


def conv_branch_init(conv, branches):
    """Initialise one conv branch out of ``branches`` parallel branches.

    Weights are drawn from N(0, sqrt(2 / (n * k1 * k2 * branches))) where
    n, k1, k2 are the first three dimensions of ``conv.weight``; the bias is
    set to zero.
    """
    weight = conv.weight
    n = weight.size(0)
    k1 = weight.size(1)
    k2 = weight.size(2)
    nn.init.normal_(weight, 0, math.sqrt(2. / (n * k1 * k2 * branches)))
    nn.init.constant_(conv.bias, 0)


def conv_init(conv):
    """Kaiming-normal (fan_out) weight initialisation with a zero bias."""
    nn.init.kaiming_normal_(conv.weight, mode='fan_out')
    nn.init.constant_(conv.bias, 0)


def bn_init(bn, scale):
    """Set a batch-norm layer's weight to ``scale`` and its bias to zero."""
    nn.init.constant_(bn.weight, scale)
    nn.init.constant_(bn.bias, 0)


class unit_tcn(nn.Module):
    """Temporal convolution: a (kernel_size x 1) Conv2d followed by BatchNorm2d."""

    def __init__(self, in_channels, out_channels, kernel_size=9, stride=1):
        super(unit_tcn, self).__init__()
        # "Same" padding along the first spatial axis for odd kernel sizes.
        pad = int((kernel_size - 1) / 2)
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(kernel_size, 1), padding=(pad, 0),
                              stride=(stride, 1))

        self.bn = nn.BatchNorm2d(out_channels)
        # Kept as an attribute for parity with the original; forward() does not apply it.
        self.relu = nn.ReLU(inplace=True)
        conv_init(self.conv)
        bn_init(self.bn, 1)

    def forward(self, x):
        """Return ``bn(conv(x))``; no activation is applied here."""
        x = self.bn(self.conv(x))
        return x


class unit_gcn(nn.Module):
    """Graph convolution unit with optional adaptive adjacency and attention.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels of the output tensor.
        A: numpy array of adjacency matrices, indexed as ``A[i]`` for each of
            the ``num_subset`` subsets; ``A.shape[-1]`` is the number of joints.
        coff_embedding: ``out_channels // coff_embedding`` is the width of the
            embeddings used to compute the data-dependent adjacency.
        num_subset: number of adjacency subsets. Per the walkthrough author's
            reading, this is the number of neighbour subsets produced by the
            partition strategy (ST-GCN's third strategy yields 3), and each
            subset has its own trainable weights. Illustration credited to
            https://www.cnblogs.com/shyern/p/11262926.html#_label4_1
        adaptive: if True, the adjacency is a trainable parameter plus a
            data-dependent term; otherwise the fixed ``A`` is used.
        attention: if True, apply spatial, temporal and channel attention.
    """

    def __init__(self, in_channels, out_channels, A, coff_embedding=4, num_subset=3, adaptive=True, attention=True):
        super(unit_gcn, self).__init__()
        inter_channels = out_channels // coff_embedding
        self.inter_c = inter_channels
        self.out_c = out_channels
        self.in_c = in_channels
        self.num_subset = num_subset
        num_jpts = A.shape[-1]

        # nn.ModuleList behaves like a Python list of layers (it has no forward()
        # of its own); one 1x1 output convolution per subset.
        self.conv_d = nn.ModuleList()
        for i in range(self.num_subset):
            self.conv_d.append(nn.Conv2d(in_channels, out_channels, 1))

        if adaptive:
            # astype() returns a new array, so the parameter does not alias the
            # caller's A even though torch.from_numpy shares memory with its
            # argument. nn.Parameter registers the tensor as trainable.
            self.PA = nn.Parameter(torch.from_numpy(A.astype(np.float32)))
            self.alpha = nn.Parameter(torch.zeros(1))
            # Together with conv_d this gives three groups (conv_d, conv_a,
            # conv_b) of num_subset 1x1 convolutions each.
            self.conv_a = nn.ModuleList()
            self.conv_b = nn.ModuleList()
            for i in range(self.num_subset):
                self.conv_a.append(nn.Conv2d(in_channels, inter_channels, 1))
                self.conv_b.append(nn.Conv2d(in_channels, inter_channels, 1))
        else:
            self.A = Variable(torch.from_numpy(A.astype(np.float32)), requires_grad=False)
        self.adaptive = adaptive

        if attention:
            # Temporal attention: 1-D conv, zero-initialised.
            self.conv_ta = nn.Conv1d(out_channels, 1, 9, padding=4)
            nn.init.constant_(self.conv_ta.weight, 0)
            nn.init.constant_(self.conv_ta.bias, 0)

            # Spatial attention: force an odd kernel so that the padding below
            # preserves the joint dimension.
            ker_jpt = num_jpts - 1 if not num_jpts % 2 else num_jpts
            pad = (ker_jpt - 1) // 2
            self.conv_sa = nn.Conv1d(out_channels, 1, ker_jpt, padding=pad)
            nn.init.xavier_normal_(self.conv_sa.weight)
            nn.init.constant_(self.conv_sa.bias, 0)

            # Channel attention: bottleneck MLP with reduction ratio rr.
            rr = 2
            self.fc1c = nn.Linear(out_channels, out_channels // rr)
            self.fc2c = nn.Linear(out_channels // rr, out_channels)
            # Kaiming (He) initialisation, not Xavier.
            nn.init.kaiming_normal_(self.fc1c.weight)
            nn.init.constant_(self.fc1c.bias, 0)
            nn.init.constant_(self.fc2c.weight, 0)
            nn.init.constant_(self.fc2c.bias, 0)
        self.attention = attention

        if in_channels != out_channels:
            # Project the residual input to out_channels when the widths differ.
            self.down = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1),
                nn.BatchNorm2d(out_channels)
            )
        else:
            self.down = lambda x: x

        self.bn = nn.BatchNorm2d(out_channels)
        self.soft = nn.Softmax(-2)
        self.tan = nn.Tanh()
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU(inplace=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                conv_init(m)
            elif isinstance(m, nn.BatchNorm2d):
                bn_init(m, 1)
        bn_init(self.bn, 1e-6)
        for i in range(self.num_subset):
            conv_branch_init(self.conv_d[i], self.num_subset)

    def forward(self, x):
        """Apply the graph convolution to ``x`` of shape (N, C, T, V).

        Returns a tensor of shape (N, out_channels, T, V).

        Note from the walkthrough: N and V in this code are swapped relative
        to the figure the article refers to. The "blue k" remarks below are
        the author's references to steps of that figure.
        """
        N, C, T, V = x.size()

        y = None
        if self.adaptive:
            A = self.PA
            for i in range(self.num_subset):
                # blue 2: (N, V, inter_c * T)
                A1 = self.conv_a[i](x).permute(0, 3, 1, 2).contiguous().view(N, V, self.inter_c * T)
                # blue 3: (N, inter_c * T, V)
                A2 = self.conv_b[i](x).view(N, self.inter_c * T, V)
                # blue 4: (N, V, V)
                A1 = self.tan(torch.matmul(A1, A2) / A1.size(-1))
                A1 = A[i] + A1 * self.alpha
                # blue 1
                A2 = x.view(N, C * T, V)
                z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
                # blue 5: sum over subsets
                y = z + y if y is not None else z
        else:
            # The original multiplied by an undefined ``self.mask`` and forced
            # the tensor onto CUDA; use the fixed adjacency on x's own device.
            A = self.A.to(x.device)
            for i in range(self.num_subset):
                A1 = A[i]
                A2 = x.view(N, C * T, V)
                z = self.conv_d[i](torch.matmul(A2, A1).view(N, C, T, V))
                y = z + y if y is not None else z

        y = self.bn(y)
        y += self.down(x)
        y = self.relu(y)

        if self.attention:
            # Spatial attention: average over T -> (N, C, V).
            se = y.mean(-2)
            se1 = self.sigmoid(self.conv_sa(se))
            y = y * se1.unsqueeze(-2) + y

            # Temporal attention: average over V -> (N, C, T).
            se = y.mean(-1)
            se1 = self.sigmoid(self.conv_ta(se))
            y = y * se1.unsqueeze(-1) + y

            # Channel attention: average over V and T -> (N, C).
            se = y.mean(-1).mean(-1)
            se1 = self.relu(self.fc1c(se))
            se2 = self.sigmoid(self.fc2c(se1))
            y = y * se2.unsqueeze(-1).unsqueeze(-1) + y

        return y

class TCN_GCN_unit(nn.Module):
    """One network layer: graph convolution, then temporal convolution, plus a residual path.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels of the output tensor.
        A: adjacency array forwarded to ``unit_gcn``.
        stride: stride of the temporal convolution (and of the residual
            projection when one is needed).
        residual: if False, the residual path contributes 0.
        adaptive: forwarded to ``unit_gcn``.
        attention: forwarded to ``unit_gcn``; also stored on ``self.attention``.
    """

    def __init__(self, in_channels, out_channels, A, stride=1, residual=True, adaptive=True, attention=True):
        super(TCN_GCN_unit, self).__init__()
        # Graph convolution.
        self.gcn1 = unit_gcn(in_channels, out_channels, A, adaptive=adaptive, attention=attention)
        # Temporal convolution.
        self.tcn1 = unit_tcn(out_channels, out_channels, stride=stride)
        self.relu = nn.ReLU(inplace=True)
        self.attention = attention

        if not residual:
            self.residual = lambda x: 0
        elif (in_channels == out_channels) and (stride == 1):
            # Shapes already match: identity shortcut.
            self.residual = lambda x: x
        else:
            # Shapes differ: project with a kernel-size-1 temporal unit.
            self.residual = unit_tcn(in_channels, out_channels, kernel_size=1, stride=stride)

    def forward(self, x):
        """Return ``relu(tcn1(gcn1(x)) + residual(x))``.

        The original branched on ``self.attention`` here, but both branches
        computed exactly this expression, so the branch was removed.
        """
        return self.relu(self.tcn1(self.gcn1(x)) + self.residual(x))

class Model(nn.Module):
    """Ten stacked ``TCN_GCN_unit`` layers followed by a linear classifier.

    Args:
        num_class: number of action classes (output size of the classifier).
        num_point: number of joints per person.
        num_person: maximum number of persons per sample.
        graph: dotted path of the graph class, resolved with ``import_class``.
        graph_args: keyword arguments for the graph class; ``None`` means no
            arguments.
        in_channels: channels per joint in the input.
        drop_out: dropout probability before the classifier; a falsy value
            disables dropout.
        adaptive: forwarded to every ``TCN_GCN_unit``.
        attention: forwarded to every ``TCN_GCN_unit``.

    Raises:
        ValueError: if ``graph`` is None.
    """

    def __init__(self, num_class=60, num_point=25, num_person=2, graph=None, graph_args=None, in_channels=3,
                 drop_out=0, adaptive=True, attention=True):
        super(Model, self).__init__()

        if graph is None:
            raise ValueError('graph must be the dotted path of a graph class')
        Graph = import_class(graph)
        # None instead of a shared mutable ``dict()`` default.
        self.graph = Graph(**(graph_args or {}))

        A = self.graph.A
        self.num_class = num_class

        self.data_bn = nn.BatchNorm1d(num_person * in_channels * num_point)

        # Network body. The first layer takes ``in_channels`` (the original
        # hard-coded 3, which only matched the default).
        self.l1 = TCN_GCN_unit(in_channels, 64, A, residual=False, adaptive=adaptive, attention=attention)
        self.l2 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l3 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l4 = TCN_GCN_unit(64, 64, A, adaptive=adaptive, attention=attention)
        self.l5 = TCN_GCN_unit(64, 128, A, stride=2, adaptive=adaptive, attention=attention)
        self.l6 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)
        self.l7 = TCN_GCN_unit(128, 128, A, adaptive=adaptive, attention=attention)
        self.l8 = TCN_GCN_unit(128, 256, A, stride=2, adaptive=adaptive, attention=attention)
        self.l9 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)
        self.l10 = TCN_GCN_unit(256, 256, A, adaptive=adaptive, attention=attention)

        self.fc = nn.Linear(256, num_class)
        nn.init.normal_(self.fc.weight, 0, math.sqrt(2. / num_class))
        bn_init(self.data_bn, 1)
        if drop_out:
            self.drop_out = nn.Dropout(drop_out)
        else:
            self.drop_out = lambda x: x

    def forward(self, x):
        """Classify a batch of skeleton sequences.

        ``x`` has shape (N, C, T, V, M): samples, channels, frames, joints,
        persons. Returns the classifier output of shape (N, num_class).
        """
        N, C, T, V, M = x.size()

        # permute() reorders the axes; contiguous() makes the memory layout
        # match so that view() can flatten to (N, M*V*C, T) for BatchNorm1d.
        x = x.permute(0, 4, 3, 1, 2).contiguous().view(N, M * V * C, T)
        x = self.data_bn(x)
        # Fold persons into the batch axis: (N*M, C, T, V).
        x = x.view(N, M, V, C, T).permute(0, 1, 3, 4, 2).contiguous().view(N * M, C, T, V)

        x = self.l1(x)
        x = self.l2(x)
        x = self.l3(x)
        x = self.l4(x)
        x = self.l5(x)
        x = self.l6(x)
        x = self.l7(x)
        x = self.l8(x)
        x = self.l9(x)
        x = self.l10(x)

        # x is (N*M, C', T', V): average over the flattened (T', V) axis and
        # then over persons.
        c_new = x.size(1)
        x = x.view(N, M, c_new, -1)
        x = x.mean(3).mean(1)
        x = self.drop_out(x)

        # Fully connected classification head.
        return self.fc(x)

GCN代码超详解析Two-stream adaptive graph convolutional network for Skeleton-Based Action Recognition（二）相关推荐

GCN代码超详解析Two-stream adaptive graph convolutional network for Skeleton-Based Action Recognition（三）
代码地址https://github.com/lshiwjx/2s-AGCN 本篇解析对应 2s-AGCN/graph/ntu_rgb_d.py ,用于创建NTURGB-D对应的图结构,以及2s-AG ...
GCN代码超详解析Two-stream adaptive graph convolutional network for Skeleton-Based Action Recognition（一）
代码地址:https://github.com/lshiwjx/2s-AGCN 这个图用于说明人体关键节点的定义及其连接方式这个文件是根据NTURGB-D中关键点的定义计算骨骼长度所以最终得到的数 ...
【GCN】《Adaptive Propagation Graph Convolutional Network》（TNNLS 2020）
<Adaptive Propagation Graph Convolutional Network>(TNNLS 2020) 为每个节点赋予一个停止单元,该单元输出一个值控制Propaga ...
图卷积网络 GCN Graph Convolutional Network（谱域GCN）的理解和详细推导
文章目录 1. 为什么会出现图卷积神经网络? 2. 图卷积网络的两种理解方式 2.1 vertex domain(spatial domain):顶点域(空间域) 2.2 spectral domai ...
Adaptive Propagation Graph Convolutional Network
翻译一篇TNN 的论文仅用于学习原文章链接有道翻译的也是用了第一人称.如果有错,一定是你对. 题目:Adaptive Propagation Graph Convolutional Network ...
论文翻译：Skeleton-Based Action Recognition with Multi-Stream Adaptive Graph Convolutional Networks
摘要: 抽象图卷积网络(GCNs)将神经网络推广到更一般的非欧几里得结构,在基于骨架的动作识别方面取得了显著的性能.但是,以往基于gc的模型仍然存在一些问题.首先,图形的拓扑被启发式地设置并固定在所有 ...
论文阅读 Parallelly Adaptive Graph Convolutional Clustering Model(TNNLS2022)
论文标题 Parallelly Adaptive Graph Convolutional Clustering Model 论文作者.链接作者: He, Xiaxia and Wang, Boyue ...
GNN Algorithms(2): GCN, Graph Convolutional Network
目录 GCN Algorithm Background 传统卷积公式:在graph上不行定义Graph Fourier 传统Fourier transformation 传统Inverse Four ...
GCN (Graph Convolutional Network) 图卷积网络
这几个同时看一下,感觉能理解不少: B站视频:https://www.bilibili.com/video/BV1ta4y1t7EK GCN论文原文和代码:https://github.com/tki ...

GCN代码超详解析Two-stream adaptive graph convolutional network for Skeleton-Based Action Recognition（二）

GCN代码超详解析Two-stream adaptive graph convolutional network for Skeleton-Based Action Recognition（二）相关推荐

最新文章

热门文章