st-gcn训练自建行为识别数据集

一、代码下载与环境配置
二、准备行为数据
三、数据转换
四、添加Layout
五、修改训练参数
六、开始训练
七、模型测试

一、代码下载与环境配置

首先参照下面的命令，下载st-gcn算法的训练代码，配置环境。

git clone https://github.com/yysijie/st-gcn.git
cd st-gcn
pip install -r requirements.txt
cd torchlight
python setup.py install
cd ..

二、准备行为数据

训练之前，需要根据kinetics-skeleton数据集的格式，提取自建数据集中目标的行为数据。下图中是kinetics-skeleton数据集的组成。其中kinetics_train和kinetics_val文件夹中存储的是每一段视频中的行为信息，包括每一帧的姿态和行为标签。另外两个json文件中包含了对应文件夹中所有的文件名称和行为标签与索引。相关示例可下载。

提取生成kinetics-skeleton人体行为数据集的方法参考下面链接：
kinetics-skeleton格式行为数据提取方法

三、数据转换

st-gcn训练代码中自带了数据转换代码tools/kinetics_gendata.py，使用该脚本将kinetics-skeleton数据集转换为训练使用的npy与pkl文件。

python tools/kinetics_gendata.py

这里需要注意根据自己的数据集，修改kinetics_gendata.py中的内容，包括37-39行的num_person_in、num_person_out和max_frame，55行的关键点个数，72~83行的数据读取路径。feeder_kinetics.py中也要相应的修改。

 num_person_in=5,  #observe the first 5 persons
 num_person_out=2,  #then choose 2 persons with the highest score
 max_frame=300):

 shape=(len(sample_name), 3, max_frame, 18, num_person_out))

 # output data shape (N, C, T, V, M)
 self.N = len(self.sample_name)  #sample
 self.C = 3  #channel
 self.T = 300  #frame
 self.V = 18  #joint
 self.M = self.num_person_out  #person

修改后运行脚本，转换生成npy与pkl文件如图。

四、添加Layout

在net/utils/graph.py文件里面get_edge函数中增加一个elif分支，其中num_node为关键点个数，self_link为各关键点的自连接，neighbor_link为关键点之间的连接关系。如下添加的是一个'my_pose' Layout，关键点个数为20（默认的pose点数是18）。

注意：如果默认的layout符合自己定义的姿态就不用修改，否则需要自定义一个。其中num_node为关键点的个数，neighbor_link为关键点连接关系。如果新定义的姿态点数不为18，在后续转换中可能还需要做相应修改以保持一致。

elif layout == 'my_pose':
    self.num_node = 20
    self_link = [(i, i) for i in range(self.num_node)]
    neighbor_link = [(0, 1), (0, 3), (1, 2), (3, 4), (0, 5), (0, 11),
                     (5, 6), (6, 7), (11, 12), (12, 13),
                     (5, 8), (11, 14), (8, 9), (9, 10), (14, 15), (15, 16),
                     (17, 18), (8, 19), (14, 19), (17, 5), (17, 8), (17, 11),
                     (17, 14)]
    self.edge = self_link + neighbor_link
    self.center = 1

五、修改训练参数

修改config/st_gcn/kinetics-skeleton/train.yaml中的相关参数。
data_path和label_path修改为之前生成的文件路径；
num_class改为自建数据集的行为类别个数；
layout参数修改为之前添加的layout类别；
strategy设置为spatial；
由于我使用多GPU训练报错，所以设置device: [0]；
window_size可适当增大；
调整batch_size、学习率和迭代次数等。

六、开始训练

执行训练代码：

python main.py recognition -c config/st_gcn/kinetics-skeleton/train.yaml

训练生成的模型在work_dir中，默认每10个epoch保存一次模型。

七、模型测试

import pickle

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from net.utils.graph import Graph
from net.utils.tgcn import ConvTemporalGraphical
# from net.st_gcn import Model
class Model(nn.Module):
    r"""Spatial temporal graph convolutional networks.

    Args:
        in_channels (int): Number of channels in the input data
        num_class (int): Number of classes for the classification task
        edge_importance_weighting (bool): If ``True``, adds a learnable
            importance weighting to the edges of the graph
        graph_args (dict, optional): Keyword arguments forwarded to
            ``Graph``. ``None`` (the default) builds ``Graph()`` with its
            own defaults. NOTE(review): presumably this should carry the
            same layout/strategy used for training - confirm against
            net/utils/graph.py and the training config.
        **kwargs (optional): Other parameters for graph convolution units

    Shape:
        - Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})`
        - Output: :math:`(N, num_class)` where
            :math:`N` is a batch size,
            :math:`T_{in}` is a length of input sequence,
            :math:`V_{in}` is the number of graph nodes,
            :math:`M_{in}` is the number of instance in a frame.
    """

    def __init__(self, in_channels=3, num_class=3,
                 edge_importance_weighting=True, graph_args=None, **kwargs):
        super().__init__()

        # load graph
        self.graph = Graph(**(graph_args or {}))
        A = torch.tensor(self.graph.A, dtype=torch.float32,
                         requires_grad=False)
        self.register_buffer('A', A)

        # build networks
        spatial_kernel_size = A.size(0)
        temporal_kernel_size = 9
        kernel_size = (temporal_kernel_size, spatial_kernel_size)
        self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
        # The first layer is built without the 'dropout' option.
        kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
        self.st_gcn_networks = nn.ModuleList((
            st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 64, kernel_size, 1, **kwargs),
            st_gcn(64, 128, kernel_size, 2, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 128, kernel_size, 1, **kwargs),
            st_gcn(128, 256, kernel_size, 2, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs),
            st_gcn(256, 256, kernel_size, 1, **kwargs),
        ))

        # initialize parameters for edge importance weighting
        if edge_importance_weighting:
            self.edge_importance = nn.ParameterList([
                nn.Parameter(torch.ones(self.A.size()))
                for _ in self.st_gcn_networks
            ])
        else:
            self.edge_importance = [1] * len(self.st_gcn_networks)

        # fcn for prediction
        self.fcn = nn.Conv2d(256, num_class, kernel_size=1)

    def _run_backbone(self, x):
        """Normalize the input and run it through every st_gcn layer.

        Takes ``x`` of size (N, C, T, V, M) and returns the feature map of
        the last st_gcn layer, whose leading dimension is ``N * M``.
        """
        # data normalization
        N, C, T, V, M = x.size()
        x = x.permute(0, 4, 3, 1, 2).contiguous()
        x = x.view(N * M, V * C, T)
        x = self.data_bn(x)
        x = x.view(N, M, V, C, T)
        x = x.permute(0, 1, 3, 4, 2).contiguous()
        x = x.view(N * M, C, T, V)

        # forward
        for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
            x, _ = gcn(x, self.A * importance)
        return x

    def forward(self, x):
        """Return class scores of size (N, num_class) for ``x``."""
        N, M = x.size(0), x.size(4)
        x = self._run_backbone(x)

        # global pooling over the remaining two dimensions, then average
        # over the M instances of each sample
        x = F.avg_pool2d(x, x.size()[2:])
        x = x.view(N, M, -1, 1, 1).mean(dim=1)

        # prediction
        x = self.fcn(x)
        x = x.view(x.size(0), -1)
        return x

    def extract_feature(self, x):
        """Return ``(output, feature)`` without global pooling.

        ``feature`` is the last st_gcn feature map and ``output`` is the
        result of applying ``fcn`` to it; both are reshaped so that the
        instance dimension M comes last.
        """
        N, M = x.size(0), x.size(4)
        x = self._run_backbone(x)

        _, c, t, v = x.size()
        feature = x.view(N, M, c, t, v).permute(0, 2, 3, 4, 1)

        # prediction
        x = self.fcn(x)
        output = x.view(N, M, -1, t, v).permute(0, 2, 3, 4, 1)
        return output, feature


class st_gcn(nn.Module):
    r"""Applies a spatial temporal graph convolution over an input graph sequence.

    Args:
        in_channels (int): Number of channels in the input sequence data
        out_channels (int): Number of channels produced by the convolution
        kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel
        stride (int, optional): Stride of the temporal convolution. Default: 1
        dropout (int, optional): Dropout rate of the final output. Default: 0
        residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True``

    Shape:
        - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
        - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
        - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
        - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format

        where
            :math:`N` is a batch size,
            :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
            :math:`T_{in}/T_{out}` is a length of input/output sequence,
            :math:`V` is the number of graph nodes.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dropout=0,
                 residual=True):
        super().__init__()

        assert len(kernel_size) == 2
        # The temporal kernel must be odd so the padding below is symmetric.
        assert kernel_size[0] % 2 == 1
        padding = ((kernel_size[0] - 1) // 2, 0)

        self.gcn = ConvTemporalGraphical(in_channels, out_channels,
                                         kernel_size[1])

        self.tcn = nn.Sequential(
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                out_channels,
                out_channels,
                (kernel_size[0], 1),
                (stride, 1),
                padding,
            ),
            nn.BatchNorm2d(out_channels),
            nn.Dropout(dropout, inplace=True),
        )

        if not residual:
            # No shortcut: contributes 0 to the sum in forward().
            self.residual = lambda x: 0
        elif (in_channels == out_channels) and (stride == 1):
            # Shapes already match: identity shortcut.
            self.residual = lambda x: x
        else:
            # 1x1 convolution so the shortcut matches the main branch in
            # channel count and temporal stride.
            self.residual = nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels,
                    kernel_size=1,
                    stride=(stride, 1)),
                nn.BatchNorm2d(out_channels),
            )

        self.relu = nn.ReLU(inplace=True)

    def forward(self, x, A):
        """Return ``(relu(tcn(gcn(x, A)) + residual(x)), A)``."""
        res = self.residual(x)
        x, A = self.gcn(x, A)
        x = self.tcn(x) + res
        return self.relu(x), A


def tally_accuracy(label_indices, pred_indices, num_classes):
    """Count correct and total predictions per class and overall.

    Args:
        label_indices: Iterable of ground-truth class indices.
        pred_indices: Iterable of predicted class indices, in the same order.
        num_classes (int): Number of classes.

    Returns:
        tuple: ``(num_true, num_total)``, two lists of length
        ``num_classes + 1``. Slot ``i`` holds the counts for class ``i``;
        the last slot holds the counts over all samples.
    """
    num_true = [0] * (num_classes + 1)
    num_total = [0] * (num_classes + 1)
    for label_index, pred_index in zip(label_indices, pred_indices):
        is_correct = pred_index == label_index
        num_total[-1] += 1
        if is_correct:
            num_true[-1] += 1
        if 0 <= label_index < num_classes:
            num_total[label_index] += 1
            if is_correct:
                num_true[label_index] += 1
    return num_true, num_total


def safe_accuracy(num_true, num_total):
    """Return ``num_true / num_total``, or NaN when ``num_total`` is 0."""
    return num_true / num_total if num_total else float('nan')


if __name__ == '__main__':
    # set params
    weights_path = './work_dir/recognition/pig18/epoch300_model.pt'
    label_list = ['standing', 'walking', 'laying']
    data_path = "./data/pig18/val_data.npy"
    label_path = "./data/pig18/val_label.pkl"

    # Fall back to CPU when no GPU is available instead of crashing.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # NOTE(review): the graph is built with Graph() defaults here; if the
    # model was trained with a different layout/strategy, pass the same
    # settings through graph_args or load_state_dict will not match.
    model = Model(num_class=len(label_list)).to(device)
    weights = torch.load(weights_path, map_location=device)
    model.load_state_dict(weights)
    model.eval()

    val_data = np.load(data_path)
    # SECURITY: pickle.load executes arbitrary code from the file; only load
    # label files that you generated yourself.
    with open(label_path, 'rb') as f:
        label_data = pickle.load(f)
    # Index 1 of the pickled object holds the per-sample label indices.
    labels = [int(label) for label in label_data[1]]

    # Optional TorchScript export (disabled):
    # data1 = torch.tensor(val_data[0]).unsqueeze(0)
    # data1 = data1.float().to(device)
    # traced_model = torch.jit.trace(model, data1)  # trace the model
    # traced_model.save("stgcn_torchscript.pt")  # save the traced model

    pred_indices = []
    with torch.no_grad():  # inference only: no autograd bookkeeping needed
        for data, label_index in zip(val_data, labels):
            data = torch.tensor(data).unsqueeze(0)
            data = data.float().to(device)
            output = model(data)[0].cpu().tolist()
            pred_index = output.index(max(output))
            pred_indices.append(pred_index)
            print("Label/Pred:  {}/{}".format(label_list[label_index],
                                              label_list[pred_index]))

    Num_ture, Num_total = tally_accuracy(labels, pred_indices,
                                         len(label_list))
    for idx, label_name in enumerate(label_list):
        print("Accuracy for {}-{}: {}, TP: {}, Total_num: {}".format(
            idx, label_name, safe_accuracy(Num_ture[idx], Num_total[idx]),
            Num_ture[idx], Num_total[idx]))
    print("Total Accuracy: {}".format(
        safe_accuracy(Num_ture[-1], Num_total[-1])))