【MMPose】在HRNet应用SimDR（SimCC）/Part.3-处理头篇（Head）

github代码已经上传：mmpose_simDR

SimDR（现在已经改名叫SimCC，后文还是称SimDR）将姿态估计的Heatmap方法转换为分类方法，在HRNet上实现了涨点，并且减小了显存占用。作者已经在github上开源了代码，但是在MMPose上目前还没有实现，所以本篇文章就使用HRNet在MMPose上实现SimDR。

SimDR原文： Is 2D Heatmap Representation Even Necessary for Human Pose Estimation?

SimDR开源代码：SimCC

因为在MMPose上修改的部分较多，所以文章会分为以下几个部分：

配置文件篇：因为MMPose使用了配置文件（.py）来进行各种参数的设置，所以我们先将会用到的参数在配置文件中给出，方便后面修改代码时的调用与理解；
流水线篇（Pipeline）：这部分主要是进行数据预处理，因为SimDR网络的输出分为x和y轴两个，所以需要对数据集处理的代码进行些许更改；
处理头篇（Head）：这部分主要就是网络和损失函数的修改；
检测器篇（Detector）：对训练和验证部分的代码进行一定的修改。

下面开始Part.3 部分

1.新建处理头（Head）

处理头主要负责对网络输出进行处理，例如最后的线性层和损失函数等。拷贝mmpose\models\heads\topdown_heatmap_simple_head.py并重命名为mmpose\models\heads\simDR_head.py，接下来代码会在此基础上进行修改。

创建继承TopdownHeatmapBaseHead的类simDRHead：

import torch
import torch.nn as nn
from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer,
                      constant_init, normal_init)
from mmpose.models.builder import build_loss
from mmpose.models.utils.ops import resize
from ..builder import HEADS
from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
from einops import rearrange, repeat
import torch.nn.functional as F
import numpy as np
import cv2


@HEADS.register_module()
class simDRHead(TopdownHeatmapBaseHead):
    """SimDR (SimCC) keypoint head, registered in the ``HEADS`` registry.

    This is the class skeleton; the methods (``__init__``, ``forward``,
    ``get_accuracy``, ``inference_model``, ``decode``) are added in the
    following sections of the article.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        num_deconv_layers (int): Number of deconv layers.
            num_deconv_layers should >= 0. Note that 0 means
            no deconv layers.
        num_deconv_filters (list|tuple): Number of filters of the deconv
            layers (presumably one entry per deconv layer when
            num_deconv_layers > 0 -- TODO confirm against
            ``_make_deconv_layer``).
        num_deconv_kernels (list|tuple): Kernel sizes of the deconv layers.
        extra (dict): Extra head settings. ``__init__`` reads the keys
            ``final_conv_kernel``, ``num_conv_layers``,
            ``num_conv_kernels``, ``coord_representation`` and, for the
            SimDR representations, ``HEAD_INPUT``, ``image_size`` and
            ``SIMDR_SPLIT_RATIO``.
        in_index (int|Sequence[int]): Input feature index. Default: 0
        input_transform (str|None): Transformation type of input features.
            Options: 'resize_concat', 'multiple_select', None.
            Default: None.

            - 'resize_concat': Multiple feature maps will be resized to the
                same size as the first one and then concat together.
                Usually used in FCN head of HRNet.
            - 'multiple_select': Multiple feature maps will be bundle into
                a list and passed into decode head.
            - None: Only one select feature map is allowed.
        align_corners (bool): align_corners argument of F.interpolate.
            Default: False.
        loss_keypoint (dict): Config for keypoint loss. Default: None.
    """
    pass

同样在mmpose\models\heads\__init__.py添加创建的处理头：

# Copyright (c) OpenMMLab. All rights reserved.
from .ae_higher_resolution_head import AEHigherResolutionHead
from .ae_multi_stage_head import AEMultiStageHead
from .ae_simple_head import AESimpleHead
from .cid_head import CIDHead
from .deconv_head import DeconvHead
from .deeppose_regression_head import DeepposeRegressionHead
from .dekr_head import DEKRHead
from .hmr_head import HMRMeshHead
from .interhand_3d_head import Interhand3DHead
from .mtut_head import MultiModalSSAHead
from .temporal_regression_head import TemporalRegressionHead
from .topdown_heatmap_base_head import TopdownHeatmapBaseHead
from .topdown_heatmap_multi_stage_head import (TopdownHeatmapMSMUHead,TopdownHeatmapMultiStageHead)
from .topdown_heatmap_simple_head import TopdownHeatmapSimpleHead
from .vipnas_heatmap_simple_head import ViPNASHeatmapSimpleHead
from .voxelpose_head import CuboidCenterHead, CuboidPoseHead
from .simDR_head import simDRHead
__all__ = ['TopdownHeatmapSimpleHead', 'TopdownHeatmapMultiStageHead','TopdownHeatmapMSMUHead', 'TopdownHeatmapBaseHead','AEHigherResolutionHead', 'AESimpleHead', 'AEMultiStageHead', 'CIDHead','DeepposeRegressionHead', 'TemporalRegressionHead', 'Interhand3DHead','HMRMeshHead', 'DeconvHead', 'ViPNASHeatmapSimpleHead', 'CuboidCenterHead','CuboidPoseHead', 'MultiModalSSAHead', 'DEKRHead','simDRHead'
]

这样就能在配置文件中直接调用我们创建的处理头了。

2.添加evaluation需要的函数

因为处理头涉及到损失和验证函数，所以需要对验证函数做些许修改，本来验证函数是在mmpose.core.evaluation里面，但是为了不修改mmpose源码我将验证函数直接写到了处理头里。想要更加标准化的话可以自定义一个evaluation.py放在core文件夹里。

def transform_preds(coords, center, scale, output_size):
    """Map keypoints from network-input coordinates back to the source image.

    Args:
        coords (np.ndarray[K, D]): Keypoint coordinates; only the first two
            columns are transformed.
        center: Bounding-box center, forwarded to ``get_affine_transform``.
        scale: Bounding-box scale, forwarded to ``get_affine_transform``.
        output_size (Sequence): Size of the network input space, indexed as
            ``[0]`` for the x extent and ``[1]`` for the y extent.

    Returns:
        np.ndarray: Array with the shape of ``coords``. The first two
        columns hold the transformed coordinates, any other column is 0.
    """
    target_coords = np.zeros(coords.shape)
    # inv=1 builds the transform from the network space to the image space.
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def get_affine_transform(center, scale, rot, output_size,
                         shift=np.array([0, 0], dtype=np.float32), inv=0):
    """Build the 2x3 affine matrix between a bounding box and the network input.

    Args:
        center: Bounding-box center (x, y).
        scale: Bounding-box scale. A scalar is expanded to ``[scale, scale]``;
            the value is multiplied by 200.0 to obtain a size in pixels.
        rot (float): Rotation angle in degrees.
        output_size (Sequence): Target size, ``[0]`` x extent, ``[1]`` y
            extent.
        shift (np.ndarray): Shift applied to the source points, expressed as
            a fraction of the scaled box. It is only read, never modified.
        inv (int|bool): When truthy, return the inverse mapping
            (network space -> image space).

    Returns:
        np.ndarray: The matrix returned by ``cv2.getAffineTransform``.
    """
    # Accept scalars, lists and arrays alike. The original multiplied a raw
    # list by 200.0, which raises TypeError, and printed scalar scales.
    scale = np.asarray(scale)
    if scale.ndim == 0:
        scale = np.array([scale, scale])

    scale_tmp = scale * 200.0
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], np.float32)

    # Three point pairs define the affine map: the center, a point offset
    # along the (rotated) direction, and a third point perpendicular to it.
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir

    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


def affine_transform(pt, t):
    """Apply the 2x3 affine matrix ``t`` to the 2D point ``pt``."""
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]


def get_3rd_point(a, b):
    """Return ``b`` plus the vector ``a - b`` rotated by 90 degrees."""
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)


def get_dir(src_point, rot_rad):
    """Rotate the 2D point ``src_point`` by ``rot_rad`` radians."""
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)

    src_result = [0, 0]
    src_result[0] = src_point[0] * cs - src_point[1] * sn
    src_result[1] = src_point[0] * sn + src_point[1] * cs

    return src_result


def flip_back_simdr(output_flipped, matched_parts, type='x'):
    """Undo a horizontal flip on a batch of 1D SimDR vectors.

    Args:
        output_flipped (np.ndarray[N, K, L]): Vectors predicted for the
            flipped image.
        matched_parts (Iterable): Pairs of joint indices that swap under a
            horizontal flip.
        type (str): 'x' additionally reverses the last axis; any other value
            only swaps the paired joints.

    Returns:
        np.ndarray: The flipped-back vectors. The joint swap is done in
        place, so the input buffer is modified as well.
    """
    assert output_flipped.ndim == 3, \
        'output_flipped should be [batch_size, num_joints, onehot]'

    if type == 'x':
        output_flipped = output_flipped[:, :, ::-1]

    for pair in matched_parts:
        tmp = output_flipped[:, pair[0], :].copy()
        output_flipped[:, pair[0], :] = output_flipped[:, pair[1], :]
        output_flipped[:, pair[1], :] = tmp

    return output_flipped


def _calc_distances(preds, targets, mask, normalize):
    """Calculate the normalized distances between preds and target.

    Note:
        batch_size: N
        num_keypoints: K
        dimension of keypoints: D (normally, D=2 or D=3)

    Args:
        preds (np.ndarray[N, K, D]): Predicted keypoint location.
        targets (np.ndarray[N, K, D]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        normalize (np.ndarray[N, D]): Typical value is heatmap_size

    Returns:
        np.ndarray[K, N]: The normalized distances. \
            If target keypoints are missing, the distance is -1.
    """
    N, K, _ = preds.shape
    # set mask=0 when normalize==0
    _mask = mask.copy()
    _mask[np.where((normalize == 0).sum(1))[0], :] = False
    distances = np.full((N, K), -1, dtype=np.float32)
    # handle invalid values on a private copy so the caller's array is not
    # silently overwritten
    normalize = np.array(normalize, copy=True)
    normalize[np.where(normalize <= 0)] = 1e6
    distances[_mask] = np.linalg.norm(
        ((preds - targets) / normalize[:, None, :])[_mask], axis=-1)
    return distances.T


def _distance_acc(distances, thr=0.5):
    """Return the percentage below the distance threshold, while ignoring
    distances values with -1.

    Note:
        batch_size: N
    Args:
        distances (np.ndarray[N, ]): The normalized distances.
        thr (float): Threshold of the distances.

    Returns:
        float: Percentage of distances below the threshold. \
            If all target keypoints are missing, return -1.
    """
    distance_valid = distances != -1
    num_distance_valid = distance_valid.sum()
    if num_distance_valid > 0:
        return (distances[distance_valid] < thr).sum() / num_distance_valid
    return -1


def keypoint_pck_accuracy(pred, gt, mask, thr, normalize):
    """Calculate the pose accuracy of PCK for each individual keypoint and the
    averaged accuracy across all keypoints for coordinates.

    Note:
        PCK metric measures accuracy of the localization of the body joints.
        The distances between predicted positions and the ground-truth ones
        are typically normalized by the bounding box size.
        The threshold (thr) of the normalized distance is commonly set
        as 0.05, 0.1 or 0.2 etc.

        - batch_size: N
        - num_keypoints: K

    Args:
        pred (np.ndarray[N, K, 2]): Predicted keypoint location.
        gt (np.ndarray[N, K, 2]): Groundtruth keypoint location.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        thr (float): Threshold of PCK calculation.
        normalize (np.ndarray[N, 2]): Normalization factor for H&W.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - acc (np.ndarray[K]): Accuracy of each keypoint.
        - avg_acc (float): Averaged accuracy across all keypoints.
        - cnt (int): Number of valid keypoints.
    """
    distances = _calc_distances(pred, gt, mask, normalize)

    acc = np.array([_distance_acc(d, thr) for d in distances])
    valid_acc = acc[acc >= 0]
    cnt = len(valid_acc)
    avg_acc = valid_acc.mean() if cnt > 0 else 0
    return acc, avg_acc, cnt


def _get_max_preds(heatmaps):
    """Get keypoint predictions from score maps.

    Note:
        batch_size: N
        num_keypoints: K
        heatmap height: H
        heatmap width: W

    Args:
        heatmaps (np.ndarray[N, K, H, W]): model predicted heatmaps.

    Returns:
        tuple: A tuple containing aggregated results.

        - preds (np.ndarray[N, K, 2]): Predicted keypoint location.
        - maxvals (np.ndarray[N, K, 1]): Scores (confidence) of the keypoints.
    """
    assert isinstance(heatmaps,
                      np.ndarray), ('heatmaps should be numpy.ndarray')
    assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    N, K, _, W = heatmaps.shape
    heatmaps_reshaped = heatmaps.reshape((N, K, -1))
    idx = np.argmax(heatmaps_reshaped, 2).reshape((N, K, 1))
    maxvals = np.amax(heatmaps_reshaped, 2).reshape((N, K, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
    preds[:, :, 0] = preds[:, :, 0] % W
    preds[:, :, 1] = preds[:, :, 1] // W

    preds = np.where(np.tile(maxvals, (1, 1, 2)) > 0.0, preds, -1)
    return preds, maxvals


def _simdr_peak_coords(vec_x, vec_y, split_ratio):
    """Turn per-axis SimDR vectors into coordinates.

    Args:
        vec_x (torch.Tensor[N, K, Lx]): Scores over the x bins.
        vec_y (torch.Tensor[N, K, Ly]): Scores over the y bins.
        split_ratio (float): Number of bins per pixel.

    Returns:
        np.ndarray[N, K, 2]: Index of the maximum bin of each axis divided by
        ``split_ratio``.
    """
    _, idx_x = vec_x.max(2)
    _, idx_y = vec_y.max(2)
    # Stacking keeps the [N, K] layout explicit. The original used
    # torch.squeeze() without a dim, which also dropped N or K when they
    # were 1 and made the assignment fail for K == 1.
    coords = torch.stack((idx_x, idx_y), dim=2)
    return torch.true_divide(coords, split_ratio).detach().cpu().numpy()


def pose_pck_accuracy_DR(output, target, mask, cfg, thr=0.05, normalize=None):
    """Calculate the PCK accuracy of SimDR predictions.

    Note:
        PCK metric measures accuracy of the localization of the body joints.
        The distances between predicted positions and the ground-truth ones
        are typically normalized by the bounding box size.
        The threshold (thr) of the normalized distance is commonly set
        as 0.05, 0.1 or 0.2 etc.

        - batch_size: N
        - num_keypoints: K

    Args:
        output (tuple[torch.Tensor]): ``(pred_x, pred_y)`` logits, each of
            shape [N, K, L].
        target (tuple|list|torch.Tensor): Either ``(target_x, target_y)``
            SimDR vectors of shape [N, K, L], or a heatmap tensor
            [N, K, H, W] that is decoded with ``_get_max_preds``.
        mask (np.ndarray[N, K]): Visibility of the target. False for invisible
            joints, and True for visible. Invisible joints will be ignored for
            accuracy calculation.
        cfg (dict): Must provide 'SIMDR_SPLIT_RATIO'; 'image_size' is read
            when ``normalize`` is None.
        thr (float): Threshold of PCK calculation. Default 0.05.
        normalize (np.ndarray[N, 2]): Normalization factor. Defaults to
            ``cfg['image_size']`` repeated for every sample.

    Returns:
        tuple: A tuple containing keypoint accuracy.

        - np.ndarray[K]: Accuracy of each keypoint.
        - float: Averaged accuracy across all keypoints.
        - int: Number of valid keypoints.

    Raises:
        TypeError: If ``output`` is not a ``(pred_x, pred_y)`` pair.
    """
    if not isinstance(output, (tuple, list)):
        # The original fell through to an undefined name in this case.
        raise TypeError(
            'output should be a (pred_x, pred_y) tuple of SimDR vectors, '
            f'but got {type(output).__name__}')

    split_ratio = cfg['SIMDR_SPLIT_RATIO']
    output_x, output_y = output
    pred = _simdr_peak_coords(
        F.softmax(output_x, dim=2), F.softmax(output_y, dim=2), split_ratio)

    if isinstance(target, (tuple, list)):
        target_x, target_y = target
        gt = _simdr_peak_coords(target_x, target_y, split_ratio)
    else:
        gt, _ = _get_max_preds(target.detach().cpu().numpy())

    N, K, _ = pred.shape
    if K == 0:
        return None, 0, 0
    if normalize is None:
        # image_size is indexed like the predictions: [0] is the x extent,
        # [1] the y extent.
        size_x, size_y = cfg['image_size']
        normalize = np.tile(np.array([[size_x, size_y]]), (N, 1))
    return keypoint_pck_accuracy(pred, gt, mask, thr, normalize)

以上函数我写在了类外，其实更建议写在类里。下面要开始编写处理头类的内容了。

3.simDR处理头的init()

因为simDR需要传入额外的几个参数，所以在编写配置文件时，我们在extra中加入了几行：

keypoint_head=dict(type='simDRHead',in_channels=40,out_channels=channel_cfg['num_output_channels'],num_deconv_layers=0,extra=dict(final_conv_kernel=1, HEAD_INPUT=data_cfg['heatmap_size'][0]*data_cfg['heatmap_size'][1],image_size=data_cfg['image_size'],SIMDR_SPLIT_RATIO=simdr_split_ratio,coord_representation='sa-simdr',NUM_JOINTS=channel_cfg['dataset_joints']),loss_keypoint=dict(type='KLDiscretLoss')),

配置文件的参数会传入处理头的__init__函数：

def __init__(self,
             in_channels,
             out_channels,
             num_deconv_layers=3,
             num_deconv_filters=(256, 256, 256),
             num_deconv_kernels=(4, 4, 4),
             extra=None,
             in_index=0,
             input_transform=None,
             align_corners=False,
             loss_keypoint=None,
             train_cfg=None,
             test_cfg=None):
    """Build the deconv layers, the final conv layer and the SimDR heads.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels of the final layer.
        num_deconv_layers (int): Number of deconv layers, must be >= 0.
            0 replaces the deconv stack by an identity.
        num_deconv_filters (list|tuple): Filters of the deconv layers.
        num_deconv_kernels (list|tuple): Kernel sizes of the deconv layers.
        extra (dict): Required. Keys read here:

            - 'coord_representation' (required): one of 'simdr',
              'sa-simdr', 'heatmap'.
            - 'final_conv_kernel': 0 (identity), 1 or 3. Default 1.
            - 'num_conv_layers', 'num_conv_kernels': optional extra
              conv-BN-ReLU layers before the final conv.
            - 'HEAD_INPUT', 'image_size', 'SIMDR_SPLIT_RATIO': required for
              the 'simdr' / 'sa-simdr' representations; they size the two
              linear layers.
        in_index (int|Sequence[int]): Input feature index.
        input_transform (str|None): Passed to ``_init_inputs``.
        align_corners (bool): Stored on the instance.
        loss_keypoint (dict): Config passed to ``build_loss``.
        train_cfg (dict|None): Training config, ``{}`` when None.
        test_cfg (dict|None): Testing config, ``{}`` when None.

    Raises:
        TypeError: If ``extra`` is None or not a dict.
        ValueError: If ``num_deconv_layers`` is negative.
    """
    super().__init__()

    self.in_channels = in_channels
    self.loss = build_loss(loss_keypoint)

    self.train_cfg = {} if train_cfg is None else train_cfg
    self.test_cfg = {} if test_cfg is None else test_cfg
    self.target_type = self.test_cfg.get('target_type', 'GaussianHeatmap')

    self._init_inputs(in_channels, in_index, input_transform)
    self.in_index = in_index
    self.align_corners = align_corners

    if extra is not None and not isinstance(extra, dict):
        raise TypeError('extra should be dict or None.')
    if extra is None:
        # The signature default is kept for compatibility, but this head
        # cannot work without `extra`. Fail here with a clear message
        # instead of the late "'NoneType' object is not subscriptable".
        raise TypeError(
            "simDRHead requires `extra` to be a dict providing at least "
            "'coord_representation'.")

    if num_deconv_layers > 0:
        self.deconv_layers = self._make_deconv_layer(
            num_deconv_layers,
            num_deconv_filters,
            num_deconv_kernels,
        )
    elif num_deconv_layers == 0:
        self.deconv_layers = nn.Identity()
    else:
        raise ValueError(
            f'num_deconv_layers ({num_deconv_layers}) should >= 0.')

    identity_final_layer = False
    if 'final_conv_kernel' in extra:
        assert extra['final_conv_kernel'] in [0, 1, 3]
        if extra['final_conv_kernel'] == 3:
            padding = 1
        elif extra['final_conv_kernel'] == 1:
            padding = 0
        else:
            # 0 for Identity mapping.
            identity_final_layer = True
        kernel_size = extra['final_conv_kernel']
    else:
        kernel_size = 1
        padding = 0

    if identity_final_layer:
        self.final_layer = nn.Identity()
    else:
        conv_channels = num_deconv_filters[
            -1] if num_deconv_layers > 0 else self.in_channels

        layers = []
        num_conv_layers = extra.get('num_conv_layers', 0)
        num_conv_kernels = extra.get('num_conv_kernels',
                                     [1] * num_conv_layers)

        for i in range(num_conv_layers):
            layers.append(
                build_conv_layer(
                    dict(type='Conv2d'),
                    in_channels=conv_channels,
                    out_channels=conv_channels,
                    kernel_size=num_conv_kernels[i],
                    stride=1,
                    padding=(num_conv_kernels[i] - 1) // 2))
            layers.append(
                build_norm_layer(dict(type='BN'), conv_channels)[1])
            layers.append(nn.ReLU(inplace=True))

        layers.append(
            build_conv_layer(
                cfg=dict(type='Conv2d'),
                in_channels=conv_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=padding))

        if len(layers) > 1:
            self.final_layer = nn.Sequential(*layers)
        else:
            self.final_layer = layers[0]

    self.extra = extra
    self.coord_representation = extra['coord_representation']
    assert self.coord_representation in ['simdr', 'sa-simdr', 'heatmap'], 'only simdr and sa-simdr and heatmap supported for pose_resnet_upfree'

    if self.coord_representation in ('simdr', 'sa-simdr'):
        # One linear layer per axis maps the flattened feature map of every
        # joint to image_size * SIMDR_SPLIT_RATIO bins.
        self.mlp_head_x = nn.Linear(
            self.extra['HEAD_INPUT'],
            int(self.extra['image_size'][0] *
                self.extra['SIMDR_SPLIT_RATIO']))
        self.mlp_head_y = nn.Linear(
            self.extra['HEAD_INPUT'],
            int(self.extra['image_size'][1] *
                self.extra['SIMDR_SPLIT_RATIO']))

其实与TopdownHeatmapSimpleHead类的内容大同小异，只是根据simDR的源码添加了几行，主要是线性层的创建。

4.simDR检测头的损失函数部分

simDR类中的get_loss（）与TopdownHeatmapSimpleHead类一样不需要修改，这里要修改的是损失函数。

在mmpose\models\losses路径新建simDR_loss.py文件：

import torch
import torch.nn as nn

from ..builder import LOSSES


@LOSSES.register_module()
class KLDiscretLoss(nn.Module):
    """KL-divergence loss between predicted and target SimDR vectors."""

    def __init__(self):
        super(KLDiscretLoss, self).__init__()
        self.LogSoftmax = nn.LogSoftmax(dim=1)  # [B,LOGITS]
        self.criterion_ = nn.KLDivLoss(reduction='none')

    def criterion(self, dec_outs, labels):
        """Return the per-sample KL divergence, averaged over the bins.

        Args:
            dec_outs (torch.Tensor[B, L]): Predicted logits.
            labels (torch.Tensor[B, L]): Target distribution.

        Returns:
            torch.Tensor[B]: Loss of every sample.
        """
        scores = self.LogSoftmax(dec_outs)
        loss = torch.mean(self.criterion_(scores, labels), dim=1)
        return loss

    def forward(self, output, target, target_weight):
        """Compute the weighted loss averaged over joints.

        Args:
            output (Sequence[torch.Tensor]): ``(pred_x, pred_y)``, each
                [B, K, L].
            target (Sequence[torch.Tensor]): ``(target_x, target_y)``, each
                [B, K, L].
            target_weight (torch.Tensor): Per-joint weights, indexed as
                ``[:, joint]`` (assumed [B, K, 1] or [B, K] -- TODO confirm
                against the data pipeline).

        Returns:
            torch.Tensor: Scalar loss.
        """
        output_x, output_y = output[0], output[1]
        target_x, target_y = target[0], target[1]
        num_joints = output_x.size(1)
        loss = 0

        for idx in range(num_joints):
            # Indexing a joint already yields [B, L]. The original's extra
            # squeeze() also dropped the batch dim when B == 1, which broke
            # LogSoftmax(dim=1).
            coord_x_pred = output_x[:, idx]
            coord_y_pred = output_y[:, idx]
            coord_x_gt = target_x[:, idx]
            coord_y_gt = target_y[:, idx]
            weight = target_weight[:, idx].reshape(-1)
            loss += self.criterion(coord_x_pred, coord_x_gt).mul(weight).mean()
            loss += self.criterion(coord_y_pred, coord_y_gt).mul(weight).mean()
        return loss / num_joints


class _NMTCriterionBase(nn.Module):
    """Shared implementation of the label-smoothed classification losses.

    With ``label_smoothing > 0`` the target index is expanded to a smoothed
    distribution and compared with KL divergence; otherwise plain NLL on the
    target index is used.
    """

    def __init__(self, label_smoothing=0.0):
        super().__init__()
        self.label_smoothing = label_smoothing
        self.LogSoftmax = nn.LogSoftmax(dim=1)  # [B,LOGITS]

        if label_smoothing > 0:
            self.criterion_ = nn.KLDivLoss(reduction='none')
        else:
            self.criterion_ = nn.NLLLoss(reduction='none', ignore_index=100000)
        self.confidence = 1.0 - label_smoothing

    def _smooth_label(self, num_tokens):
        """Return a [1, num_tokens] row filled with the smoothing mass."""
        return torch.full((1, num_tokens),
                          self.label_smoothing / (num_tokens - 1))

    def _bottle(self, v):
        return v.view(-1, v.size(2))

    def _elementwise_loss(self, dec_outs, labels):
        """Return the unreduced loss: [B, L] for KL, [B] for NLL."""
        scores = self.LogSoftmax(dec_outs)
        num_tokens = scores.size(-1)

        # Both scatter_ and NLLLoss need int64 class indices.
        gtruth = labels.reshape(-1).long()
        if self.confidence < 1:
            tdata = gtruth.detach()
            # Follow the labels' device instead of a hard-coded .cuda().
            one_hot = self._smooth_label(num_tokens).to(labels.device)
            tmp_ = one_hot.repeat(gtruth.size(0), 1)  # [N, M]
            tmp_.scatter_(1, tdata.unsqueeze(1), self.confidence)
            gtruth = tmp_.detach()
        return self.criterion_(scores, gtruth)


@LOSSES.register_module()
class NMTNORMCritierion(_NMTCriterionBase):
    """Label-smoothed loss, averaged over bins, samples and joints."""

    def __init__(self, label_smoothing=0.0):
        super(NMTNORMCritierion, self).__init__(label_smoothing)

    def criterion(self, dec_outs, labels):
        """Return the per-sample loss ([B])."""
        loss = self._elementwise_loss(dec_outs, labels)
        # NLLLoss(reduction='none') is already per-sample (1-D); only the
        # KL variant has a bin dimension to reduce.
        return torch.mean(loss, dim=1) if loss.dim() > 1 else loss

    def forward(self, output, target, target_weight):
        """Compute the weighted loss averaged over joints.

        Args:
            output (Sequence[torch.Tensor]): ``(pred_x, pred_y)``, each
                [B, K, L].
            target (torch.Tensor[B, K, 2]): Target bin index per axis.
            target_weight (torch.Tensor): Per-joint weights, indexed as
                ``[:, joint]``.

        Returns:
            torch.Tensor: Scalar loss.
        """
        output_x, output_y = output
        num_joints = output_x.size(1)
        loss = 0

        for idx in range(num_joints):
            coord_x_pred = output_x[:, idx]
            coord_y_pred = output_y[:, idx]
            coord_gt = target[:, idx]
            weight = target_weight[:, idx].reshape(-1)
            loss += self.criterion(coord_x_pred,
                                   coord_gt[:, 0]).mul(weight).mean()
            loss += self.criterion(coord_y_pred,
                                   coord_gt[:, 1]).mul(weight).mean()
        return loss / num_joints


@LOSSES.register_module()
class NMTCritierion(_NMTCriterionBase):
    """Label-smoothed loss, summed over bins and joints, averaged over batch."""

    def __init__(self, label_smoothing=0.0):
        super(NMTCritierion, self).__init__(label_smoothing)

    def criterion(self, dec_outs, labels):
        """Return the per-sample loss ([B])."""
        loss = self._elementwise_loss(dec_outs, labels)
        # See NMTNORMCritierion.criterion: the NLL result is already 1-D.
        return torch.sum(loss, dim=1) if loss.dim() > 1 else loss

    def forward(self, output, target, target_weight):
        """Compute the weighted loss summed over joints, divided by B.

        Args:
            output (Sequence[torch.Tensor]): ``(pred_x, pred_y)``, each
                [B, K, L].
            target (torch.Tensor[B, K, 2]): Target bin index per axis.
            target_weight (torch.Tensor): Per-joint weights, indexed as
                ``[:, joint]``.

        Returns:
            torch.Tensor: Scalar loss.
        """
        output_x, output_y = output
        batch_size = output_x.size(0)
        num_joints = output_x.size(1)
        loss = 0

        for idx in range(num_joints):
            coord_x_pred = output_x[:, idx]
            coord_y_pred = output_y[:, idx]
            coord_gt = target[:, idx]
            weight = target_weight[:, idx].reshape(-1)
            loss += self.criterion(coord_x_pred,
                                   coord_gt[:, 0]).mul(weight).sum()
            loss += self.criterion(coord_y_pred,
                                   coord_gt[:, 1]).mul(weight).sum()
        return loss / batch_size

这些损失函数从simDR源码得来。然后同样别忘了在mmpose\models\losses\__init__.py中做修改：

from .classfication_loss import BCELoss
from .heatmap_loss import AdaptiveWingLoss, FocalHeatmapLoss
from .mesh_loss import GANLoss, MeshLoss
from .mse_loss import JointsMSELoss, JointsOHKMMSELoss
from .multi_loss_factory import AELoss, HeatmapLoss, MultiLossFactory
from .regression_loss import (BoneLoss, L1Loss, MPJPELoss, MSELoss, RLELoss,SemiSupervisionLoss, SmoothL1Loss,SoftWeightSmoothL1Loss, SoftWingLoss, WingLoss)
from .simDR_loss import NMTCritierion,NMTNORMCritierion,KLDiscretLoss
__all__ = ['JointsMSELoss', 'JointsOHKMMSELoss', 'HeatmapLoss', 'AELoss','MultiLossFactory', 'MeshLoss', 'GANLoss', 'SmoothL1Loss', 'WingLoss','MPJPELoss', 'MSELoss', 'L1Loss', 'BCELoss', 'BoneLoss','SemiSupervisionLoss', 'SoftWingLoss', 'AdaptiveWingLoss', 'RLELoss','SoftWeightSmoothL1Loss', 'FocalHeatmapLoss','NMTCritierion','NMTNORMCritierion','KLDiscretLoss'
]

5.SimDR检测头的get_accuracy函数

主要会用到前面定义的验证函数：

def get_accuracy(self, output, target, target_weight):
    """Compute the PCK accuracy of the head's predictions.

    Note:
        - batch_size: N
        - num_keypoints: K

    Args:
        output: Head output, forwarded unchanged to
            ``pose_pck_accuracy_DR``.
        target: Training target, forwarded unchanged to
            ``pose_pck_accuracy_DR``.
        target_weight (torch.Tensor[N,K,1]):
            Weights across different joint types; joints with a weight
            > 0 are counted as visible.

    Returns:
        dict: ``{'acc_pose': float}`` when ``self.target_type`` is
        'GaussianHeatmap', otherwise an empty dict.
    """
    accuracy = dict()
    if self.target_type != 'GaussianHeatmap':
        return accuracy

    visible = target_weight.detach().cpu().numpy().squeeze(-1) > 0
    _, avg_acc, _ = pose_pck_accuracy_DR(output, target, visible, self.extra)
    accuracy['acc_pose'] = float(avg_acc)
    return accuracy

6.SimDR检测头的forward函数

根据simDR源码，在final_layer之后将heatmap拆分为两个一维矩阵

def forward(self, x):
    """Run the head.

    Returns the final-layer feature map for the 'heatmap' representation,
    or a ``(pred_x, pred_y)`` tuple of per-axis vectors for 'simdr' /
    'sa-simdr'.
    """
    feats = self._transform_inputs(x)
    feats = self.deconv_layers(feats)
    feats = self.final_layer(feats)

    representation = self.coord_representation
    if representation == 'heatmap':
        return feats
    if representation in ('simdr', 'sa-simdr'):
        # Flatten every joint's map so the linear heads see one vector each.
        flat = rearrange(feats, 'b c h w -> b c (h w)')
        pred_x = self.mlp_head_x(flat)
        pred_y = self.mlp_head_y(flat)
        return (pred_x, pred_y)
    return None

因为输出由一个变为两个，为了避免报错使用元组进行传输。

7.SimDR检测头的inference_model函数

因为heatmap改变了，所以推理时的flip和shift操作有所变化。

def inference_model(self, x, flip_pairs=None):
    """Inference function.

    Args:
        x: Input features, forwarded to ``self.forward``.
        flip_pairs (None | list[tuple]):
            Pairs of keypoints which are mirrored. When given, ``x`` is
            taken to come from a horizontally flipped image and the
            prediction is flipped back.

    Returns:
        tuple[torch.Tensor]: ``(output_x, output_y)``, each [N, K, L], on
        the same device as the forward output.
    """
    output_x, output_y = self.forward(x)

    if flip_pairs is not None:
        # Flip back with torch ops so the tensors stay on their device. The
        # original went through NumPy and a hard-coded .cuda(), which fails
        # on CPU and moves data to the default GPU otherwise.
        output_x = output_x.detach().flip(-1)  # reverse the x bins (copy)
        output_y = output_y.detach().clone()
        for left, right in flip_pairs:
            for out in (output_x, output_y):
                tmp = out[:, left, :].clone()
                out[:, left, :] = out[:, right, :]
                out[:, right, :] = tmp

        # feature is not aligned, shift flipped heatmap for higher accuracy
        if self.test_cfg.get('shift_heatmap', False):
            # Source and destination overlap, so copy the source first.
            output_x[:, :, 0:-1] = output_x[:, :, 1:].clone()

    return (output_x, output_y)

8.SimDR检测头的decode函数

def decode(self, img_metas, output, **kwargs):
    """Decode keypoints from the SimDR vectors.

    Args:
        img_metas (list(dict)): Information about data augmentation
            By default this includes:

            - "image_file: path to the image file
            - "center": center of the bbox
            - "scale": scale of the bbox
            - "rotation": rotation of the bbox
            - "bbox_score": score of bbox
        output (tuple[torch.Tensor]): ``(pred_x, pred_y)`` logits, each
            [N, K, L].

    Returns:
        dict: 'preds' (np.ndarray[N, K, 3]: x, y, score), 'boxes'
        (np.ndarray[N, 6]: center, scale, area, bbox score), 'image_paths'
        (list) and 'bbox_ids' (list or None).
    """
    output_x, output_y = output
    output_x = F.softmax(output_x, dim=2)
    output_y = F.softmax(output_y, dim=2)

    max_val_x, preds_x = output_x.max(2, keepdim=True)
    max_val_y, preds_y = output_y.max(2, keepdim=True)

    # The keypoint score is the smaller of the two per-axis peaks.
    maxvals = torch.where(max_val_x > max_val_y, max_val_y,
                          max_val_x).detach().cpu().numpy()

    # [N, K, 1] + [N, K, 1] -> [N, K, 2]. Concatenating avoids the original
    # dimension-less squeeze(), which failed for a single keypoint.
    coords = torch.true_divide(
        torch.cat((preds_x, preds_y), dim=2),
        self.extra['SIMDR_SPLIT_RATIO']).detach().cpu().numpy()

    batch_size = len(img_metas)

    if 'bbox_id' in img_metas[0]:
        bbox_ids = []
    else:
        bbox_ids = None

    c = np.zeros((batch_size, 2), dtype=np.float32)
    s = np.zeros((batch_size, 2), dtype=np.float32)
    image_paths = []
    score = np.ones(batch_size)
    for i in range(batch_size):
        c[i, :] = img_metas[i]['center']
        s[i, :] = img_metas[i]['scale']
        image_paths.append(img_metas[i]['image_file'])

        if 'bbox_score' in img_metas[i]:
            # .item() extracts the single value explicitly; assigning a
            # size-1 array to a scalar slot is deprecated in NumPy.
            score[i] = np.asarray(img_metas[i]['bbox_score']).item()
        if bbox_ids is not None:
            bbox_ids.append(img_metas[i]['bbox_id'])

    # Map the coordinates from the network input back to the source image.
    # transform_preds is NumPy based, so it receives the NumPy coordinates
    # (the original handed it a torch tensor).
    input_size = [self.extra['image_size'][0], self.extra['image_size'][1]]
    preds = np.empty_like(coords)
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], c[i], s[i], input_size)

    all_preds = np.zeros((batch_size, preds.shape[1], 3), dtype=np.float32)
    all_boxes = np.zeros((batch_size, 6), dtype=np.float32)
    all_preds[:, :, 0:2] = preds[:, :, 0:2]
    all_preds[:, :, 2:3] = maxvals
    all_boxes[:, 0:2] = c[:, 0:2]
    all_boxes[:, 2:4] = s[:, 0:2]
    all_boxes[:, 4] = np.prod(s * 200.0, axis=1)
    all_boxes[:, 5] = score

    result = {}

    result['preds'] = all_preds
    result['boxes'] = all_boxes
    result['image_paths'] = image_paths
    result['bbox_ids'] = bbox_ids

    return result

因为heatmap有所变化，所以对关节点的decode需要做一定修改。

9.simDR_head.py文件一览

由于篇幅原因，还有几个没有修改过的函数没有提及，请参考TopdownHeatmapSimpleHead类自行添加。文章更新完成后，我会上传项目的完整文件到github。

如果文章对你有帮助，请动动手指点点收藏和赞，谢谢。