【普通稀疏卷积】

了解完子流形3D稀疏卷积之后，我们再来看spconv对普通3D稀疏卷积的处理过程。这要回到spconv_ops.cc文件，从getIndicePairs的普通3D稀疏卷积分支讲起。

摘自：src/spconv/spconv_ops.cc

std::vector<torch::Tensor>
getIndicePairs(torch::Tensor indices, int64_t batchSize,std::vector<int64_t> outSpatialShape,std::vector<int64_t> spatialShape,std::vector<int64_t> kernelSize, std::vector<int64_t> stride,std::vector<int64_t> padding, std::vector<int64_t> dilation,std::vector<int64_t> outPadding, int64_t _subM,int64_t _transpose, int64_t _useHash) {//...省略....auto indicePairUnique = torch::full({indicePairs.numel() / 2 + 1}, std::numeric_limits<int>::max(),torch::dtype(torch::kInt32).device(indices.device()));torch::Tensor outInds =//e.g. torch.Size([N*27,3+1])torch::zeros({numAct * kernelVolume, coorDim + 1},torch::dtype(torch::kInt32).device(indices.device()));if (indices.device().type() == torch::kCPU) {numActOut = create_conv_indice_pair_cpu(indices, outInds, gridOut, indicePairs, indiceNum, kernelSize, stride,padding, dilation, outSpatialShape, transpose, false, useHash);}#ifdef TV_CUDAelse if (indices.device().type() == torch::kCUDA) {numActOut = create_conv_indice_pair_p1_cuda(indices, indicePairs, indiceNum, indicePairUnique, kernelSize, stride,padding, dilation, outSpatialShape, transpose);if (numActOut > 0) {auto res = torch::_unique(indicePairUnique);indicePairUnique = std::get<0>(res);numActOut = create_conv_indice_pair_p2_cuda(indices, outInds, gridOut, indicePairs, indiceNum, indicePairUnique,outSpatialShape, transpose, false, useHash);if (numActOut == -1) {auto device = indices.device();outInds = outInds.to({torch::kCPU});indicePairs = indicePairs.to({torch::kCPU});indiceNum = indiceNum.to({torch::kCPU});indices = indices.to({torch::kCPU});numActOut = create_conv_indice_pair_cpu(indices, outInds, gridOut, indicePairs, indiceNum, kernelSize,stride, padding, dilation, outSpatialShape, transpose, false,useHash);return {outInds.to(device).slice(0, 0, numActOut),indicePairs.to(device), indiceNum.to(device)};}}}      #endif//...省略...
}

cuda部分的计算逻辑这里分成两步，先调用create_conv_indice_pair_p1_cuda(...)函数，再调用create_conv_indice_pair_p2_cuda(...)函数。在create_conv_indice_pair_p1_cuda(...)函数中，我们重点关注prepareIndicePairsKernel核函数。

摘自：include/spconv/indice.cu.h

  1 template <typename Index, unsigned NDim, int KernelMaxVolume = 256,                                                                                                                                            2           typename Index1D = int>3 __global__ void prepareIndicePairsKernel(4     tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicePairs,5     tv::TensorView<Index> indiceNum, tv::TensorView<Index1D> indicePairUnique,6     const tv::SimpleVector<Index, NDim> kernelSize,7     const tv::SimpleVector<Index, NDim> stride,8     const tv::SimpleVector<Index, NDim> padding,9     const tv::SimpleVector<Index, NDim> dilation,10     const tv::SimpleVector<Index, NDim> outSpatialShape) {11   auto numActIn = indicesIn.dim(0);12   Index spatialVolume = 1;13 #pragma unroll14   for (int i = 0; i < NDim; ++i) {15     spatialVolume *= outSpatialShape[i];16   }17   Index kernelVolume = 1;18 #pragma unroll19   for (int i = 0; i < NDim; ++i) {20     kernelVolume *= kernelSize[i];21   }22   Index numValidPoints = 0;23   Index validPoints[KernelMaxVolume * (NDim + 1)]; //kernelMaxVolume??24   Index *pointPtr = nullptr;25   auto indicePairsDim2 = indicePairs.dim(2);26   Index index;27   for (int ix : tv::KernelLoopX<int>(numActIn)) {28     numValidPoints = getValidOutPos<Index, NDim>(29         indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),30         stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),31         validPoints);32     for (Index i = 0; i < numValidPoints; ++i) {33       pointPtr = validPoints + i * (NDim + 1);34       auto offset = pointPtr[NDim];35       Index oldNum = atomicAdd(indiceNum.data() + offset, Index(1));36       indicePairs(0, offset, oldNum) = ix;37       index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs(38                   pointPtr, outSpatialShape.data(), 0) +39               spatialVolume * indicesIn(ix, 0);40       indicePairs(1, offset, oldNum) = index;41       indicePairUnique[offset * 
indicePairsDim2 + oldNum] = index;42     }43   }44 }

第17行定义的这个kernelVolume在核函数中并没有用到，不知何意。

 // For one input coordinate, enumerate every output coordinate it contributes
 // to, together with the flattened kernel offset that links the two.
 //
 // input_pos        NDim input coordinates.
 // kernelSize, stride, padding, dilation, outSpatialShape
 //                  NDim-element convolution parameters.
 // out              written as records of (NDim + 1) values: NDim output
 //                  coordinates followed by the kernel offset. Only the first
 //                  <return value> records are valid.
 // Returns the number of valid (in-bounds) output positions.
 template <typename Index, unsigned NDim>
 TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
                                     const Index *kernelSize,
                                     const Index *stride,
                                     const Index *padding,
                                     const Index *dilation,
                                     const Index *outSpatialShape,
                                     Index *out) {
   Index lowers[NDim];
   Index uppers[NDim];
   Index counter[NDim];
   Index counterSize[NDim];
   Index pointCounter = 0;
   Index val, m, offset;
   Index numPoints = 1;
   bool valid = false;
   // Listing lines 18-22 (referenced in the text): theoretical output range
   // [lower, upper] per dimension; not yet clipped to the output shape.
 #pragma unroll
   for (int i = 0; i < NDim; ++i) {
     lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +
                  stride[i] + padding[i]) / stride[i];
     uppers[i] = (input_pos[i] + padding[i]) / stride[i];
   }
   // Number of candidate positions per dimension and in total.
 #pragma unroll
   for (unsigned i = 0; i < NDim; ++i) {
     counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
     numPoints *= counterSize[i];
   }
 #pragma unroll
   for (int i = 0; i < NDim; ++i) {
     counter[i] = 0;
   }
   // Listing lines 32-57 (referenced in the text): walk all candidates with a
   // mixed-radix counter and fill `out`.
   for (int i = 0; i < numPoints; ++i) {
     valid = true;
     m = 1;
     offset = 0;
 #pragma unroll
     for (int j = NDim - 1; j >= 0; --j) {
       val = uppers[j] - counter[j] * dilation[j];
       out[pointCounter * (NDim + 1) + j] = val;
       if (val < 0 || (val > outSpatialShape[j] - 1)) {
         valid = false;
         // break;
       }
       // Accumulate the kernel offset, last dimension varying fastest.
       offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
       m *= kernelSize[j];
     }
     out[pointCounter * (NDim + 1) + NDim] = offset;
     // An invalid record is not committed: pointCounter stays put, so the
     // next candidate overwrites the same slot.
     if (valid) ++pointCounter;
     counter[NDim - 1] += 1;
     // Propagate carries from the last dimension towards the first.
 #pragma unroll
     for (int c = NDim - 1; c >= 0; --c) {
       if (counter[c] == counterSize[c] && c > 0) {
         counter[c - 1] += 1;
         counter[c] = 0;
       }
     }
   }
   return pointCounter;
 }

第18～22行对于一个特定的输入，求其在各个维度上的输出边界[lower,upper]。在spconv github项目上我看了网友对它的一番解释，特地将其粘贴至此。

这里计算的是各个维度上输出位置的边界值[lower,upper]，它们是根据给定的参数信息用公式推导出来的理论值。这里还没有做越界检查，后面的代码会做进一步的处理。

第32~57行要做的就是对输出数组(out)做一个有效的填充。你把out理解为一个[N][NDim+1]的二维数组。则每一行表示一个输出位置i，out[i][0]...out[i][NDim-1]存储第i个输出位置的索引。out[i][NDim]存储与输入相作用的kernel的偏移(offset)。

完成getValidOutPos的计算后返回到prepareIndicePairsKernel函数中，依靠getValidOutPos中计算得到的out数组完成rulebook的建立。重点在下面这几行代码:

// Fragment of prepareIndicePairsKernel: turn each valid record produced by
// getValidOutPos into one rulebook entry.
for (Index i = 0; i < numValidPoints; ++i) {
  pointPtr = validPoints + i * (NDim + 1);
  auto offset = pointPtr[NDim];
  Index oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
  // At kernel offset `offset`, entry number `oldNum`: the input index is ix
  // and the output index is `index`.
  indicePairs(0, offset, oldNum) = ix;
  index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs(
              pointPtr, outSpatialShape.data(), 0) +
          spatialVolume * indicesIn(ix, 0);
  indicePairs(1, offset, oldNum) = index;
  // Flat layout of indicePairUnique, one segment of indicePairsDim2 slots per
  // kernel offset (presumably indicePairsDim2 == numActIn — TODO confirm):
  // | off0 ..... | off1 ..... | off2 ..... | off3 ..... | ...
  indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
}

【参考文献】

What's the meaning of function "getValidOutPos"? · Issue #224 · traveller59/spconv · GitHub

【OpenPCDet】稀疏卷积SPConv-v1.2代码解读(5)相关推荐

3d稀疏卷积——spconv源码剖析（一）
本节主要是介绍下卷积的理论基础.结合spconv代码剖析从第二小节开始介绍,本节介绍2D和3D卷积基础理论和稀疏卷积分类,后再详细介绍下3d稀疏卷积的工作原理. 2D卷积 2D卷积:卷积核在输入图像的 ...
3d稀疏卷积——spconv源码剖析（三）
构建Rulebook 下面看ops.get_indice_pairs,位于:spconv/ops.py 构建Rulebook由ops.get_indice_pairs接口完成 get_indice_p ...
“看得见的”卷积神经网络（图文并茂+代码解读）（卷积神经网络可视化）
这篇博客主要是想和大家分享一下我学习卷积神经网络可视化之后的总结和心得.学习完卷积神经网络的大致流程之后,会感觉到它和其他深度学习网络一样,像个"黑盒子".我们只知道它有几层,每层 ...
卷积神经网络（CNN）加速器ip设计—1.HLS代码解读
原作者项目:https://github.com/dhm2013724/yolov2_xilinx_fpga 加速器整体结构上图是加速器中所有函数的调用关系,可以看到卷积层,池化层,重拍序层都采用乒 ...
DeepLearning tutorial（4）CNN卷积神经网络原理简介+代码详解
FROM: http://blog.csdn.net/u012162613/article/details/43225445 DeepLearning tutorial(4)CNN卷积神经网络原理简介 ...
VGAE（Variational graph auto-encoders）论文及代码解读
一,论文来源论文pdf Variational graph auto-encoders 论文代码 github代码二,论文解读理论部分参考: Variational Graph Auto-Enc ...
复现CLOCs中spconv v1.0 (commit 8da6f96)踩坑记录
最近看了一篇基于KITTI做2D和3D后融合的论文,CLOCs: Camera-LiDAR Object Candidates Fusion for 3D Object Detection.作者在Gi ...
dlib人脸识别代码解读
文章目录一人脸关键点检测器的训练 1.1 原理 1.1.1 级联回归公式 1.1.2 回归方程求解 1.1.3 分裂点 1.2 源代码 1.3 代码解读 1.3.1 预处理阶段 1.3.2 训练阶 ...
squeezenet代码解读
squeezenet代码解读目录概述使用了1 x 1卷积,方便灵活改变通道数.减少参数量通过squeeze layer较少了通道数将池化层放在比较靠后的位置,使得前半部分的特征信息尽可能不被 ...

【OpenPCDet】稀疏卷积SPConv-v1.2代码解读(5)

【普通稀疏卷积】

【OpenPCDet】稀疏卷积SPConv-v1.2代码解读(5)相关推荐

最新文章

热门文章