


getIndicePairs(torch::Tensor indices, int64_t batchSize,std::vector<int64_t> outSpatialShape,std::vector<int64_t> spatialShape,std::vector<int64_t> kernelSize, std::vector<int64_t> stride,std::vector<int64_t> padding, std::vector<int64_t> dilation,std::vector<int64_t> outPadding, int64_t _subM,int64_t _transpose, int64_t _useHash) {//...省略....auto indicePairUnique = torch::full({indicePairs.numel() / 2 + 1}, std::numeric_limits<int>::max(),torch::dtype(torch::kInt32).device(indices.device()));torch::Tensor outInds =//e.g. torch.Size([N*27,3+1])torch::zeros({numAct * kernelVolume, coorDim + 1},torch::dtype(torch::kInt32).device(indices.device()));if (indices.device().type() == torch::kCPU) {numActOut = create_conv_indice_pair_cpu(indices, outInds, gridOut, indicePairs, indiceNum, kernelSize, stride,padding, dilation, outSpatialShape, transpose, false, useHash);}#ifdef TV_CUDAelse if (indices.device().type() == torch::kCUDA) {numActOut = create_conv_indice_pair_p1_cuda(indices, indicePairs, indiceNum, indicePairUnique, kernelSize, stride,padding, dilation, outSpatialShape, transpose);if (numActOut > 0) {auto res = torch::_unique(indicePairUnique);indicePairUnique = std::get<0>(res);numActOut = create_conv_indice_pair_p2_cuda(indices, outInds, gridOut, indicePairs, indiceNum, indicePairUnique,outSpatialShape, transpose, false, useHash);if (numActOut == -1) {auto device = indices.device();outInds = outInds.to({torch::kCPU});indicePairs = indicePairs.to({torch::kCPU});indiceNum = indiceNum.to({torch::kCPU});indices = indices.to({torch::kCPU});numActOut = create_conv_indice_pair_cpu(indices, outInds, gridOut, indicePairs, indiceNum, kernelSize,stride, padding, dilation, outSpatialShape, transpose, false,useHash);return {outInds.to(device).slice(0, 0, numActOut),indicePairs.to(device), indiceNum.to(device)};}}}      #endif//...省略...



  1 template <typename Index, unsigned NDim, int KernelMaxVolume = 256,                                                                                                                                            2           typename Index1D = int>3 __global__ void prepareIndicePairsKernel(4     tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicePairs,5     tv::TensorView<Index> indiceNum, tv::TensorView<Index1D> indicePairUnique,6     const tv::SimpleVector<Index, NDim> kernelSize,7     const tv::SimpleVector<Index, NDim> stride,8     const tv::SimpleVector<Index, NDim> padding,9     const tv::SimpleVector<Index, NDim> dilation,10     const tv::SimpleVector<Index, NDim> outSpatialShape) {11   auto numActIn = indicesIn.dim(0);12   Index spatialVolume = 1;13 #pragma unroll14   for (int i = 0; i < NDim; ++i) {15     spatialVolume *= outSpatialShape[i];16   }17   Index kernelVolume = 1;18 #pragma unroll19   for (int i = 0; i < NDim; ++i) {20     kernelVolume *= kernelSize[i];21   }22   Index numValidPoints = 0;23   Index validPoints[KernelMaxVolume * (NDim + 1)]; //kernelMaxVolume??24   Index *pointPtr = nullptr;25   auto indicePairsDim2 = indicePairs.dim(2);26   Index index;27   for (int ix : tv::KernelLoopX<int>(numActIn)) {28     numValidPoints = getValidOutPos<Index, NDim>(29         indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),30         stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),31         validPoints);32     for (Index i = 0; i < numValidPoints; ++i) {33       pointPtr = validPoints + i * (NDim + 1);34       auto offset = pointPtr[NDim];35       Index oldNum = atomicAdd(indiceNum.data() + offset, Index(1));36       indicePairs(0, offset, oldNum) = ix;37       index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs(38                   pointPtr, outSpatialShape.data(), 0) +39               spatialVolume * indicesIn(ix, 0);40       indicePairs(1, offset, oldNum) = index;41       indicePairUnique[offset * 
indicePairsDim2 + oldNum] = index;42     }43   }44 }


 /// @brief Enumerate the output positions that one input position maps to.
 ///
 /// For every dimension an inclusive candidate range [lowers, uppers] of output
 /// coordinates is derived from the input coordinate and the convolution
 /// geometry. The Cartesian product of those ranges is then walked; candidates
 /// inside outSpatialShape are kept, each with a flattened kernel offset.
 ///
 /// @tparam Index  Integer type of coordinates and offsets.
 /// @tparam NDim   Number of spatial dimensions.
 /// @param input_pos        NDim coordinates of the input position.
 /// @param kernelSize       NDim kernel extents.
 /// @param stride           NDim strides.
 /// @param padding          NDim paddings.
 /// @param dilation         NDim dilations.
 /// @param outSpatialShape  NDim extents of the output grid.
 /// @param out  Receives one record of NDim + 1 values per accepted position:
 ///             the NDim output coordinates followed by the kernel offset
 ///             (last dimension varies fastest). A rejected candidate is still
 ///             written into the next free record before being discarded, so
 ///             the buffer must hold one record per candidate, i.e. the product
 ///             of the per-dimension candidate counts.
 /// @return Number of accepted records stored at the front of out.
 template <typename Index, unsigned NDim>
 TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
                                     const Index *kernelSize,
                                     const Index *stride,
                                     const Index *padding,
                                     const Index *dilation,
                                     const Index *outSpatialShape,
                                     Index *out) {
   Index lowers[NDim];
   Index uppers[NDim];
   Index counter[NDim];
   Index counterSize[NDim];
   Index numPoints = 1;

   // Per-dimension bounds of the candidate output coordinates. (The text that
   // accompanies this listing refers to this loop as lines 18-22 of the
   // original numbered listing.) The "- 1 + stride" term rounds the lower
   // bound up under integer division.
 #pragma unroll
   for (unsigned d = 0; d < NDim; ++d) {
     lowers[d] = (input_pos[d] - (kernelSize[d] - 1) * dilation[d] - 1 +
                  stride[d] + padding[d]) /
                 stride[d];
     uppers[d] = (input_pos[d] + padding[d]) / stride[d];
   }

   // Candidates per dimension, total candidate count, and a zeroed
   // multi-dimensional counter used to walk all combinations.
 #pragma unroll
   for (unsigned d = 0; d < NDim; ++d) {
     counterSize[d] = (uppers[d] - lowers[d]) / dilation[d] + 1;
     numPoints *= counterSize[d];
     counter[d] = 0;
   }

   Index pointCounter = 0;
   for (Index i = 0; i < numPoints; ++i) {
     // Always write into the next free record; it is only committed (by
     // advancing pointCounter) when every coordinate turns out to be in range.
     Index *record = out + pointCounter * (NDim + 1);
     bool valid = true;
     Index m = 1;
     Index offset = 0;
 #pragma unroll
     for (int j = static_cast<int>(NDim) - 1; j >= 0; --j) {
       const Index val = uppers[j] - counter[j] * dilation[j];
       record[j] = val;
       if (val < 0 || val > outSpatialShape[j] - 1) {
         valid = false;
       }
       // Evaluated left to right as (m * delta) / dilation[j], exactly as in
       // the original expression.
       offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
       m *= kernelSize[j];
     }
     record[NDim] = offset;
     if (valid) {
       ++pointCounter;
     }

     // Advance the counter: bump the last dimension and carry into earlier
     // ones. Dimension 0 is never wrapped.
     counter[NDim - 1] += 1;
 #pragma unroll
     for (int c = static_cast<int>(NDim) - 1; c > 0; --c) {
       if (counter[c] == counterSize[c]) {
         counter[c - 1] += 1;
         counter[c] = 0;
       }
     }
   }
   return pointCounter;
 }

getValidOutPos 的第18~22行(即计算 lowers/uppers 的第一个循环)针对给定的输入坐标,求出它在各个维度上可能对应的输出坐标范围 [lower, upper]。spconv 的 GitHub 项目中有网友对这个函数做过解释(见下方引用的 Issue #224),特地将其粘贴至此。




for (Index i = 0; i < numValidPoints; ++i) {pointPtr = validPoints + i * (NDim + 1);auto offset = pointPtr[NDim];Index oldNum = atomicAdd(indiceNum.data() + offset, Index(1));//offset偏移处,第oldNum次运算,输入索引为idx,输出索引为indexindicePairs(0, offset, oldNum) = ix;index = tv::ArrayIndexRowMajor<NDim, NDim>::runPtrs(pointPtr, outSpatialShape.data(), 0) + spatialVolume * indicesIn(ix, 0);indicePairs(1, offset, oldNum) = index;//off0.....|off1....|off2.....|off3....|...{numActIn}..|....indicePairUnique[offset * indicePairsDim2 + oldNum] = index;


参考:What's the meaning of function "getValidOutPos"? · Issue #224 · traveller59/spconv · GitHub(https://github.com/traveller59/spconv/issues/224)


