KinectFusion: Real-time 3D Reconstruction and Interaction Using a Moving Depth Camera

先说下他的工作原理

把kinect每帧能得到一张深度图，然后这张深度图呢一个像素对应camera view 中的一个点，如果知道camera的变换矩阵，这些点就可以变成全局坐标. 我们把kinnect照出的点想成一个手电筒打出一堆点，以第一帧的手电筒的位置和方向做为世界坐标系，然后处理第二帧时，得到一堆点，这堆点是在第二个手电筒下的坐标值，这时我们把第二个手电筒放在第一个手电筒时，这些点是没有对齐的，这时我们用ICP, 让这些点对齐，对齐的同时，第二个手电筒也动，则当收敛时，第二个手电筒的位置和方向就确认了。这样后面每一帧手电筒的位置都可以通过前一帧来得到。

注意 ICP不是找最近, 而是找前后两帧Image view的对应点, 然后转到世界坐标系中来算camera pose.

然后就是voxel里面tsdf值的问题，体素是事先建好的，从第一帧开始，对于每个体素来说，先算体素中心点（注意中心点是世界坐标）到手电筒（camera）距离d1，然后该中心点通过手电筒的变换矩阵的逆转到camera view中，然后用perspective project投影，将坐标标准化，最后用这个标准化坐标去索引第一帧的深度图(注意不是整数的坐标会插值得到深度)，得到该中心点的深度，也就是手电筒到中心点的方向上，手电筒到surface的距离d2，这样d1-d2就是surface到该体素中心的距离，后面也照这个方向去算，只不过算出来的值会与以前的值进行权值平均，权值怎么算，怎么平均请看paper.

最后就是通过raycast从体素中提取点的问题了，对于最新一帧手电筒得到深度图，每个像素，执行[u,0]和[u,1]的反投影，得到对应在camera view的坐标，结合camera的变换矩阵，得到对应的世界坐标的值，然后得到对应的体素，用他们的体素坐标来算，然后沿着射线的方向，来依次搜索tsdf为0的点，如果搜索到提取出其世界坐标，具体怎么提取看paper，然后算该点（网格坐标下）的tsdf的梯度，作为该点的法向量。注意论文中global mesh vertex是其他已知的mesh，先按照camera的方向渲染出一个mesh map, 如果没有找到0截面就用mesh map对应的值，或者secondary ray.

1) Depth map conversion
vi(u)=Di(u)K−1[u,1]vi(u)=Di(u)K−1[u,1]v_i(u) = D_i(u)K^{-1}[u,1]可以找出深度图中每个像素对应的camera view中的坐标

2) Camera tracking
这里论文中的算法写得不太清楚
直接对照着源码来看吧
pcl/gpu/kinfu/src/cuda/estimate_combined.cu

__device__ __forceinline__ boolsearch (int x, int y, float3& n, float3& d, float3& s) const{float3 ncurr;ncurr.x = nmap_curr.ptr (y)[x];if (isnan (ncurr.x))return (false);float3 vcurr;//当前camera view中的坐标vcurr.x = vmap_curr.ptr (y       )[x];vcurr.y = vmap_curr.ptr (y + rows)[x];vcurr.z = vmap_curr.ptr (y + 2 * rows)[x];float3 vcurr_g = Rcurr * vcurr + tcurr;//用上一步的R和t作为这帧的R,t的估计,然后得到这帧中点的全局坐标float3 vcurr_cp = Rprev_inv * (vcurr_g - tprev);         // prev camera coo space 通过这个公式将该点称回到上一帧的camera view的坐标系中int2 ukr;         //projectionukr.x = __float2int_rn (vcurr_cp.x * intr.fx / vcurr_cp.z + intr.cx);      //4ukr.y = __float2int_rn (vcurr_cp.y * intr.fy / vcurr_cp.z + intr.cy);                      //4//获取该点在上一帧中image space的坐标if (ukr.x < 0 || ukr.y < 0 || ukr.x >= cols || ukr.y >= rows || vcurr_cp.z < 0)//如果在上一帧中的视图中,继续;否则结束 return (false);float3 nprev_g;//全局法向量通过前一帧的全局nmap来找的nprev_g.x = nmap_g_prev.ptr (ukr.y)[ukr.x];if (isnan (nprev_g.x))return (false);float3 vprev_g;//全局点坐标通过前一帧的全局vmap来找的vprev_g.x = vmap_g_prev.ptr (ukr.y       )[ukr.x];vprev_g.y = vmap_g_prev.ptr (ukr.y + rows)[ukr.x];vprev_g.z = vmap_g_prev.ptr (ukr.y + 2 * rows)[ukr.x];float dist = norm (vprev_g - vcurr_g); //前一帧点的全局坐标和当前帧点的全局坐标差if (dist > distThres)return (false);ncurr.y = nmap_curr.ptr (y + rows)[x];ncurr.z = nmap_curr.ptr (y + 2 * rows)[x];float3 ncurr_g = Rcurr * ncurr;//当前帧的全局法向量nprev_g.y = nmap_g_prev.ptr (ukr.y + rows)[ukr.x];nprev_g.z = nmap_g_prev.ptr (ukr.y + 2 * rows)[ukr.x];float sine = norm (cross (ncurr_g, nprev_g));if (sine >= angleThres)//如果当前帧的全局法向量和前一帧的全局法向量超过一定anlge则是不匹配return (false);n = nprev_g;d = vprev_g;s = vcurr_g;return (true);}

注意这里要先找correspondence, 再求Optimal transforamtion

论文的算法太confusing了, 直接跳过

3) volumetric representation
看下listing 2的算法

4) raycasting for rendering and tracking 看算法

      __device__ __forceinline__ voidoperator () () const{int x = threadIdx.x + blockIdx.x * CTA_SIZE_X;int y = threadIdx.y + blockIdx.y * CTA_SIZE_Y;if (x >= cols || y >= rows)return;vmap.ptr (y)[x] = numeric_limits<float>::quiet_NaN ();nmap.ptr (y)[x] = numeric_limits<float>::quiet_NaN ();float3 ray_start = tcurr;//摄像机的全局位置float3 ray_next = Rcurr * get_ray_next (x, y) + tcurr;//摄像机远平面对应像素x,y的点的全局坐标float3 ray_dir = normalized (ray_next - ray_start);//ensure that it isn't a degenerate caseray_dir.x = (ray_dir.x == 0.f) ? 1e-15 : ray_dir.x;ray_dir.y = (ray_dir.y == 0.f) ? 1e-15 : ray_dir.y;ray_dir.z = (ray_dir.z == 0.f) ? 1e-15 : ray_dir.z;// computer time when entry and exit volumefloat time_start_volume = getMinTime (volume_size, ray_start, ray_dir);float time_exit_volume = getMaxTime (volume_size, ray_start, ray_dir);const float min_dist = 0.f;         //in meterstime_start_volume = fmax (time_start_volume, min_dist);if (time_start_volume >= time_exit_volume)return;float time_curr = time_start_volume;int3 g = getVoxel (ray_start + ray_dir * time_curr);g.x = max (0, min (g.x, VOLUME_X - 1));g.y = max (0, min (g.y, VOLUME_Y - 1));g.z = max (0, min (g.z, VOLUME_Z - 1));float tsdf = readTsdf (g.x, g.y, g.z);//infinite loop guardconst float max_time = 3 * (volume_size.x + volume_size.y + volume_size.z);for (; time_curr < max_time; time_curr += time_step){float tsdf_prev = tsdf;int3 g = getVoxel (  ray_start + ray_dir * (time_curr + time_step)  );if (!checkInds (g))break;tsdf = readTsdf (g.x, g.y, g.z);if (tsdf_prev < 0.f && tsdf > 0.f)break;if (tsdf_prev > 0.f && tsdf < 0.f)           //zero crossing{float Ftdt = interpolateTrilineary (ray_start, ray_dir, time_curr + time_step);if (isnan (Ftdt))break;float Ft = interpolateTrilineary (ray_start, ray_dir, time_curr);//current time时该if (isnan (Ft))break;//float Ts = time_curr - time_step * Ft/(Ftdt - Ft);float Ts = time_curr - time_step * Ft / (Ftdt - Ft);//减少时间回去到tsdf为0的点float3 vetex_found = ray_start + ray_dir * Ts;//tsdf为0的点vmap.ptr (y       )[x] = vetex_found.x;vmap.ptr (y + rows)[x] = vetex_found.y;vmap.ptr (y + 2 * rows)[x] = vetex_found.z;int3 g = getVoxel ( ray_start + ray_dir * time_curr );if (g.x > 1 && g.y > 1 && g.z > 1 && g.x < VOLUME_X - 2 && g.y < VOLUME_Y - 2 && g.z < VOLUME_Z - 2){float3 t;float3 n;//开始求法向量t = vetex_found;t.x += cell_size.x;float Fx1 = interpolateTrilineary (t);t = vetex_found;t.x -= cell_size.x;float Fx2 = interpolateTrilineary (t);n.x = (Fx1 - Fx2);t = vetex_found;t.y += cell_size.y;float Fy1 = interpolateTrilineary (t);t = vetex_found;t.y -= cell_size.y;float Fy2 = interpolateTrilineary (t);n.y = (Fy1 - Fy2);t = vetex_found;t.z += cell_size.z;float Fz1 = interpolateTrilineary (t);t = vetex_found;t.z -= cell_size.z;float Fz2 = interpolateTrilineary (t);n.z = (Fz1 - Fz2);n = normalized (n);nmap.ptr (y       )[x] = n.x;nmap.ptr (y + rows)[x] = n.y;nmap.ptr (y + 2 * rows)[x] = n.z;}break;}}          /* for(;;)  */}};__global__ voidrayCastKernel (const RayCaster rc) {rc ();}}
}

__device__ __forceinline__ floatinterpolateTrilineary (const float3& point) const{int3 g = getVoxel (point);if (g.x <= 0 || g.x >= VOLUME_X - 1)//为了保证顺利进行插值, 把边界的体素去掉, return numeric_limits<float>::quiet_NaN ();if (g.y <= 0 || g.y >= VOLUME_Y - 1)return numeric_limits<float>::quiet_NaN ();if (g.z <= 0 || g.z >= VOLUME_Z - 1)return numeric_limits<float>::quiet_NaN ();float vx = (g.x + 0.5f) * cell_size.x;float vy = (g.y + 0.5f) * cell_size.y;float vz = (g.z + 0.5f) * cell_size.z;//算落在体素的哪个卦限, 根据卦限算出由voxel center组成的cube的最左下角的voxel center所在的voxel的索引g.x = (point.x < vx) ? (g.x - 1) : g.x;g.y = (point.y < vy) ? (g.y - 1) : g.y;g.z = (point.z < vz) ? (g.z - 1) : g.z;//在voxel center组成的cube中, 由当前位置减去该cube的左下角的点(体素center点)求出每维上的权值float a = (point.x - (g.x + 0.5f) * cell_size.x) / cell_size.x;float b = (point.y - (g.y + 0.5f) * cell_size.y) / cell_size.y;float c = (point.z - (g.z + 0.5f) * cell_size.z) / cell_size.z;这里是算该点到由八个voxel中心点围成的cube在每个维度上的权值float res = readTsdf (g.x + 0, g.y + 0, g.z + 0) * (1 - a) * (1 - b) * (1 - c) +readTsdf (g.x + 0, g.y + 0, g.z + 1) * (1 - a) * (1 - b) * c +readTsdf (g.x + 0, g.y + 1, g.z + 0) * (1 - a) * b * (1 - c) +readTsdf (g.x + 0, g.y + 1, g.z + 1) * (1 - a) * b * c +readTsdf (g.x + 1, g.y + 0, g.z + 0) * a * (1 - b) * (1 - c) +readTsdf (g.x + 1, g.y + 0, g.z + 1) * a * (1 - b) * c +readTsdf (g.x + 1, g.y + 1, g.z + 0) * a * b * (1 - c) +readTsdf (g.x + 1, g.y + 1, g.z + 1) * a * b * c;return res;}

KinectFusion: Real-time 3D Reconstruction and Interaction Using a Moving Depth Camera相关推荐

三维重建 3D reconstruction 有哪些实用算法？
点击上方"3D视觉工坊",选择"星标" 干货第一时间送达三维重建 3D reconstruction 有哪些实用算法? https://www.zhihu.c ...
自动驾驶领域大佬在CVPR2020中关于3D Reconstruction Learning的报告
点击上方"视学算法",选择"星标" 干货第一时间送达作者:黄浴 https://zhuanlan.zhihu.com/p/150948693 本文仅做学术分享 ...
Camera Calibration and 3D Reconstruction
3D RECONSTRUCTION WITH OPENCV AND POINT CLOUD LIBRARY http://stackoverflow.com/questions/19205557/op ...
3D Reconstruction三维重建halcon算子，持续更新
目录 3D Reconstruction三维重建 Binocular Stereo双目立体 binocular_disparity binocular_disparity_mg binocular_d ...
【论文阅读】Occupancy Networks: Learning 3D Reconstruction in Function Space
论文题目:Occupancy Networks: Learning 3D Reconstruction in Function Space(占据网络:在函数空间内学习三维重建,简称ONet) 论文作者 ...
（1）课程简介-CS231A：Computer Vision, From 3D Reconstruction to Recognition
斯坦福大学-源地址: CS231A: Computer Vision, From 3D Reconstruction to Recognition CS231AGitHub笔记:https://git ...
三维重建几何方法深度学习_三维重建 3D reconstruction 有哪些实用算法？
主要的深度学习系列算法有DBN, ConvNets, RNN等,基本上经典的深度学习算法都用过. 详细请看唐路路(研究方向深度学习 3D重建)在专栏机器学习&深度学习--学术水准的理解总 ...
论文阅读 An Information Gain Formulation for Active Volumetric 3D Reconstruction
这是一篇讲基于3D重建这个场景,如何更好地计算information gain的好像是2016ICRA 也是uzh实验室的 An Information Gain Formulation for A ...
Occupancy Networks: Learning 3D Reconstruction in Function Space论文笔记
Contribution Occupancy networks implicitly represent the 3D surface as the continuous decision bound ...

KinectFusion: Real-time 3D Reconstruction and Interaction Using a Moving Depth Camera

KinectFusion: Real-time 3D Reconstruction and Interaction Using a Moving Depth Camera相关推荐

最新文章

热门文章