1. Main references

(1) GitHub repo

ComputerVision/monocularDepth.py at master · niconielsen32/ComputerVision · GitHub

(2) The MiDaS model repo

GitHub - isl-org/MiDaS: Code for robust monocular depth estimation described in "Ranftl et. al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2022"

(3) Baidu download link for the MiDaS pytorch.hub model, from this blogger:

机器学习笔记 - 基于Torch Hub的深度估计模型MiDaS_坐望云起的博客-CSDN博客_midas 深度估计

(4) Important reference on converting to ONNX:

成功将Midas模型转换为ONNX后出现ONNX运行时测试错误 - 问答 - Python中文网

(5) Generating a chessboard pattern with Python:

Python图像拼接之自定义生成棋盘格_Thomson617的博客-CSDN博客

(6) Intrinsic matrix explained:

【OpenCV】OpenCV-Python实现相机标定+利用棋盘格相对位姿估计_Quentin_HIT的博客-CSDN博客_opencv python 棋盘格

(7) Camera calibration parameters explained. Important!!

无人驾驶小车调试笔记(七)-- 相机校准_溪风沐雪的博客-CSDN博客_drawchessboardcorners

2. Generate the chessboard pattern

(1) Taken directly from this blog post:

Python图像拼接之自定义生成棋盘格_Thomson617的博客-CSDN博客

(2) The code:

# Code source:
# https://blog.csdn.net/Thomson617/article/details/104022558
# -*- coding:utf-8 -*-
import cv2
import numpy as np

def generatePattern(CheckerboardSize, Nx_cor, Ny_cor):
    '''
    Generate a custom chessboard pattern.
    :param CheckerboardSize: square size in pixels (100 works fine here)
    :param Nx_cor: number of inner corners horizontally
    :param Ny_cor: number of inner corners vertically
    :return:
    '''
    black = np.zeros((CheckerboardSize, CheckerboardSize, 3), np.uint8)
    white = np.zeros((CheckerboardSize, CheckerboardSize, 3), np.uint8)
    black[:] = [0, 0, 0]        # pure black
    white[:] = [255, 255, 255]  # pure white

    black_white = np.concatenate([black, white], axis=1)
    black_white2 = black_white
    white_black = np.concatenate([white, black], axis=1)
    white_black2 = white_black

    # Tile horizontally
    if Nx_cor % 2 == 1:
        for i in range(1, (Nx_cor + 1) // 2):
            black_white2 = np.concatenate([black_white2, black_white], axis=1)
            white_black2 = np.concatenate([white_black2, white_black], axis=1)
    else:
        for i in range(1, Nx_cor // 2):
            black_white2 = np.concatenate([black_white2, black_white], axis=1)
            white_black2 = np.concatenate([white_black2, white_black], axis=1)
        black_white2 = np.concatenate([black_white2, black], axis=1)
        white_black2 = np.concatenate([white_black2, white], axis=1)

    jj = 0
    black_white3 = black_white2
    for i in range(0, Ny_cor):
        jj += 1
        # Tile vertically
        if jj % 2 == 1:
            black_white3 = np.concatenate((black_white3, white_black2))  # = np.vstack((img1, img2))
        else:
            black_white3 = np.concatenate((black_white3, black_white2))  # = np.vstack((img1, img2))

    cv2.imshow('', black_white3)
    cv2.imwrite('pattern.jpg', black_white3)
    cv2.waitKey(5000)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    # generatePattern(100, 9, 6)
    generatePattern(100, 8, 5)

(3) Print the pattern on a laser printer so that each square measures 3 cm × 3 cm.

That only takes a quick calculation of the print size; see the sketch below.
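
For reference, a minimal sketch of that calculation (my addition): the generated image uses 100 px squares (CheckerboardSize = 100) and each square should come out at 30 mm on paper, which pins down the print resolution:

# Quick calculation: required print DPI so that a 100 px square measures 30 mm on paper.
px_per_square = 100
mm_per_square = 30
dpi = px_per_square / (mm_per_square / 25.4)  # 25.4 mm per inch
print(f"print at about {dpi:.0f} DPI")        # ~85 DPI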

3. Capture images

(1) A simple capture script

# Load the OpenCV module
import cv2 as cv

# Open the camera
cap = cv.VideoCapture(1)
count = 0
savepath = 'D:/RGBD_CAMERA/python_3d_process/1mono_to_3d/images/'
print("start")
while cap.isOpened():
    ret, frame = cap.read()      # grab a frame
    if ret == False:
        break
    frame = cv.flip(frame, 1)    # mirror the image
    cv.imshow("video", frame)
    key = cv.waitKey(30)
    if key == ord('q'):          # 'q' quits
        break
    if key == ord('r'):          # 'r' saves the current frame
        count = count + 1
        cv.imwrite(savepath + str(count) + '.jpg', frame)
cap.release()
cv.destroyAllWindows()

(2) Note: it is best to capture more than 12 images.

4. Calibration

(1) Code

import cv2
import numpy as np
import glob

savepath = 'D:/RGBD_CAMERA/python_3d_process/1mono_to_3d/images/'

# Find chessboard corners
# Sub-pixel corner refinement stops after 30 iterations or when the error tolerance reaches 0.001
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# Chessboard pattern size (inner corners)
# w = 9   # 10 - 1
# h = 9   # 10 - 1
w = 8   # 9 - 1
h = 5   # 6 - 1
# Actual printed square size
real_size = 30  # mm

# Chessboard points in world coordinates, e.g. (0,0,0), (1,0,0), (2,0,0) ..., (8,5,0);
# Z is dropped, so effectively a 2D grid
objp = np.zeros((w * h, 3), np.float32)
objp[:, :2] = np.mgrid[0:w, 0:h].T.reshape(-1, 2)
# objp = objp * 18.1  # 18.1 mm
objp = objp * real_size  # 30 mm

# Storage for the world/image point correspondences
objpoints = []  # 3D points in world coordinates
imgpoints = []  # 2D points in the image plane

# Load all jpg images in the folder
# images = glob.glob('./*.jpg')  # directory holding the dozen-plus chessboard shots
images = glob.glob(savepath + '*.jpg')
i = 0
for fname in images:
    img = cv2.imread(fname)
    # Image height and width
    h1, w1 = img.shape[0], img.shape[1]
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    u, v = img.shape[:2]  # note: this is (height, width)
    # Find the chessboard corners
    ret, corners = cv2.findChessboardCorners(gray, (w, h), None)
    # If enough corner pairs were found, store them
    if ret == True:
        print("i:", i)
        i = i + 1
        # Refine the corners to sub-pixel accuracy
        cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        # Append the 3D world points and 2D image points
        objpoints.append(objp)
        imgpoints.append(corners)
        # Draw the corners on the image
        cv2.drawChessboardCorners(img, (w, h), corners, ret)
        cv2.namedWindow('findCorners', cv2.WINDOW_NORMAL)
        cv2.resizeWindow('findCorners', 640, 480)
        cv2.imshow('findCorners', img)
        cv2.waitKey(200)
cv2.destroyAllWindows()

## Calibration
print('Calibrating...')
ret, mtx, dist, rvecs, tvecs = \
    cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
print("ret:", ret)
print("mtx:\n", mtx)      # intrinsic matrix
print("dist:\n", dist)    # distortion coefficients = (k_1, k_2, p_1, p_2, k_3)
print("rvecs:\n", rvecs)  # rotation vectors (extrinsics)
print("tvecs:\n", tvecs)  # translation vectors (extrinsics)

newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (u, v), 0, (u, v))
print('newcameramtx (field-of-view adjusted matrix):', newcameramtx)

print('Saving camera parameters: the field-of-view adjusted matrix, important!!')
cv_file = cv2.FileStorage('cameraIntrinsic.xml', cv2.FILE_STORAGE_WRITE)
cv_file.write('intrinsic', newcameramtx)
print(type(newcameramtx))
cv_file.release()

# Open the camera
camera = cv2.VideoCapture(1)
print("Camera opened")
# camera = cv2.VideoCapture(0)
while True:
    (grabbed, frame) = camera.read()
    h1, w1 = frame.shape[:2]
    newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (u, v), 0, (u, v))
    # Undistort
    dst1 = cv2.undistort(frame, mtx, dist, None, newcameramtx)
    # dst2 = cv2.undistort(frame, mtx, dist, None, newcameramtx)
    mapx, mapy = cv2.initUndistortRectifyMap(mtx, dist, None, newcameramtx, (w1, h1), 5)
    dst2 = cv2.remap(frame, mapx, mapy, cv2.INTER_LINEAR)
    # Crop the undistorted image
    x, y, w1, h1 = roi
    dst1 = dst1[y:y + h1, x:x + w1]
    # cv2.imshow('frame', dst2)
    # cv2.imshow('dst1', dst1)
    cv2.imshow('dst2', dst2)
    if cv2.waitKey(1) & 0xFF == ord('q'):  # press 'q' to save a frame and exit
        cv2.imwrite("../u4/frame.jpg", dst1)
        break
camera.release()
cv2.destroyAllWindows()
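
As a sanity check before trusting these numbers (my addition, following the standard OpenCV tutorial), the mean reprojection error can be computed from the values the script already produces:

# Hedged sketch: mean reprojection error.
# Assumes objpoints, imgpoints, mtx, dist, rvecs, tvecs from the calibration above.
mean_error = 0
for i in range(len(objpoints)):
    projected, _ = cv2.projectPoints(objpoints[i], rvecs[i], tvecs[i], mtx, dist)
    error = cv2.norm(imgpoints[i], projected, cv2.NORM_L2) / len(projected)
    mean_error += error
print("mean reprojection error:", mean_error / len(objpoints))  # closer to 0 is better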

(2) Resulting intrinsics

mtx:
 [[801.31799138   0.         319.96097314]
 [  0.         804.76125593 206.79594003]
 [  0.           0.           1.        ]]
dist:
 [[-7.21246445e-02 -6.84714453e-01 -1.25501966e-02  5.75752614e-03
   9.50679972e+00]]

(3) Interpreting the intrinsic matrix

Based on:

【OpenCV】OpenCV-Python实现相机标定+利用棋盘格相对位姿估计_Quentin_HIT的博客-CSDN博客_opencv python 棋盘格

The intrinsic matrix has the form

K = [[fx,  0, u0],
     [ 0, fy, v0],
     [ 0,  0,  1]]

where dx and dy are the physical size of a pixel along the x and y directions of the image plane, (u0, v0) are the coordinates of the image coordinate origin in the pixel coordinate system, f is the camera focal length, and fx = f/dx, fy = f/dy are the ratios of the focal length to the pixel physical size, expressed in pixels.

  • From this, fx ≈ 801 and fy ≈ 805 for this camera, meaning the focal length spans roughly 801 pixel widths and 805 pixel heights.
  • u0 ≈ 320 and v0 ≈ 207. The camera is currently set to 640×480, so the theoretical value of u0 is 320 and of v0 is 240. The error comes mainly from the camera's low resolution, which makes the detected corner positions in pixel coordinates imprecise; measuring the target coordinate system also introduces error.
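
A side note (my addition): with fx and the image width known, the horizontal field of view follows directly:

import math

# Values from the calibration above (assumed: 640 px wide image, fx ≈ 801.3).
fx, width = 801.3, 640
hfov = 2 * math.degrees(math.atan(width / (2 * fx)))
print(f"approx. horizontal FOV: {hfov:.1f} deg")  # ~43.5 deg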

(4) The field-of-view adjusted matrix, saved in XML form

<?xml version="1.0"?>
<opencv_storage>
<intrinsic type_id="opencv-matrix">
  <rows>3</rows>
  <cols>3</cols>
  <dt>d</dt>
  <data>
    1.1476093750000000e+03 0. 2.8008979619154707e+02 0.
    1.1125493164062500e+03 2.7150911855935556e+02 0. 0. 1.</data></intrinsic>
</opencv_storage>

cv2.getOptimalNewCameraMatrix returns the field-of-view adjusted camera matrix.

It refines the intrinsics and distortion coefficients using a free scaling parameter alpha. With alpha = 0, it returns a camera matrix for a cropped image, with the unwanted pixels left over from undistortion removed; with alpha = 1, it returns a camera matrix that keeps all source pixels (padded with black), plus an ROI that can be used to crop them away. A usage sketch follows the parameter list.

newcameramtx, roi = cv2.getOptimalNewCameraMatrix(mtx, dist, (w, h), alpha, (w, h))

  • arg 1: mtx, the camera intrinsic matrix
  • arg 2: dist, the camera distortion coefficients
  • arg 3: (w, h), the original image size
  • arg 4: alpha, field-of-view scaling: 0 zooms in (crops), 1 keeps everything
  • arg 5: (w, h), the new image size
  • return newcameramtx: the field-of-view adjusted camera matrix
  • return roi: the region of interest
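
To make the alpha behavior concrete, a minimal sketch (my addition), assuming mtx, dist, u, v from the calibration script above are still in scope:

# Compare alpha=0 (cropped) vs alpha=1 (all pixels kept, black border).
mtx0, roi0 = cv2.getOptimalNewCameraMatrix(mtx, dist, (u, v), 0, (u, v))
mtx1, roi1 = cv2.getOptimalNewCameraMatrix(mtx, dist, (u, v), 1, (u, v))
print("alpha=0 roi:", roi0)  # tight crop, no invalid pixels
print("alpha=1 roi:", roi1)  # valid-pixel rectangle inside the padded image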

5. Monocular depth estimation

5.1 Install dependencies

pip install timm
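
The scripts below also import torch, torchvision, opencv-python, numpy, and matplotlib; if any of these are missing, install them the same way:

pip install torch torchvision opencv-python numpy matplotlib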

5.2 Manually download the MiDaS model

5.2.1 Download the model

(1) See this blogger's post (a good soul, go give it a like!):

机器学习笔记 - 基于Torch Hub的深度估计模型MiDaS_坐望云起的博客-CSDN博客_midas 深度估计

(2) Or download it directly from the official GitHub repo:

GitHub - isl-org/MiDaS: Code for robust monocular depth estimation described in "Ranftl et. al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2022"

(3) The downloaded model file is huge.

5.2.2 Download the code

(1) Download the code from the official repo:

GitHub - isl-org/MiDaS: Code for robust monocular depth estimation described in "Ranftl et. al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2022"

(2) The code itself is small; the lazy route is to just download the zip.

(3) Unzip it.

(4) To run locally, modify hubconf.py as follows:

        # state_dict = torch.hub.load_state_dict_from_url(
        #     checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True
        # )
        state_dict = torch.load('D:/BaiduNetdiskDownload/dpt_large-midas-2f21e586.pt',
                                map_location=torch.device('cpu'))

6. Implementing depth estimation on a single image

6.1 The official PyTorch tutorial has the details (for reference only)

MiDaS | PyTorch

6.2 Actual implementation

Note: torch.hub does not work well here, so testing continues from the local setup in 5.2.2.

6.3 Download the test dog image

(1) Official address

https://github.com/pytorch/hub/raw/master/images/dog.jpg

If it will not download, search Bing; the address I found:

pytorch/hub/raw/master/images/ dog.jpg - Bing
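
Alternatively, a minimal sketch (my addition) that fetches it with the Python standard library, assuming direct access to the GitHub raw URL above:

import urllib.request

url = "https://github.com/pytorch/hub/raw/master/images/dog.jpg"
urllib.request.urlretrieve(url, "dog.jpg")  # saves dog.jpg into the working directory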

6.4 Test code and results

(1) After downloading the model, test loading it locally


import torch
import matplotlib.pyplot as plt
import cv2

## transforms() below is lifted straight from D:/RGBD_CAMERA/mis/MiDaS-master/hubconf.py
def transforms():
    import cv2
    from torchvision.transforms import Compose
    from midas.transforms import Resize, NormalizeImage, PrepareForNet
    from midas import transforms

    transforms.default_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    transforms.small_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                256,
                256,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    transforms.dpt_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="minimal",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    return transforms

# (I) Method 1: load via torch.hub from the official repo
# https://github.com/isl-org/MiDaS#Accuracy
# model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
# #model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
# #model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)
# midas = torch.hub.load("intel-isl/MiDaS", model_type)

# (II) Method 2: load from the local download
# (1) Load a model
model_type = "DPT_Large"
# midas = torch.hub.load('intel-isl/MiDaS', path='D:/BaiduNetdiskDownload/dpt_large-midas-2f21e586.pt', source='local', model=model_type)
# midas = torch.hub.load('D:/RGBD_CAMERA/mis/MiDaS-master', path='D:/BaiduNetdiskDownload/dpt_large-midas-2f21e586.pt', source='local', model=model_type, force_reload=False)
midas = torch.hub.load('D:/RGBD_CAMERA/mis/MiDaS-master', source='local', model=model_type, force_reload=False)

# (2) Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

# (3) Load transforms to resize and normalize the image for large or small model
# midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
midas_transforms = transforms()
if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform
print("chen0")

# (4) Load image and apply transforms
filename = 'D:/RGBD_CAMERA/python_3d_process/dog.jpg'
img = cv2.imread(filename)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print("chen1")
input_batch = transform(img).to(device)

# (5) Predict and resize to original resolution
with torch.no_grad():
    prediction = midas(input_batch)
    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=img.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

output = prediction.cpu().numpy()
print(output.shape)
print("chen2")

# (6) Show result
plt.imshow(output)
plt.show()
# plt.show()

(2) Test results


7. Complete webcam depth-map example

(1) Full code


import torch
import matplotlib.pyplot as plt
import cv2
import numpy as np
import time

## transforms() below is lifted straight from D:/RGBD_CAMERA/mis/MiDaS-master/hubconf.py
def transforms():
    import cv2
    from torchvision.transforms import Compose
    from midas.transforms import Resize, NormalizeImage, PrepareForNet
    from midas import transforms

    transforms.default_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    transforms.small_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                256,
                256,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="upper_bound",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    transforms.dpt_transform = Compose(
        [
            lambda img: {"image": img / 255.0},
            Resize(
                384,
                384,
                resize_target=None,
                keep_aspect_ratio=True,
                ensure_multiple_of=32,
                resize_method="minimal",
                image_interpolation_method=cv2.INTER_CUBIC,
            ),
            NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            PrepareForNet(),
            lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0),
        ]
    )

    return transforms

# (I) Method 1: load via torch.hub from the official repo
# https://github.com/isl-org/MiDaS#Accuracy
# model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
# #model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
# #model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)
# midas = torch.hub.load("intel-isl/MiDaS", model_type)

# (II) Method 2: load from the local download
# (1) Load a model
model_type = "DPT_Large"
# midas = torch.hub.load('intel-isl/MiDaS', path='D:/BaiduNetdiskDownload/dpt_large-midas-2f21e586.pt', source='local', model=model_type)
# midas = torch.hub.load('D:/RGBD_CAMERA/mis/MiDaS-master', path='D:/BaiduNetdiskDownload/dpt_large-midas-2f21e586.pt', source='local', model=model_type, force_reload=False)
midas = torch.hub.load('D:/RGBD_CAMERA/mis/MiDaS-master', source='local', model=model_type, force_reload=False)

# (2) Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

# (3) Load transforms to resize and normalize the image for large or small model
# midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")
midas_transforms = transforms()
if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform
print("chen0")

# (4) Load image and apply transforms
filename = 'D:/RGBD_CAMERA/python_3d_process/dog.jpg'
img = cv2.imread(filename)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print("chen1")
input_batch = transform(img).to(device)

# (5) Predict and resize to original resolution
with torch.no_grad():
    prediction = midas(input_batch)
    prediction = torch.nn.functional.interpolate(
        prediction.unsqueeze(1),
        size=img.shape[:2],
        mode="bicubic",
        align_corners=False,
    ).squeeze()

output = prediction.cpu().numpy()
print(output.shape)
print("chen2")

# (6) Show result
plt.imshow(output)
plt.show()
cv2.waitKey(0)

## Below: capture frames from the webcam, then run the 3D reconstruction
###### 3D reconstruction
Q = np.array(([1.0, 0.0, 0.0, -160.0],
              [0.0, 1.0, 0.0, -120.0],
              [0.0, 0.0, 0.0, 350.0],
              [0.0, 0.0, 1.0/90.0, 0.0]), dtype=np.float32)

# Open up the video capture from a webcam
cap = cv2.VideoCapture(1)
print("chencap")

while cap.isOpened():
    success, img = cap.read()
    start = time.time()
    cv2.imshow("origin_pic", img)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Apply input transforms
    input_batch = transform(img).to(device)

    # Prediction and resize to original resolution
    with torch.no_grad():
        prediction = midas(input_batch)
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    depth_map = prediction.cpu().numpy()
    depth_map = cv2.normalize(depth_map, None, 0, 1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)

    # Reproject points into 3D
    points_3D = cv2.reprojectImageTo3D(depth_map, Q, handleMissingValues=False)
    # Get rid of points with value 0 (i.e. no depth)
    mask_map = depth_map > 0.4
    # Mask colors and points
    output_points = points_3D[mask_map]
    output_colors = img[mask_map]

    end = time.time()
    totalTime = end - start
    fps = 1 / totalTime

    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    depth_map = (depth_map * 255).astype(np.uint8)
    depth_map = cv2.applyColorMap(depth_map, cv2.COLORMAP_MAGMA)

    cv2.putText(img, f'FPS: {int(fps)}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0, 255, 0), 2)
    cv2.imshow('Image', img)
    cv2.imshow('Depth Map', depth_map)

    if cv2.waitKey(5) & 0xFF == 27:
        break

# --------------------- Create The Point Clouds ----------------------------------------
# Function to create point cloud file
def create_output(vertices, colors, filename):
    colors = colors.reshape(-1, 3)
    vertices = np.hstack([vertices.reshape(-1, 3), colors])

    ply_header = '''ply
format ascii 1.0
element vertex %(vert_num)d
property float x
property float y
property float z
property uchar red
property uchar green
property uchar blue
end_header
'''
    with open(filename, 'w') as f:
        f.write(ply_header % dict(vert_num=len(vertices)))
        np.savetxt(f, vertices, '%f %f %f %d %d %d')

output_file = 'pointCloudDeepLearning.ply'
# Generate point cloud
create_output(output_points, output_colors, output_file)

cap.release()
cv2.destroyAllWindows()
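
A note on the hard-coded Q matrix (my reading; the original post does not derive it): cv2.reprojectImageTo3D treats each depth-map value d at pixel (u, v) as a disparity and computes the homogeneous product Q · [u, v, d, 1]^T, then divides by the last component. The -160/-120 entries look like the center of a 320×240 view, and 350 and 1/90 are tuned constants rather than values derived from the calibrated intrinsics above. A minimal sketch of the mapping:

# What reprojectImageTo3D does with this Q, for a single example pixel.
import numpy as np

Q = np.array([[1.0, 0.0, 0.0, -160.0],
              [0.0, 1.0, 0.0, -120.0],
              [0.0, 0.0, 0.0,  350.0],
              [0.0, 0.0, 1.0/90.0, 0.0]], dtype=np.float32)

u, v, d = 320.0, 240.0, 0.8          # example pixel and normalized depth value
X, Y, Z, W = Q @ np.array([u, v, d, 1.0])
print(X / W, Y / W, Z / W)           # = (90*(u-160)/d, 90*(v-120)/d, 31500/d)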

(2) Display code

# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple open3d
# pip install open3d -i https://pypi.tuna.tsinghua.edu.cn/simple
# xachen: display works now, 20221128
import open3d as o3d
import numpy as np

## -- Method (1): remove NaNs ------------------
# path = "D:/RGBD_CAMERA/python_3d_process/1_hezi.pcd"
# pcd = o3d.io.read_point_cloud(path)  # path is the file path
# pcd_new = o3d.geometry.PointCloud.remove_non_finite_points(
#                         pcd, remove_nan=True, remove_infinite=False)
# o3d.visualization.draw_geometries([pcd_new])

## -- Method (2): remove NaNs ------------------
# path = "D:/RGBD_CAMERA/python_3d_process/1_hezi.pcd"
# path = "D:/RGBD_CAMERA/python_3d_process/chenmobile.pcd"
path = "D:/RGBD_CAMERA/python_3d_process/pointCloudDeepLearning.ply"
pcd = o3d.io.read_point_cloud(path)  # path is the file path
# res = pcd.remove_non_finite_points(True, True)  # drop invalid points
pcd = pcd.remove_non_finite_points(True, False)   # drop invalid points
o3d.visualization.draw_geometries([pcd],
                                  window_name="window name test",
                                  point_show_normal=False,
                                  width=800,   # window width
                                  height=600)  # window height
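
If the viewer is sluggish with a dense cloud, a voxel downsample before display helps; a minimal sketch (my addition) using Open3D's standard API:

# Voxel downsampling before display; the voxel size is in the cloud's own units, tune to taste.
pcd_small = pcd.voxel_down_sample(voxel_size=2.0)
o3d.visualization.draw_geometries([pcd_small])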

All done, confetti!

------------------------------------------------------------

-------------------------------------------------------------

# Note: ignore the tutorial and notes below for now; they still need work!

------------------------------------------


5.2 The original author's test tutorial

(1) Test code

import cv2
import torch
import time
import numpy as np

# Reference for this file:
# https://github.com/niconielsen32/ComputerVision/blob/master/depthToPointCloud.py
# model = torch.hub.load('ultralytics/yolov5', 'custom', path='path/to/best.pt')

# Q matrix - Camera parameters - Can also be found using stereoRectify
Q = np.array(([1.0, 0.0, 0.0, -160.0],
              [0.0, 1.0, 0.0, -120.0],
              [0.0, 0.0, 0.0, 350.0],
              [0.0, 0.0, 1.0/90.0, 0.0]), dtype=np.float32)

# Load a MiDaS model for depth estimation
model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)

midas = torch.hub.load("intel-isl/MiDaS", model_type)

# Move model to GPU if available
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

# Load transforms to resize and normalize the image
midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

if model_type == "DPT_Large" or model_type == "DPT_Hybrid":
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# Open up the video capture from a webcam
cap = cv2.VideoCapture(2)

# The capture loop, the create_output() point-cloud writer, and the closing
# cap.release() / cv2.destroyAllWindows() that followed here are identical to
# the section 7 listing above, so they are not repeated.


Test procedure

2.1 Download the model

GitHub - isl-org/MiDaS: Code for robust monocular depth estimation described in "Ranftl et. al., Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer, TPAMI 2022"

(2) It really is a big model.

2.2 Convert to ONNX

In practice, OpenCV could not load the model after conversion.
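
For reference, the conversion itself can be attempted with torch.onnx.export; a minimal sketch (my addition, untested against this model), assuming `midas` is the loaded model and the 384×384 input shape used by the DPT transform:

# Hedged sketch: export the loaded MiDaS model to ONNX.
import torch

dummy = torch.randn(1, 3, 384, 384)  # input shape assumed from the DPT transform
torch.onnx.export(midas.cpu().eval(), dummy, "midas_dpt_large.onnx",
                  opset_version=12,
                  input_names=["image"], output_names=["depth"])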

3. Calling it directly via PyTorch Hub

机器学习笔记 - 基于Torch Hub的深度估计模型MiDaS_坐望云起的博客-CSDN博客_midas 模型
