cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀
cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀
- 代码
- linux如何编译cuda和opencv代码
- 耗时情况
代码
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <device_functions.h>
#include <opencv2/opencv.hpp>
//#include "/usr/include/opencv4/opencv2/opencv.hpp"
#include <iostream>
using namespace std;
using namespace cv;//腐蚀
// Grayscale erosion with a rectangular structuring element.
//
// Each thread produces one output pixel: the minimum input value inside the
// window of (2 * (width / 2) + 1) x (2 * (height / 2) + 1) pixels centred on
// it. The window is clipped to the image, so border pixels take the minimum
// over the part of the window that lies inside the image.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
// larger than the image; threads outside the image return without touching
// memory.
//
// dataIn        input image, one byte per pixel, row stride of imgWidth bytes
// dataOut       output image, same layout as dataIn (must not alias dataIn,
//               because neighbouring threads read the pixels this one writes)
// erodeElement  structuring element size (width, height)
// imgWidth      image width in pixels
// imgHeight     image height in pixels
__global__ void erodeInCuda(unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so guard before any memory access.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = erodeElement.width / 2;
    const int halfEH = erodeElement.height / 2;

    // Clip the window to the image instead of skipping border pixels.
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running minimum in a local variable and store it once at the end.
    unsigned char minValue = dataIn[(size_t)yIndex * imgWidth + xIndex];
    for (int y = yFirst; y <= yLast; ++y)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; ++x)
        {
            if (row[x] < minValue)
            {
                minValue = row[x];
            }
        }
    }

    dataOut[(size_t)yIndex * imgWidth + xIndex] = minValue;
}
// Dilation
// Grayscale dilation with a rectangular structuring element.
//
// Each thread produces one output pixel: the maximum input value inside the
// window of (2 * (width / 2) + 1) x (2 * (height / 2) + 1) pixels centred on
// it. The window is clipped to the image, so border pixels take the maximum
// over the part of the window that lies inside the image.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
// larger than the image; threads outside the image return without touching
// memory.
//
// dataIn         input image, one byte per pixel, row stride of imgWidth bytes
// dataOut        output image, same layout as dataIn (must not alias dataIn,
//                because neighbouring threads read the pixels this one writes)
// dilateElement  structuring element size (width, height)
// imgWidth       image width in pixels
// imgHeight      image height in pixels
__global__ void dilateInCuda(unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so guard before any memory access.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = dilateElement.width / 2;
    const int halfEH = dilateElement.height / 2;

    // Clip the window to the image instead of skipping border pixels.
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running maximum in a local variable and store it once at the end.
    unsigned char maxValue = dataIn[(size_t)yIndex * imgWidth + xIndex];
    for (int y = yFirst; y <= yLast; ++y)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; ++x)
        {
            if (row[x] > maxValue)
            {
                maxValue = row[x];
            }
        }
    }

    dataOut[(size_t)yIndex * imgWidth + xIndex] = maxValue;
}

// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Loads "1.jpg", runs erosion and dilation with a 3x5 element on the GPU and
// with OpenCV on the CPU, and displays the input images and the OpenCV
// dilation result. Returns 0 on success, 1 if the image cannot be read or a
// CUDA call fails.
int main()
{
    Mat srcImg = imread("1.jpg");     // input picture
    Mat grayImg = imread("1.jpg", 0); // same picture loaded as grayscale
    if (srcImg.empty() || grayImg.empty())
    {
        cerr << "Failed to read 1.jpg" << endl;
        return 1;
    }
    // The raw cudaMemcpy below copies rows back to back, so the Mat must be packed.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    cv::namedWindow("srcImg", 0);
    cv::imshow("srcImg", srcImg);
    cv::waitKey(1000);
    cv::namedWindow("grayImg", 0);
    cv::imshow("grayImg", grayImg);
    cv::waitKey(1000);

    unsigned char *d_in = NULL;   // input image on the GPU
    unsigned char *d_out1 = NULL; // eroded image on the GPU
    unsigned char *d_out2 = NULL; // dilated image on the GPU

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    const size_t imgBytes = (size_t)imgWidth * imgHeight * sizeof(unsigned char);

    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // eroded image on the CPU
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // dilated image on the CPU

    // Allocate device memory and upload the input image.
    bool ok = cudaOk(cudaMalloc((void**)&d_in, imgBytes), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out1, imgBytes), "cudaMalloc(d_out1)")
           && cudaOk(cudaMalloc((void**)&d_out2, imgBytes), "cudaMalloc(d_out2)")
           && cudaOk(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    // Threads per block, and enough blocks to cover the whole image (rounded up).
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Structuring element size.
    Size Element(3, 5);

    // CUDA erosion, then copy the result back to the host.
    if (ok)
    {
        erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
        ok = cudaOk(cudaGetLastError(), "erodeInCuda launch")
          && cudaOk(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(erode result)");
    }

    // CUDA dilation, then copy the result back to the host.
    if (ok)
    {
        dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
        ok = cudaOk(cudaGetLastError(), "dilateInCuda launch")
          && cudaOk(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dilate result)");
    }

    // Release device buffers on every path; pointers that were never allocated are still NULL.
    cudaFree(d_in);
    cudaFree(d_out1);
    cudaFree(d_out2);
    if (!ok)
    {
        return 1;
    }

    // CPU erosion and dilation (OpenCV implementation).
    Mat erodeImg;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 5));
    erode(grayImg, erodeImg, element);

    Mat dilateImg;
    dilate(grayImg, dilateImg, element);

    cv::namedWindow("dilateImg11111", 0);
    cv::imshow("dilateImg11111", dilateImg);
    cv::waitKey(1000);
    cv::waitKey(0);
    return 0;
}
linux如何编译cuda和opencv代码
nvcc `pkg-config opencv4 --cflags --libs` test5.cu -o test5
# 或者
nvcc `pkg-config --libs opencv4` -L. -L/usr/local/cuda/lib -lcuda -lcudart `pkg-config --cflags opencv4` -I. -I/usr/local/cuda/include test5.cu -o test5
耗时情况
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <device_functions.h>
#include <opencv2/opencv.hpp>
#include <time.h>//#include "/usr/include/opencv4/opencv2/opencv.hpp"
#include <iostream>
using namespace std;
using namespace cv;//腐蚀
// Grayscale erosion with a rectangular structuring element.
//
// Each thread produces one output pixel: the minimum input value inside the
// window of (2 * (width / 2) + 1) x (2 * (height / 2) + 1) pixels centred on
// it. The window is clipped to the image, so border pixels take the minimum
// over the part of the window that lies inside the image.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
// larger than the image; threads outside the image return without touching
// memory.
//
// dataIn        input image, one byte per pixel, row stride of imgWidth bytes
// dataOut       output image, same layout as dataIn (must not alias dataIn,
//               because neighbouring threads read the pixels this one writes)
// erodeElement  structuring element size (width, height)
// imgWidth      image width in pixels
// imgHeight     image height in pixels
__global__ void erodeInCuda(unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so guard before any memory access.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = erodeElement.width / 2;
    const int halfEH = erodeElement.height / 2;

    // Clip the window to the image instead of skipping border pixels.
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running minimum in a local variable and store it once at the end.
    unsigned char minValue = dataIn[(size_t)yIndex * imgWidth + xIndex];
    for (int y = yFirst; y <= yLast; ++y)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; ++x)
        {
            if (row[x] < minValue)
            {
                minValue = row[x];
            }
        }
    }

    dataOut[(size_t)yIndex * imgWidth + xIndex] = minValue;
}
// Dilation
// Grayscale dilation with a rectangular structuring element.
//
// Each thread produces one output pixel: the maximum input value inside the
// window of (2 * (width / 2) + 1) x (2 * (height / 2) + 1) pixels centred on
// it. The window is clipped to the image, so border pixels take the maximum
// over the part of the window that lies inside the image.
//
// Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
// larger than the image; threads outside the image return without touching
// memory.
//
// dataIn         input image, one byte per pixel, row stride of imgWidth bytes
// dataOut        output image, same layout as dataIn (must not alias dataIn,
//                because neighbouring threads read the pixels this one writes)
// dilateElement  structuring element size (width, height)
// imgWidth       image width in pixels
// imgHeight      image height in pixels
__global__ void dilateInCuda(unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so guard before any memory access.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int halfEW = dilateElement.width / 2;
    const int halfEH = dilateElement.height / 2;

    // Clip the window to the image instead of skipping border pixels.
    const int xFirst = (xIndex - halfEW < 0) ? 0 : xIndex - halfEW;
    const int xLast = (xIndex + halfEW > imgWidth - 1) ? imgWidth - 1 : xIndex + halfEW;
    const int yFirst = (yIndex - halfEH < 0) ? 0 : yIndex - halfEH;
    const int yLast = (yIndex + halfEH > imgHeight - 1) ? imgHeight - 1 : yIndex + halfEH;

    // Keep the running maximum in a local variable and store it once at the end.
    unsigned char maxValue = dataIn[(size_t)yIndex * imgWidth + xIndex];
    for (int y = yFirst; y <= yLast; ++y)
    {
        const unsigned char *row = dataIn + (size_t)y * imgWidth;
        for (int x = xFirst; x <= xLast; ++x)
        {
            if (row[x] > maxValue)
            {
                maxValue = row[x];
            }
        }
    }

    dataOut[(size_t)yIndex * imgWidth + xIndex] = maxValue;
}

// Reports a failed CUDA runtime call on stderr; returns true when status is cudaSuccess.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Loads "1.jpg", runs erosion and dilation with a 3x5 element on the GPU and
// with OpenCV on the CPU, and prints three wall-clock timings in milliseconds:
//   diff_3  erosion kernel only (launch until the device has finished)
//   diff_1  whole GPU erosion path: allocation, upload, kernel, download
//   diff_2  OpenCV erosion on the CPU
// Saves and displays the OpenCV dilation result. Returns 0 on success, 1 if
// the image cannot be read or a CUDA call fails.
int main()
{
    Mat srcImg = imread("1.jpg");     // input picture
    Mat grayImg = imread("1.jpg", 0); // same picture loaded as grayscale
    if (srcImg.empty() || grayImg.empty())
    {
        cerr << "Failed to read 1.jpg" << endl;
        return 1;
    }
    // The raw cudaMemcpy below copies rows back to back, so the Mat must be packed.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    cv::namedWindow("srcImg", 0);
    cv::imshow("srcImg", srcImg);
    cv::waitKey(1000);
    cv::namedWindow("grayImg", 0);
    cv::imshow("grayImg", grayImg);
    cv::waitKey(1000);

    unsigned char *d_in = NULL;   // input image on the GPU
    unsigned char *d_out1 = NULL; // eroded image on the GPU
    unsigned char *d_out2 = NULL; // dilated image on the GPU

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    const size_t imgBytes = (size_t)imgWidth * imgHeight * sizeof(unsigned char);

    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // eroded image on the CPU
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // dilated image on the CPU

    // Wall-clock ticks to milliseconds. clock() is not used because it reports
    // CPU time and its unit depends on CLOCKS_PER_SEC.
    const double msPerTick = 1000.0 / getTickFrequency();

    // Start of the whole GPU erosion path. NOTE(review): the first CUDA runtime
    // call typically also pays for one-time context initialisation, so diff_1
    // includes that cost.
    const int64 time_1 = getTickCount();

    // Allocate device memory and upload the input image.
    bool ok = cudaOk(cudaMalloc((void**)&d_in, imgBytes), "cudaMalloc(d_in)")
           && cudaOk(cudaMalloc((void**)&d_out1, imgBytes), "cudaMalloc(d_out1)")
           && cudaOk(cudaMalloc((void**)&d_out2, imgBytes), "cudaMalloc(d_out2)")
           && cudaOk(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    // Threads per block, and enough blocks to cover the whole image (rounded up).
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Structuring element size.
    Size Element(3, 5);

    // CUDA erosion.
    if (ok)
    {
        const int64 time_5 = getTickCount();
        erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
        // A kernel launch returns before the kernel has run; wait for the
        // device so the interval covers execution, not just the launch.
        ok = cudaOk(cudaGetLastError(), "erodeInCuda launch")
          && cudaOk(cudaDeviceSynchronize(), "erodeInCuda execution");
        const int64 time_6 = getTickCount();
        if (ok)
        {
            const double diff_3 = (double)(time_6 - time_5) * msPerTick;
            printf("\n\n[ALG][%s][%4d]diff_3 = %f ms \n", __FUNCTION__, __LINE__, diff_3);
        }
    }

    // Copy the erosion result back to the host and close the GPU-path timing.
    if (ok)
    {
        ok = cudaOk(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(erode result)");
        const int64 time_2 = getTickCount();
        if (ok)
        {
            const double diff_1 = (double)(time_2 - time_1) * msPerTick;
            printf("\n\n[ALG][%s][%4d]diff_1 = %f ms\n", __FUNCTION__, __LINE__, diff_1);
        }
    }

    // CUDA dilation, then copy the result back to the host.
    if (ok)
    {
        dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
        ok = cudaOk(cudaGetLastError(), "dilateInCuda launch")
          && cudaOk(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dilate result)");
    }

    // Release device buffers on every path; pointers that were never allocated are still NULL.
    cudaFree(d_in);
    cudaFree(d_out1);
    cudaFree(d_out2);
    if (!ok)
    {
        return 1;
    }

    // CPU erosion (OpenCV implementation), timed the same way as the GPU path.
    const int64 time_3 = getTickCount();
    Mat erodeImg;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 5));
    erode(grayImg, erodeImg, element);
    const int64 time_4 = getTickCount();
    const double diff_2 = (double)(time_4 - time_3) * msPerTick;
    printf("\n\n[ALG][%s][%4d]diff_2 = %f ms \n", __FUNCTION__, __LINE__, diff_2);

    // CPU dilation (OpenCV implementation).
    Mat dilateImg;
    dilate(grayImg, dilateImg, element);
    imwrite("dilateImg_230117.jpg", dilateImg);

    cv::namedWindow("dilateImg11111", 0);
    cv::imshow("dilateImg11111", dilateImg);
    cv::waitKey(1000);
    cv::waitKey(0);
    return 0;
}
参考:https://blog.csdn.net/MGotze/article/details/76448702
cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀相关推荐
- CUDA学习笔记之 CUDA存储器模型
CUDA学习笔记之 CUDA存储器模型 标签: cuda存储bindingcache编程api 2010-12-14 01:33 1223人阅读 评论(0) 收藏 举报 分类: CUDA(26) GP ...
- Opencv学习笔记(六)图像形态学处理
文章目录 形态学变换 主要操作 代码 参考 形态学变换 形态学处理主要应用于二值图像,目的是微调分割区域的形状,获得比较理想的目标图像.腐蚀和膨胀是形态学处理的基础操作,其他操作包括:开运算.闭运算. ...
- OpenCV3学习(4.3)——图像形态学(膨胀,腐蚀)
在图像处理技术中,有一些的操作会对图像的形态发生改变,这些操作一般称之为形态学操作(phology).数学形态学是基于集合论的图像处理方法,最早出现在生物学的形态与结构中,图像处理中的形态学操作用于图 ...
- CUDA学习笔记之程序优化
CUDA学习笔记之程序优化 标签: cuda优化conflict存储算法数学计算 2010-01-05 17:18 5035人阅读 评论(4) 收藏 举报 分类: CUDA(6) 版权声明:本文为博主 ...
- 深度学习(三十六)异构计算CUDA学习笔记(1)
异构计算CUDA学习笔记(1) 原文地址:http://blog.csdn.net/hjimce/article/details/51506207 作者:hjimce 近日因为感觉自己在深度学习工程化 ...
- CUDA学习笔记(持续更新——蜗速)
CUDA学习笔记(持续更新--蜗速) 1.CUDA 程序实现流程如下 2.内存管理 3.核函数 4.全局数据访问唯一索引 5.设备管理 附录代码 1.CUDA 程序实现流程如下 将数据从CPU内存拷贝 ...
- Cuda学习笔记(一)——sm流处理器簇对blocks的调度策略
由于GPU目前在各行各业的广泛应用,无论是深度学习.大数据.云计算等都离不开GPU的并行加速,前阵子自学了Cuda-c编程,希望将来的研究工作能够用得上. Cuda系列总共有4篇,这里主要用于记录本人 ...
- OpenCV学习笔记(七):形态学morpholgy(1):腐蚀/膨胀:enrode(),dilate()
OpenCV学习笔记(七):形态学(morpholgy):腐蚀/膨胀:enrode(),dilate() 数学形态学(Mathematical morphology) 是一门建立在格论和拓扑学基础之上 ...
- OpenCV学习笔记(八):形态学morpholgy(2):开/闭运算,形态学梯度、顶帽/黑帽morphologyEx()
OpenCV学习笔记(八):形态学morpholgy(2):开.闭运算,形态学梯度.顶帽.黑帽:morphologyEx() 数学形态学(Mathematical morphology) 是一门建立在 ...
最新文章
- php鼠标经过显示文本,CSS_HTML和CSS做网页实例教程:鼠标滑过文字改变,关于HTML+CSS的实例效果很多, - phpStudy...
- the largest issue in management
- C++对自定义结构体变量排序
- ActiveMQ消息优先级:工作原理
- java gps 距离计算_Java教程之地图中计算两个GPS坐标点的距离
- 重磅资料!Github上的PHP资源汇总大全
- 都在夸官方文档 Vue.js 2021 年度报告出炉!
- c++ 结构体初始化_C/C++编程笔记:C语言和C++语言的 struct 对比!区别在哪里?
- 是驴是骡,遛一遛就知道了
- 基于OpenCV的图像去雾程序(Single Image Haze Removal Using Dark Channel Prior)
- MySql学习笔记(六):扫描范围
- 关于E-Prime 2.0 无法呈现音频的一种解决方案
- cad图形如何导入到奥维地图_如何将CAD图导入奥维地图
- 来兄弟连了,你还在用面向过程唱高音歌曲的你就OUT了
- CAN通信波特率计算
- Mac 上设置锁屏快捷键
- css 的rgba属性
- 如何做好网站建设需求分析
- 5G多卡聚合路由器如何使用
- You called this URL via POST, but the URL doesn't end in a slash and you have APPEND_SLASH set. Djan