cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀

  • 代码
  • linux如何编译cuda和opencv代码
  • 耗时情况

代码

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <device_functions.h>
#include <opencv2\opencv.hpp>
//#include "/usr/include/opencv4/opencv2/opencv.hpp"
#include <iostream>
using namespace std;
using namespace cv;//腐蚀
/**
 * Grayscale erosion: every output pixel becomes the minimum of the input
 * pixels covered by a rectangular structuring element.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
 * larger than the image (ceil-div launch); surplus threads exit immediately.
 * No shared memory is used.
 *
 * Preconditions: dataIn and dataOut each hold imgWidth * imgHeight bytes,
 * stored row by row without padding (index = y * imgWidth + x), and must not
 * overlap, because neighbouring threads read dataIn while dataOut is written.
 *
 * @param dataIn        input image in device memory (single channel, 8 bit)
 * @param dataOut       output image in device memory, same size as dataIn
 * @param erodeElement  width/height of the rectangular structuring element
 * @param imgWidth      image width in pixels
 * @param imgHeight     image height in pixels
 */
__global__ void erodeInCuda(unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads past the right or
    // bottom edge must not touch memory.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int elementWidth = erodeElement.width;
    const int elementHeight = erodeElement.height;
    const int halfEW = elementWidth / 2;
    const int halfEH = elementHeight / 2;

    // Window anchored at the element centre (size / 2). It spans exactly
    // elementWidth x elementHeight pixels, also for even sizes, and is clipped
    // to the image. Ignoring pixels outside the image is equivalent to treating
    // them as +infinity for a minimum, which is what cv::erode's default border
    // value does according to the OpenCV documentation.
    int xBegin = xIndex - halfEW;
    int xEnd = xBegin + elementWidth;   // exclusive
    int yBegin = yIndex - halfEH;
    int yEnd = yBegin + elementHeight;  // exclusive
    if (xBegin < 0) xBegin = 0;
    if (yBegin < 0) yBegin = 0;
    if (xEnd > imgWidth) xEnd = imgWidth;
    if (yEnd > imgHeight) yEnd = imgHeight;

    // size_t keeps the flat index from overflowing int on very large images.
    const size_t center = (size_t)yIndex * imgWidth + xIndex;

    // Keep the running minimum in a register and store it once at the end
    // instead of re-reading and re-writing dataOut for every comparison.
    unsigned char minValue = dataIn[center];
    for (int y = yBegin; y < yEnd; ++y)
    {
        const size_t rowStart = (size_t)y * imgWidth;
        for (int x = xBegin; x < xEnd; ++x)
        {
            const unsigned char value = dataIn[rowStart + x];
            if (value < minValue)
            {
                minValue = value;
            }
        }
    }

    dataOut[center] = minValue;
}
/**
 * Grayscale dilation: every output pixel becomes the maximum of the input
 * pixels covered by a rectangular structuring element.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
 * larger than the image (ceil-div launch); surplus threads exit immediately.
 * No shared memory is used.
 *
 * Preconditions: dataIn and dataOut each hold imgWidth * imgHeight bytes,
 * stored row by row without padding (index = y * imgWidth + x), and must not
 * overlap, because neighbouring threads read dataIn while dataOut is written.
 *
 * @param dataIn         input image in device memory (single channel, 8 bit)
 * @param dataOut        output image in device memory, same size as dataIn
 * @param dilateElement  width/height of the rectangular structuring element
 * @param imgWidth       image width in pixels
 * @param imgHeight      image height in pixels
 */
__global__ void dilateInCuda(unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads past the right or
    // bottom edge must not touch memory.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int elementWidth = dilateElement.width;
    const int elementHeight = dilateElement.height;
    const int halfEW = elementWidth / 2;
    const int halfEH = elementHeight / 2;

    // Window anchored at the element centre (size / 2). It spans exactly
    // elementWidth x elementHeight pixels, also for even sizes, and is clipped
    // to the image. Ignoring pixels outside the image is equivalent to treating
    // them as -infinity for a maximum, which is what cv::dilate's default
    // border value does according to the OpenCV documentation.
    int xBegin = xIndex - halfEW;
    int xEnd = xBegin + elementWidth;   // exclusive
    int yBegin = yIndex - halfEH;
    int yEnd = yBegin + elementHeight;  // exclusive
    if (xBegin < 0) xBegin = 0;
    if (yBegin < 0) yBegin = 0;
    if (xEnd > imgWidth) xEnd = imgWidth;
    if (yEnd > imgHeight) yEnd = imgHeight;

    // size_t keeps the flat index from overflowing int on very large images.
    const size_t center = (size_t)yIndex * imgWidth + xIndex;

    // Keep the running maximum in a register and store it once at the end
    // instead of re-reading and re-writing dataOut for every comparison.
    unsigned char maxValue = dataIn[center];
    for (int y = yBegin; y < yEnd; ++y)
    {
        const size_t rowStart = (size_t)y * imgWidth;
        for (int x = xBegin; x < xEnd; ++x)
        {
            const unsigned char value = dataIn[rowStart + x];
            if (value > maxValue)
            {
                maxValue = value;
            }
        }
    }

    dataOut[center] = maxValue;
}

// Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess.
static bool cudaSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << std::endl;
    return false;
}

/**
 * Loads "1.jpg", erodes and dilates its grayscale version on the GPU with a
 * 3x5 rectangular element, computes the same operations with OpenCV on the
 * CPU, and displays the source image, the grayscale image and the OpenCV
 * dilation result.
 *
 * Returns 0 on success, 1 when the image cannot be read or a CUDA call fails.
 */
int main()
{
    Mat srcImg = imread("1.jpg");     // input picture
    Mat grayImg = imread("1.jpg", 0); // same picture loaded as grayscale
    if (srcImg.empty() || grayImg.empty())
    {
        std::cerr << "Failed to read image 1.jpg" << std::endl;
        return 1;
    }
    // The upload below copies one flat buffer, so rows must be stored back to back.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    cv::namedWindow("srcImg", 0);
    cv::imshow("srcImg", srcImg);
    cv::waitKey(1000);
    cv::namedWindow("grayImg", 0);
    cv::imshow("grayImg", grayImg);
    cv::waitKey(1000);

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    const size_t imgBytes = (size_t)imgWidth * imgHeight * sizeof(unsigned char);

    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // host buffer for the GPU erosion result
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // host buffer for the GPU dilation result

    unsigned char *d_in = nullptr;   // input image on the GPU
    unsigned char *d_out1 = nullptr; // erosion result on the GPU
    unsigned char *d_out2 = nullptr; // dilation result on the GPU

    // Allocate device memory and upload the input image.
    bool ok = cudaSucceeded(cudaMalloc((void**)&d_in, imgBytes), "cudaMalloc(d_in)");
    ok = ok && cudaSucceeded(cudaMalloc((void**)&d_out1, imgBytes), "cudaMalloc(d_out1)");
    ok = ok && cudaSucceeded(cudaMalloc((void**)&d_out2, imgBytes), "cudaMalloc(d_out2)");
    ok = ok && cudaSucceeded(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    // One thread per pixel; the grid is rounded up to cover the whole image.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Structuring element size (width 3, height 5).
    Size Element(3, 5);

    // CUDA erosion, then copy the result back to the host.
    if (ok)
    {
        erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
        // A launch does not return an error itself; query it explicitly.
        ok = cudaSucceeded(cudaGetLastError(), "erodeInCuda launch")
          && cudaSucceeded(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(erode result)");
    }

    // CPU erosion (OpenCV implementation).
    Mat erodeImg;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 5));
    erode(grayImg, erodeImg, element);

    // CUDA dilation, then copy the result back to the host.
    if (ok)
    {
        dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
        ok = cudaSucceeded(cudaGetLastError(), "dilateInCuda launch")
          && cudaSucceeded(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dilate result)");
    }

    // CPU dilation (OpenCV implementation).
    Mat dilateImg;
    dilate(grayImg, dilateImg, element);

    // Release device memory on every path; cudaFree accepts a null pointer.
    cudaFree(d_in);
    cudaFree(d_out1);
    cudaFree(d_out2);

    if (!ok)
    {
        return 1;
    }

    cv::namedWindow("dilateImg11111", 0);
    cv::imshow("dilateImg11111", dilateImg);
    cv::waitKey(1000);
    cv::waitKey(0);
    return 0;
}

linux如何编译cuda和opencv代码

 nvcc  `pkg-config opencv4 --cflags --libs` test5.cu -o test5
 # 或者
 nvcc `pkg-config --libs opencv4` -L. -L/usr/local/cuda/lib -lcuda -lcudart `pkg-config --cflags opencv4` -I. -I/usr/local/cuda/include test5.cu -o test5

耗时情况

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <cuda.h>
#include <device_functions.h>
#include <opencv2/opencv.hpp>
#include <time.h>//#include "/usr/include/opencv4/opencv2/opencv.hpp"
#include <iostream>
using namespace std;
using namespace cv;//腐蚀
/**
 * Grayscale erosion: every output pixel becomes the minimum of the input
 * pixels covered by a rectangular structuring element.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
 * larger than the image (ceil-div launch); surplus threads exit immediately.
 * No shared memory is used.
 *
 * Preconditions: dataIn and dataOut each hold imgWidth * imgHeight bytes,
 * stored row by row without padding (index = y * imgWidth + x), and must not
 * overlap, because neighbouring threads read dataIn while dataOut is written.
 *
 * @param dataIn        input image in device memory (single channel, 8 bit)
 * @param dataOut       output image in device memory, same size as dataIn
 * @param erodeElement  width/height of the rectangular structuring element
 * @param imgWidth      image width in pixels
 * @param imgHeight     image height in pixels
 */
__global__ void erodeInCuda(unsigned char *dataIn, unsigned char *dataOut, Size erodeElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads past the right or
    // bottom edge must not touch memory.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int elementWidth = erodeElement.width;
    const int elementHeight = erodeElement.height;
    const int halfEW = elementWidth / 2;
    const int halfEH = elementHeight / 2;

    // Window anchored at the element centre (size / 2). It spans exactly
    // elementWidth x elementHeight pixels, also for even sizes, and is clipped
    // to the image. Ignoring pixels outside the image is equivalent to treating
    // them as +infinity for a minimum, which is what cv::erode's default border
    // value does according to the OpenCV documentation.
    int xBegin = xIndex - halfEW;
    int xEnd = xBegin + elementWidth;   // exclusive
    int yBegin = yIndex - halfEH;
    int yEnd = yBegin + elementHeight;  // exclusive
    if (xBegin < 0) xBegin = 0;
    if (yBegin < 0) yBegin = 0;
    if (xEnd > imgWidth) xEnd = imgWidth;
    if (yEnd > imgHeight) yEnd = imgHeight;

    // size_t keeps the flat index from overflowing int on very large images.
    const size_t center = (size_t)yIndex * imgWidth + xIndex;

    // Keep the running minimum in a register and store it once at the end
    // instead of re-reading and re-writing dataOut for every comparison.
    unsigned char minValue = dataIn[center];
    for (int y = yBegin; y < yEnd; ++y)
    {
        const size_t rowStart = (size_t)y * imgWidth;
        for (int x = xBegin; x < xEnd; ++x)
        {
            const unsigned char value = dataIn[rowStart + x];
            if (value < minValue)
            {
                minValue = value;
            }
        }
    }

    dataOut[center] = minValue;
}
/**
 * Grayscale dilation: every output pixel becomes the maximum of the input
 * pixels covered by a rectangular structuring element.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may be
 * larger than the image (ceil-div launch); surplus threads exit immediately.
 * No shared memory is used.
 *
 * Preconditions: dataIn and dataOut each hold imgWidth * imgHeight bytes,
 * stored row by row without padding (index = y * imgWidth + x), and must not
 * overlap, because neighbouring threads read dataIn while dataOut is written.
 *
 * @param dataIn         input image in device memory (single channel, 8 bit)
 * @param dataOut        output image in device memory, same size as dataIn
 * @param dilateElement  width/height of the rectangular structuring element
 * @param imgWidth       image width in pixels
 * @param imgHeight      image height in pixels
 */
__global__ void dilateInCuda(unsigned char *dataIn, unsigned char *dataOut, Size dilateElement, int imgWidth, int imgHeight)
{
    // Pixel coordinates handled by this thread.
    const int xIndex = threadIdx.x + blockIdx.x * blockDim.x;
    const int yIndex = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads past the right or
    // bottom edge must not touch memory.
    if (xIndex >= imgWidth || yIndex >= imgHeight)
    {
        return;
    }

    const int elementWidth = dilateElement.width;
    const int elementHeight = dilateElement.height;
    const int halfEW = elementWidth / 2;
    const int halfEH = elementHeight / 2;

    // Window anchored at the element centre (size / 2). It spans exactly
    // elementWidth x elementHeight pixels, also for even sizes, and is clipped
    // to the image. Ignoring pixels outside the image is equivalent to treating
    // them as -infinity for a maximum, which is what cv::dilate's default
    // border value does according to the OpenCV documentation.
    int xBegin = xIndex - halfEW;
    int xEnd = xBegin + elementWidth;   // exclusive
    int yBegin = yIndex - halfEH;
    int yEnd = yBegin + elementHeight;  // exclusive
    if (xBegin < 0) xBegin = 0;
    if (yBegin < 0) yBegin = 0;
    if (xEnd > imgWidth) xEnd = imgWidth;
    if (yEnd > imgHeight) yEnd = imgHeight;

    // size_t keeps the flat index from overflowing int on very large images.
    const size_t center = (size_t)yIndex * imgWidth + xIndex;

    // Keep the running maximum in a register and store it once at the end
    // instead of re-reading and re-writing dataOut for every comparison.
    unsigned char maxValue = dataIn[center];
    for (int y = yBegin; y < yEnd; ++y)
    {
        const size_t rowStart = (size_t)y * imgWidth;
        for (int x = xBegin; x < xEnd; ++x)
        {
            const unsigned char value = dataIn[rowStart + x];
            if (value > maxValue)
            {
                maxValue = value;
            }
        }
    }

    dataOut[center] = maxValue;
}

// Reports a failed CUDA runtime call on stderr; returns true when err is cudaSuccess.
static bool cudaSucceeded(cudaError_t err, const char *what)
{
    if (err == cudaSuccess)
    {
        return true;
    }
    std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << std::endl;
    return false;
}

/**
 * Timing variant: loads "1.jpg", erodes and dilates its grayscale version on
 * the GPU with a 3x5 rectangular element, runs the same operations with
 * OpenCV on the CPU, and prints three durations in milliseconds:
 *   diff_3 - erosion kernel only (launch until the device has finished)
 *   diff_1 - whole GPU erosion path (allocation, upload, kernel, download)
 *   diff_2 - OpenCV erosion on the CPU
 * The OpenCV dilation result is written to "dilateImg_230117.jpg" and shown.
 *
 * Timing uses clock(), which reports processor time of this process rather
 * than wall-clock time, so the figures are approximate.
 *
 * Returns 0 on success, 1 when the image cannot be read or a CUDA call fails.
 */
int main()
{
    Mat srcImg = imread("1.jpg");     // input picture
    Mat grayImg = imread("1.jpg", 0); // same picture loaded as grayscale
    if (srcImg.empty() || grayImg.empty())
    {
        std::cerr << "Failed to read image 1.jpg" << std::endl;
        return 1;
    }
    // The upload below copies one flat buffer, so rows must be stored back to back.
    if (!grayImg.isContinuous())
    {
        grayImg = grayImg.clone();
    }

    cv::namedWindow("srcImg", 0);
    cv::imshow("srcImg", srcImg);
    cv::waitKey(1000);
    cv::namedWindow("grayImg", 0);
    cv::imshow("grayImg", grayImg);
    cv::waitKey(1000);

    const int imgWidth = grayImg.cols;
    const int imgHeight = grayImg.rows;
    const size_t imgBytes = (size_t)imgWidth * imgHeight * sizeof(unsigned char);

    Mat dstImg1(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // host buffer for the GPU erosion result
    Mat dstImg2(imgHeight, imgWidth, CV_8UC1, Scalar(0)); // host buffer for the GPU dilation result

    unsigned char *d_in = nullptr;   // input image on the GPU
    unsigned char *d_out1 = nullptr; // erosion result on the GPU
    unsigned char *d_out2 = nullptr; // dilation result on the GPU

    // Start of the whole GPU erosion path. The first CUDA runtime call also
    // pays the one-time context initialisation, which is included in diff_1.
    const clock_t time_1 = clock();

    // Allocate device memory and upload the input image.
    bool ok = cudaSucceeded(cudaMalloc((void**)&d_in, imgBytes), "cudaMalloc(d_in)");
    ok = ok && cudaSucceeded(cudaMalloc((void**)&d_out1, imgBytes), "cudaMalloc(d_out1)");
    ok = ok && cudaSucceeded(cudaMalloc((void**)&d_out2, imgBytes), "cudaMalloc(d_out2)");
    ok = ok && cudaSucceeded(cudaMemcpy(d_in, grayImg.data, imgBytes, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    // One thread per pixel; the grid is rounded up to cover the whole image.
    dim3 threadsPerBlock(32, 32);
    dim3 blocksPerGrid((imgWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                       (imgHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

    // Structuring element size (width 3, height 5).
    Size Element(3, 5);

    // CUDA erosion.
    if (ok)
    {
        const clock_t time_5 = clock();
        erodeInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out1, Element, imgWidth, imgHeight);
        // Kernel launches are asynchronous: wait for completion before reading
        // the clock, otherwise only the launch overhead would be measured.
        ok = cudaSucceeded(cudaGetLastError(), "erodeInCuda launch")
          && cudaSucceeded(cudaDeviceSynchronize(), "erodeInCuda execution");
        const clock_t time_6 = clock();

        // Copy the result back to the host.
        ok = ok && cudaSucceeded(cudaMemcpy(dstImg1.data, d_out1, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(erode result)");
        const clock_t time_2 = clock();

        if (ok)
        {
            // Convert clock ticks to milliseconds via CLOCKS_PER_SEC instead of
            // assuming one tick per microsecond.
            float diff_3 = (double)(time_6 - time_5) * 1000.0 / CLOCKS_PER_SEC;
            printf("\n\n[ALG][%s][%4d]diff_3 = %f ms \n",__FUNCTION__, __LINE__, diff_3);
            float diff_1 = (double)(time_2 - time_1) * 1000.0 / CLOCKS_PER_SEC;
            printf("\n\n[ALG][%s][%4d]diff_1 = %f ms\n",__FUNCTION__, __LINE__, diff_1);
        }
    }

    // CPU erosion (OpenCV implementation), timed for comparison.
    const clock_t time_3 = clock();
    Mat erodeImg;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 5));
    erode(grayImg, erodeImg, element);
    const clock_t time_4 = clock();
    float diff_2 = (double)(time_4 - time_3) * 1000.0 / CLOCKS_PER_SEC;
    printf("\n\n[ALG][%s][%4d]diff_2 = %f ms \n",__FUNCTION__, __LINE__, diff_2);

    // CUDA dilation, then copy the result back to the host.
    if (ok)
    {
        dilateInCuda<<<blocksPerGrid, threadsPerBlock>>>(d_in, d_out2, Element, imgWidth, imgHeight);
        ok = cudaSucceeded(cudaGetLastError(), "dilateInCuda launch")
          && cudaSucceeded(cudaMemcpy(dstImg2.data, d_out2, imgBytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dilate result)");
    }

    // CPU dilation (OpenCV implementation).
    Mat dilateImg;
    dilate(grayImg, dilateImg, element);

    // Release device memory on every path; cudaFree accepts a null pointer.
    cudaFree(d_in);
    cudaFree(d_out1);
    cudaFree(d_out2);

    if (!ok)
    {
        return 1;
    }

    imwrite("dilateImg_230117.jpg", dilateImg);
    cv::namedWindow("dilateImg11111", 0);
    cv::imshow("dilateImg11111", dilateImg);
    cv::waitKey(1000);
    cv::waitKey(0);
    return 0;
}

参考:https://blog.csdn.net/MGotze/article/details/76448702

cuda学习笔记5——CUDA实现图像形态学腐蚀、膨胀相关推荐

  1. CUDA学习笔记之 CUDA存储器模型

    CUDA学习笔记之 CUDA存储器模型 标签: cuda存储bindingcache编程api 2010-12-14 01:33 1223人阅读 评论(0) 收藏 举报 分类: CUDA(26) GP ...

  2. Opencv学习笔记(六)图像形态学处理

    文章目录 形态学变换 主要操作 代码 参考 形态学变换 形态学处理主要应用于二值图像,目的是微调分割区域的形状,获得比较理想的目标图像.腐蚀和膨胀是形态学处理的基础操作,其他操作包括:开运算.闭运算. ...

  3. OpenCV3学习(4.3)——图像形态学(膨胀,腐蚀)

    在图像处理技术中,有一些的操作会对图像的形态发生改变,这些操作一般称之为形态学操作(phology).数学形态学是基于集合论的图像处理方法,最早出现在生物学的形态与结构中,图像处理中的形态学操作用于图 ...

  4. CUDA学习笔记之程序优化

    CUDA学习笔记之程序优化 标签: cuda优化conflict存储算法数学计算 2010-01-05 17:18 5035人阅读 评论(4) 收藏 举报 分类: CUDA(6) 版权声明:本文为博主 ...

  5. 深度学习(三十六)异构计算CUDA学习笔记(1)

    异构计算CUDA学习笔记(1) 原文地址:http://blog.csdn.net/hjimce/article/details/51506207 作者:hjimce 近日因为感觉自己在深度学习工程化 ...

  6. CUDA学习笔记(持续更新——蜗速)

    CUDA学习笔记(持续更新--蜗速) 1.CUDA 程序实现流程如下 2.内存管理 3.核函数 4.全局数据访问唯一索引 5.设备管理 附录代码 1.CUDA 程序实现流程如下 将数据从CPU内存拷贝 ...

  7. Cuda学习笔记(一)——sm流处理器簇对blocks的调度策略

    由于GPU目前在各行各业的广泛应用,无论是深度学习.大数据.云计算等都离不开GPU的并行加速,前阵子自学了Cuda-c编程,希望将来的研究工作能够用得上. Cuda系列总共有4篇,这里主要用于记录本人 ...

  8. OpenCV学习笔记(七):形态学morpholgy(1):腐蚀/膨胀:enrode(),dilate()

    OpenCV学习笔记(七):形态学(morpholgy):腐蚀/膨胀:enrode(),dilate() 数学形态学(Mathematical morphology) 是一门建立在格论和拓扑学基础之上 ...

  9. OpenCV学习笔记(八):形态学morpholgy(2):开/闭运算,形态学梯度、顶帽/黑帽morphologyEx()

    OpenCV学习笔记(八):形态学morpholgy(2):开.闭运算,形态学梯度.顶帽.黑帽:morphologyEx() 数学形态学(Mathematical morphology) 是一门建立在 ...

最新文章

  1. php鼠标经过显示文本,CSS_HTML和CSS做网页实例教程:鼠标滑过文字改变,关于HTML+CSS的实例效果很多, - phpStudy...
  2. the largest issue in management
  3. C++对自定义结构体变量排序
  4. ActiveMQ消息优先级:工作原理
  5. java gps 距离计算_Java教程之地图中计算两个GPS坐标点的距离
  6. 重磅资料!Github上的PHP资源汇总大全
  7. 都在夸官方文档 Vue.js 2021 年度报告出炉!
  8. c++ 结构体初始化_C/C++编程笔记:C语言和C++语言的 struct 对比!区别在哪里?
  9. 是驴是骡,遛一遛就知道了
  10. 基于OpenCV的图像去雾程序(Single Image Haze Removal Using Dark Channel Prior)
  11. MySql学习笔记(六):扫描范围
  12. 关于E-Prime 2.0 无法呈现音频的一种解决方案
  13. cad图形如何导入到奥维地图_如何将CAD图导入奥维地图
  14. 来兄弟连了,你还在用面向过程唱高音歌曲的你就OUT了
  15. CAN通信波特率计算
  16. Mac 上设置锁屏快捷键
  17. css 的rgba属性
  18. 如何做好网站建设需求分析
  19. 5G多卡聚合路由器如何使用
  20. You called this URL via POST, but the URL doesn't end in a slash and you have APPEND_SLASH set. Djan

热门文章

  1. 机器学习实例--预测美国人口收入状况
  2. 使用Qt合并图片的算法
  3. Vue中使用echart实现中国地图统计图
  4. xtu1395 字符频度
  5. 联想小新Air13高定黑使用初体验
  6. 用老虎机小例子理解Multi-armed Bandit Learning
  7. 用计算机打青春不打烊,彩色的青春不打烊
  8. 可兼容NAND512W3A2SZA6E物料的AFND1208U1-CKAI
  9. Gitee+Typora+PicGo+脚本
  10. 淮师计算机网络试题库,淮阴师范学院(淮师)计算机基础一至六章习题