CUDA编程实现求解单源Bellman-Ford最短路径算法

初始化：源节点到自己的距离为0，到其他节点的距离为∞；
对于源节点的邻接节点，比较从源节点加上边的和与其本身距离的值，若小，则更新。以此类推，
循环 V - 1 次（即节点数减1次）。
最后检查是否还可以松弛，如果可以，说明有环，返回false。

核函数结构：

// __global__ GPU执行部分 核函数
// n：顶点数量
// d_mat：矩阵
// d_dist：距离
// blockDim.x是线程块在x方向上的线程数（在x方向上每一个线程块有多少个线程）
// blockIdx.x是线程块在x方向上的索引
// threadIdx.x是线程在X方向上的索引（在自己的线程块内）
// gridDim.x表示x方向有几个block
__global__ void bellman_ford_one_iter(int n, int *d_mat, int *d_dist, bool *d_has_next, int iter_num) {int global_tid = blockDim.x * blockIdx.x + threadIdx.x; // 计算线程号global_tid 一维只需要在x方向上做计算int elementSkip = blockDim.x * gridDim.x;if(global_tid >= n) return;for(int u = 0 ; u < n ; u ++) {for(int v = global_tid; v < n; v+= elementSkip) {int weight = d_mat[u * n + v];if(weight < INF) {// 松弛操作int new_dist = d_dist[u] + weight;if(new_dist < d_dist[v]) {d_dist[v] = new_dist;*d_has_next = true;}}}}
}

核函数线程分配

源代码

// 这是CUDA版本的单源Bellman-Ford最短路径算法// 编译
// nvcc cuda.cu -o cuda
// 运行
// ./cuda input1.txt <每个网格的线程块的数量> <每个线程块的线程数量>// 可以在output.txt中看到结果记录#include <string>
#include <cassert>
#include <iostream>
#include <fstream>
#include <algorithm>
#include <iomanip>
#include <cstring>
#include <sys/time.h>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>// using namespace std 的进化版本
using std::string;
using std::cout;
using std::endl;// 无穷大
#define INF 1000000/** This is a CHECK function to check CUDA calls*/
#define CHECK(call) {                                                                \const cudaError_t error = call;                                                  \if (error != cudaSuccess) {                                                      \fprintf(stderr, "Error: %s:%d, ", __FILE__, __LINE__);                       \fprintf(stderr, "code: %d, reason: %s\n", error, cudaGetErrorString(error)); \exit(1);                                                                     \}                                                                                \
}// utils是实用程序功能的命名空间
// 包括I/O（读取输入文件和打印结果）和矩阵维度转换（2D->1D）函数
namespace utils {int N;    // 顶点数量int *mat; // the adjacency matrix 邻接矩阵void abort_with_error_message(string msg) {std::cerr << msg << endl;abort();}// 将二维转换为一维int convert_dimension_2D_1D(int x, int y, int n) {return x * n + y;}// 读取文件 获取整个矩阵 并经过转换存储在一维数组中int read_file(string filename) {std::ifstream inputf(filename, std::ifstream::in);if (!inputf.good()) {abort_with_error_message("ERROR OCCURRED WHILE READING INPUT FILE");}inputf >> N;// 对输入的矩阵做一个 20MB * 20MB 的限制 (400MB)assert(N < (1024 * 1024 * 20));mat = (int *) malloc(N * N * sizeof(int));for (int i = 0; i < N; i++)for (int j = 0; j < N; j++) {// 通过convert_dimension_2D_1D函数将二维数据转换成一维存放到mat数组中inputf >> mat[convert_dimension_2D_1D(i, j, N)];}return 0;}// 结果输出到output.txt中int print_result(bool has_negative_cycle, int *dist) {std::ofstream outputf("output.txt", std::ofstream::out);if (!has_negative_cycle) { // 不含有负权环则输出正确结果到文件中for (int i = 0; i < N; i++) {if (dist[i] > INF)dist[i] = INF;outputf << dist[i] << '\n';}outputf.flush();} else { // 含有负权环 输出FOUND NEGATIVE CYCLE!outputf << "FOUND NEGATIVE CYCLE!" << endl;}outputf.close();return 0;}
} // utils命名空间// __global__ GPU执行部分 核函数
// n：顶点数量
// d_mat：矩阵
// d_dist：距离
// blockDim.x是线程块在x方向上的线程数（在x方向上每一个线程块有多少个线程）
// blockIdx.x是线程块在x方向上的索引
// threadIdx.x是线程在X方向上的索引（在自己的线程块内）
// gridDim.x表示x方向有几个block
__global__ void bellman_ford_one_iter(int n, int *d_mat, int *d_dist, bool *d_has_next, int iter_num) {int global_tid = blockDim.x * blockIdx.x + threadIdx.x; // 计算线程号global_tid 一维只需要在x方向上做计算int elementSkip = blockDim.x * gridDim.x;if(global_tid >= n) return;for(int u = 0 ; u < n ; u ++) {for(int v = global_tid; v < n; v+= elementSkip) {int weight = d_mat[u * n + v];if(weight < INF) {// 松弛操作int new_dist = d_dist[u] + weight;if(new_dist < d_dist[v]) {d_dist[v] = new_dist;*d_has_next = true;}}}}
}// Bellman-Ford算法。找到从顶点0到其他顶点的最短路径。
/*** @param blockPerGrid 每个网格中线程块的个数* @param threadsPerBlock 每个线程块中线程的个数* @param n input size* @param *mat input adjacency matrix* @param *dist 距离数组* @param *has_negative_cycle 一个记录是否含有负权环的变量*/
void bellman_ford(int blocksPerGrid, int threadsPerBlock, int n, int *mat, int *dist, bool *has_negative_cycle) {dim3 blocks(blocksPerGrid);dim3 threads(threadsPerBlock);// 计数 记录循环次数int iter_num = 0;int *d_mat, *d_dist;bool *d_has_next, h_has_next;// GPU全局内存分配// cudaError_t cudaMalloc(void **devPtr, size_t size);cudaMalloc(&d_mat, sizeof(int) * n * n);cudaMalloc(&d_dist, sizeof(int) *n);cudaMalloc(&d_has_next, sizeof(bool));// 是否有负权环的标志位*has_negative_cycle = false;// 初始化 将所有距离置为无穷大for(int i = 0 ; i < n; i ++) {dist[i] = INF;}// 到自己的距离置为0dist[0] = 0;// CPU与GPU内存同步拷贝// cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, cudaMemcpyKind kind)// kind: cudaMemcpyHostToHost, cudaMemcpyHostToDevice, cudaMemcpyDeviceToHost, cudaMemcpyDeviceToDevice, or cudaMemcpyDefaultcudaMemcpy(d_mat, mat, sizeof(int) * n * n, cudaMemcpyHostToDevice);cudaMemcpy(d_dist, dist, sizeof(int) * n, cudaMemcpyHostToDevice);for(;;) {h_has_next = false;cudaMemcpy(d_has_next, &h_has_next, sizeof(bool), cudaMemcpyHostToDevice);// 松弛操作 由GPU进行bellman_ford_one_iter<<<blocks, threads>>>(n, d_mat, d_dist, d_has_next, iter_num);CHECK(cudaDeviceSynchronize());cudaMemcpy(&h_has_next, d_has_next, sizeof(bool), cudaMemcpyDeviceToHost);iter_num++; // 循环次数加一if(iter_num >= n-1) { // 如果循环次数大于n-1 说明存在负权环*has_negative_cycle = true;break;}if(!h_has_next) {break;}}if(! *has_negative_cycle) {cudaMemcpy(dist, d_dist, sizeof(int) * n, cudaMemcpyDeviceToHost);}// GPU全局内存释放// cudaError_t cudaFree(void *devPtr);cudaFree(d_mat);cudaFree(d_dist);cudaFree(d_has_next);
}int main(int argc, char **argv) {if (argc <= 1) { // 没有写输入文件的情况utils::abort_with_error_message("INPUT FILE WAS NOT FOUND!");}if (argc <= 3) { // 没有写块数量或者进程数的情况utils::abort_with_error_message("blocksPerGrid or threadsPerBlock WAS NOT FOUND!");}// 保存输入文件名string filename = argv[1];// 每个网格中线程块的个数int blockPerGrid = atoi(argv[2]);// 每个线程块中线程的个数int threadsPerBlock = atoi(argv[3]);int *dist;bool has_negative_cycle = false;assert(utils::read_file(filename) == 0);dist = (int *) calloc(sizeof(int), utils::N);// 计时器timeval start_wall_time_t, end_wall_time_t;float ms_wall;cudaDeviceReset();// 开始计时gettimeofday(&start_wall_time_t, nullptr);//bellman-ford算法bellman_ford(blockPerGrid, threadsPerBlock, utils::N, utils::mat, dist, &has_negative_cycle);CHECK(cudaDeviceSynchronize());// 结束计时gettimeofday(&end_wall_time_t, nullptr);ms_wall = ((end_wall_time_t.tv_sec - start_wall_time_t.tv_sec) * 1000 * 1000+ end_wall_time_t.tv_usec - start_wall_time_t.tv_usec) / 1000.0;std::cerr.setf(std::ios::fixed);std::cerr << std::setprecision(6) << "Time(s): " << (ms_wall/1000.0) << endl;utils::print_result(has_negative_cycle, dist);free(dist);free(utils::mat);return 0;
}

CUDA编程实现求解单源Bellman-Ford最短路径算法相关推荐

求解单源最短路径的几种算法
方法一:Dijkstra算法 #include<iostream> #include<algorithm> #include<cstring> #include&l ...
c++ 单源最短路径-狄杰斯特拉算法
文章目录源码输出狄杰斯特拉算法适用范围:没有负权重的图, 可以有环,有向无向图均可求解问题:单源最短路径时间复杂度:V^2 源码 #include <iostream> #in ...
【PAT数据结构与算法题目集】旅游规划（单源最短路径，长度+路径查找）
[PAT数据结构与算法题目集] 旅游规划(单源最短路径,长度+路径查找) 题目有了一张自驾旅游路线图,你会知道城市间的高速公路长度.以及该公路要收取的过路费.现在需要你写一个程序,帮助前来咨询的游客 ...
spfa 单源最短路究极算法
学习博客链接:SPFA 求单源最短路的SPFA算法的全称是:Shortest Path Faster Algorithm. SPFA算法是西南交通大学段凡丁于1994年发表的. 从名 ...
Dijkstra算法求解单源最短路径问题
文章目录一前言二 Dijkstra 算法讲解 1. 贪心算法的证明 2. 算法实现说明 3. 初版Dijkstra算法代码三时间复杂度优化 1. 优化策略 2. 优化后的代码四结语一 ...
单源最短路径问题（dijkstra算法）
一.问题描述给定一个随机带权有向图,每条边的权是一个实数.另外给定图中一个顶点,称为源.计算源到各顶点的最短路径长度(即距离),要求能随机生成图,随机指定源点计算出到顶点的最短距离. 二.解题思路 ...
图论(四)------非负权有向图的单源最短路径问题，Dijkstra算法
Dijkstra算法解决了有向图G=(V,E)上带权的单源最短路径问题,但要求所有边的权值非负. Dijkstra算法是贪婪算法的一个很好的例子.设置一顶点集合S,从源点s到集合中的顶点的最终最短路径 ...
单源最短路之Dijkstra算法代码模板
常用算法代码模板总结目录注意事项数组定义实现步骤练习例题:Dijkstra求最短路注意事项用来求单源最短路所有边权都是正数,时间复杂度O(n^2) 可通过堆优化达到时间复杂度为O(ml ...
【单源最短路】Dijkstra算法求最短路
题目描述给定一个 n 个点 m 条边的有向图,图中可能存在重边和自环,所有边权均为正值. 请你求出 1 号点到 n 号点的最短距离,如果无法从 1 号点走到 n 号点,则输出 −1. 输入格式第一 ...

CUDA编程实现求解单源Bellman-Ford最短路径算法

CUDA编程实现求解单源Bellman-Ford最短路径算法相关推荐

最新文章

热门文章

CUDA编程实现求解单源Bellman-Ford最短 路径算法

CUDA编程实现求解单源Bellman-Ford最短 路径算法相关推荐

最新文章

热门文章

CUDA编程实现求解单源Bellman-Ford最短路径算法

CUDA编程实现求解单源Bellman-Ford最短路径算法相关推荐