|
| 1 | +#include <chrono> |
| 2 | +#include <cstdio> |
| 3 | +#include <cstdlib> |
| 4 | +#include <stdexcept> |
| 5 | +#include <vector> |
| 6 | + |
| 7 | + |
| 8 | +#include <cublas_v2.h> |
| 9 | +#include <cuda_runtime.h> |
| 10 | + |
// CUDA runtime API error checking.
// Evaluates the argument exactly once. On any status other than cudaSuccess
// the numeric code, the runtime's textual description and the call site are
// written to stderr (unbuffered, so the message is not lost if the exception
// below ends up terminating the process), then std::runtime_error is thrown.
#define CUDA_CHECK(err)                                                        \
    do {                                                                       \
        cudaError_t err_ = (err);                                              \
        if (err_ != cudaSuccess) {                                             \
            std::fprintf(stderr, "CUDA error %d (%s) at %s:%d\n",              \
                         static_cast<int>(err_), cudaGetErrorString(err_),     \
                         __FILE__, __LINE__);                                  \
            throw std::runtime_error("CUDA error");                            \
        }                                                                      \
    } while (0)
| 20 | + |
// cuBLAS API error checking.
// Evaluates the argument exactly once. On any status other than
// CUBLAS_STATUS_SUCCESS the numeric status and the call site are written to
// stderr (unbuffered, so the message is not lost if the exception below ends
// up terminating the process), then std::runtime_error is thrown.
#define CUBLAS_CHECK(err)                                                      \
    do {                                                                       \
        cublasStatus_t err_ = (err);                                           \
        if (err_ != CUBLAS_STATUS_SUCCESS) {                                   \
            std::fprintf(stderr, "cublas error %d at %s:%d\n",                 \
                         static_cast<int>(err_), __FILE__, __LINE__);          \
            throw std::runtime_error("cublas error");                          \
        }                                                                      \
    } while (0)
| 30 | + |
| 31 | +int main(int argc, char *argv[]) { |
| 32 | + cublasHandle_t cublasH = NULL; |
| 33 | + cudaStream_t stream = NULL; |
| 34 | + |
| 35 | + cudaEvent_t startEvent { nullptr }; |
| 36 | + cudaEvent_t stopEvent { nullptr }; |
| 37 | + float elapsed_gpu_ms {}; |
| 38 | + |
| 39 | + int m, n, k; |
| 40 | + int lda, ldb, ldc; |
| 41 | + double alpha, beta; |
| 42 | + |
| 43 | + printf ("\n This example computes real matrix C=alpha*A*B+beta*C using \n" |
| 44 | + " cuBLAS dgemm, where A, B, and C are matrices and \n" |
| 45 | + " alpha and beta are double precision scalars\n\n"); |
| 46 | + |
| 47 | + int size = 4092; |
| 48 | + m = size, k = size, n = size; |
| 49 | + lda = size, ldb = size, ldc = size; |
| 50 | + printf (" Initializing data for matrix multiplication C=A*B for matrix \n" |
| 51 | + " A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n); |
| 52 | + alpha = 1.0, beta = 0.0; |
| 53 | + |
| 54 | + printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n" |
| 55 | + " performance \n\n"); |
| 56 | + |
| 57 | + double *d_A = nullptr; |
| 58 | + double *d_B = nullptr; |
| 59 | + double *d_C = nullptr; |
| 60 | + |
| 61 | + /* step 1: create cublas handle, bind a stream */ |
| 62 | + CUBLAS_CHECK(cublasCreate(&cublasH)); |
| 63 | + |
| 64 | + CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking)); |
| 65 | + CUBLAS_CHECK(cublasSetStream(cublasH, stream)); |
| 66 | + |
| 67 | + /* step 2: copy data to device */ |
| 68 | + CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&d_A), sizeof(double) * m * k)); |
| 69 | + CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&d_B), sizeof(double) * k * n)); |
| 70 | + CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&d_C), sizeof(double) * m * n)); |
| 71 | + |
| 72 | + /* step 3: compute */ |
| 73 | + printf (" Computing matrix product using cuBLAS dgemm function \n\n"); |
| 74 | + |
| 75 | + cudaEventCreate( &startEvent, cudaEventBlockingSync ); |
| 76 | + cudaEventRecord( startEvent ); |
| 77 | + |
| 78 | + for (int i =0; i< 10; i++) |
| 79 | + CUBLAS_CHECK( |
| 80 | + cublasDgemm(cublasH, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, d_A, lda, d_B, ldb, &beta, d_C, ldc)); |
| 81 | + |
| 82 | + CUDA_CHECK(cudaStreamSynchronize(stream)); |
| 83 | + |
| 84 | + cudaEventCreate( &stopEvent, cudaEventBlockingSync ); |
| 85 | + cudaEventRecord( stopEvent ); |
| 86 | + cudaEventSynchronize( stopEvent ); |
| 87 | + |
| 88 | + printf ("\n Computations completed.\n\n"); |
| 89 | + |
| 90 | + cudaEventElapsedTime( &elapsed_gpu_ms, startEvent, stopEvent ); |
| 91 | + printf( " Time Elapsed: %0.2f ms \n\n", elapsed_gpu_ms/10); |
| 92 | + |
| 93 | + /* free resources */ |
| 94 | + printf ("\n Deallocating memory \n\n"); |
| 95 | + CUDA_CHECK(cudaFree(d_A)); |
| 96 | + CUDA_CHECK(cudaFree(d_B)); |
| 97 | + CUDA_CHECK(cudaFree(d_C)); |
| 98 | + |
| 99 | + CUBLAS_CHECK(cublasDestroy(cublasH)); |
| 100 | + |
| 101 | + CUDA_CHECK(cudaStreamDestroy(stream)); |
| 102 | + |
| 103 | + CUDA_CHECK(cudaDeviceReset()); |
| 104 | + |
| 105 | + printf (" Example completed. \n\n"); |
| 106 | + return 0; |
| 107 | +} |
0 commit comments