-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_cuda_i128_div.cu
More file actions
89 lines (71 loc) · 2.87 KB
/
test_cuda_i128_div.cu
File metadata and controls
89 lines (71 loc) · 2.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
// Copyright Matt Borland 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#define BOOST_INT128_ALLOW_SIGN_CONVERSION
#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION
#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <limits>
#include <boost/safe_numbers/signed_integers.hpp>
#include <boost/safe_numbers/cuda_error_reporting.hpp>
#include <boost/safe_numbers/detail/int128/random.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"
#include <cuda_runtime.h>
// Type under test: used for the kernel operands, the kernel output and the
// host-side reference results.
using test_type = boost::safe_numbers::i128;
// Type named by test_type::basis_type. The random distributions in main()
// produce values of this type, which are then wrapped into test_type.
// NOTE(review): presumably the raw 128-bit integer representation behind
// test_type - confirm against the library headers.
using basis_type = test_type::basis_type;
// Element-wise division kernel: out[k] = in[k] / in2[k].
//
//   in          - dividends (read only)
//   in2         - divisors (read only)
//   out         - destination for the quotients
//   numElements - number of valid elements in each array
//
// Each thread handles at most one element. Only the x dimension of the launch
// configuration is used to form the index, and there is no striding loop, so
// an element is processed only if the launch supplies a thread for its index.
__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements)
{
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    // Threads whose index lies at or beyond numElements have nothing to do.
    if (idx >= numElements)
    {
        return;
    }

    out[idx] = in[idx] / in2[idx];
}
// Host driver for the division test.
//
// Fills two managed buffers with random operands, runs cuda_test on them,
// recomputes every quotient on the host, and compares the two result sets
// element by element.
//
// Returns EXIT_FAILURE if the kernel launch reports a CUDA error or if any
// device result differs from the host result; returns 0 otherwise.
int main(void)
{
    // Fixed seed so every run uses the same operands.
    std::mt19937_64 rng{42};

    const int numElements = 50000;
    std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

    cuda_managed_ptr<test_type> input_vector(numElements);
    cuda_managed_ptr<test_type> input_vector2(numElements);
    cuda_managed_ptr<test_type> output_vector(numElements);

    // Dividends span the full range of basis_type.
    boost::random::uniform_int_distribution<basis_type> dist{(std::numeric_limits<basis_type>::min)(), (std::numeric_limits<basis_type>::max)()};
    // Divisors start at 1, so a zero divisor is never generated.
    boost::random::uniform_int_distribution<basis_type> dist2{basis_type{1}, (std::numeric_limits<basis_type>::max)()};

    for (int i = 0; i < numElements; ++i)
    {
        input_vector[i] = test_type{dist(rng)};
        input_vector2[i] = test_type{dist2(rng)};
    }

    const int threadsPerBlock = 256;
    // Ceiling division: enough blocks to give every element its own thread.
    const int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
    std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

    boost::safe_numbers::device_error_context ctx;

    watch w;
    cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements);

    // A kernel launch returns no status of its own; a rejected launch is only
    // visible through cudaGetLastError(). Without this check a kernel that
    // never ran would surface later as a confusing verification mismatch.
    const cudaError_t launch_status = cudaGetLastError();
    if (launch_status != cudaSuccess)
    {
        std::cerr << "CUDA error after kernel launch: " << cudaGetErrorString(launch_status) << std::endl;
        return EXIT_FAILURE;
    }

    // NOTE(review): presumably blocks until the kernel has finished and
    // reports errors raised on the device - confirm against
    // cuda_error_reporting.hpp. The host reads of output_vector below rely on
    // the kernel having completed.
    ctx.synchronize();
    std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

    // Host reference: the same division performed element by element.
    std::vector<test_type> results;
    results.reserve(numElements);

    w.reset();
    for (int i = 0; i < numElements; ++i)
    {
        results.push_back(input_vector[i] / input_vector2[i]);
    }
    double t = w.elapsed();

    // Stop at the first mismatch between device and host results.
    for (int i = 0; i < numElements; ++i)
    {
        if (output_vector[i] != results[i])
        {
            std::cerr << "Result verification failed at element " << i << "!" << std::endl;
            return EXIT_FAILURE;
        }
    }

    std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
    std::cout << "Done\n";

    return 0;
}