Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/modules/ROOT/pages/cuda.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ https://www.boost.org/LICENSE_1_0.txt

== Description

The types of this library support compilation with NVCC.
All integer types of this library (unsigned `u8`-`u128`, signed `i8`-`i128`, and bounded types) support compilation with NVCC.
To get the safety guarantees, there are some small modifications to the way that CUDA code is written.
Normally you would have something like this:

Expand Down
177 changes: 138 additions & 39 deletions include/boost/safe_numbers/detail/signed_integer_basis.hpp

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,41 @@ run test_cuda_u128_gcd.cu ;
run test_cuda_u128_lcm.cu ;
run test_cuda_u128_midpoint.cu ;

# i8 tests
run test_cuda_i8_add.cu ;
run test_cuda_i8_sub.cu ;
run test_cuda_i8_mul.cu ;
run test_cuda_i8_div.cu ;
run test_cuda_i8_mod.cu ;

# i16 tests
run test_cuda_i16_add.cu ;
run test_cuda_i16_sub.cu ;
run test_cuda_i16_mul.cu ;
run test_cuda_i16_div.cu ;
run test_cuda_i16_mod.cu ;

# i32 tests
run test_cuda_i32_add.cu ;
run test_cuda_i32_sub.cu ;
run test_cuda_i32_mul.cu ;
run test_cuda_i32_div.cu ;
run test_cuda_i32_mod.cu ;

# i64 tests
run test_cuda_i64_add.cu ;
run test_cuda_i64_sub.cu ;
run test_cuda_i64_mul.cu ;
run test_cuda_i64_div.cu ;
run test_cuda_i64_mod.cu ;

# i128 tests
run test_cuda_i128_add.cu ;
run test_cuda_i128_sub.cu ;
run test_cuda_i128_mul.cu ;
run test_cuda_i128_div.cu ;
run test_cuda_i128_mod.cu ;

# Examples
run ../examples/cuda.cu ;
run ../examples/cuda_error_handling.cu ;
Expand Down
86 changes: 86 additions & 0 deletions test/test_cuda_i128_add.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright Matt Borland 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_INT128_ALLOW_SIGN_CONVERSION
#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <limits>
#include <boost/safe_numbers/signed_integers.hpp>
#include <boost/safe_numbers/cuda_error_reporting.hpp>
#include <boost/safe_numbers/detail/int128/random.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

#include <cuda_runtime.h>

using test_type = boost::safe_numbers::i128;
using basis_type = test_type::basis_type;

__global__ void cuda_test(const test_type *in, test_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;

if (i < numElements)
{
out[i] = in[i] + in[i];
}
}

int main(void)
{
std::mt19937_64 rng{42};

int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

cuda_managed_ptr<test_type> input_vector(numElements);
cuda_managed_ptr<test_type> output_vector(numElements);

boost::random::uniform_int_distribution<basis_type> dist{(std::numeric_limits<basis_type>::min)() / 2, (std::numeric_limits<basis_type>::max)() / 2};
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = test_type{dist(rng)};
}

int threadsPerBlock = 256;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

boost::safe_numbers::device_error_context ctx;
watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), output_vector.get(), numElements);
ctx.synchronize();

std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

std::vector<test_type> results;
results.reserve(numElements);
w.reset();
for (int i = 0; i < numElements; ++i)
{
results.push_back(input_vector[i] + input_vector[i]);
}
double t = w.elapsed();

for (int i = 0; i < numElements; ++i)
{
if (output_vector[i] != results[i])
{
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
return EXIT_FAILURE;
}
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
89 changes: 89 additions & 0 deletions test/test_cuda_i128_div.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright Matt Borland 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_INT128_ALLOW_SIGN_CONVERSION
#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <limits>
#include <boost/safe_numbers/signed_integers.hpp>
#include <boost/safe_numbers/cuda_error_reporting.hpp>
#include <boost/safe_numbers/detail/int128/random.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

#include <cuda_runtime.h>

using test_type = boost::safe_numbers::i128;
using basis_type = test_type::basis_type;

__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;

if (i < numElements)
{
out[i] = in[i] / in2[i];
}
}

int main(void)
{
std::mt19937_64 rng{42};

int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

cuda_managed_ptr<test_type> input_vector(numElements);
cuda_managed_ptr<test_type> input_vector2(numElements);
cuda_managed_ptr<test_type> output_vector(numElements);

boost::random::uniform_int_distribution<basis_type> dist{(std::numeric_limits<basis_type>::min)(), (std::numeric_limits<basis_type>::max)()};
boost::random::uniform_int_distribution<basis_type> dist2{basis_type{1}, (std::numeric_limits<basis_type>::max)()};
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = test_type{dist(rng)};
input_vector2[i] = test_type{dist2(rng)};
}

int threadsPerBlock = 256;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

boost::safe_numbers::device_error_context ctx;
watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements);
ctx.synchronize();

std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

std::vector<test_type> results;
results.reserve(numElements);
w.reset();
for (int i = 0; i < numElements; ++i)
{
results.push_back(input_vector[i] / input_vector2[i]);
}
double t = w.elapsed();

for (int i = 0; i < numElements; ++i)
{
if (output_vector[i] != results[i])
{
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
return EXIT_FAILURE;
}
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
89 changes: 89 additions & 0 deletions test/test_cuda_i128_mod.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright Matt Borland 2026.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#define BOOST_INT128_ALLOW_SIGN_CONVERSION
#define BOOST_SAFE_NUMBERS_DETAIL_INT128_ALLOW_SIGN_CONVERSION

#include <iostream>
#include <iomanip>
#include <vector>
#include <random>
#include <limits>
#include <boost/safe_numbers/signed_integers.hpp>
#include <boost/safe_numbers/cuda_error_reporting.hpp>
#include <boost/safe_numbers/detail/int128/random.hpp>
#include <boost/random/uniform_int_distribution.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

#include <cuda_runtime.h>

using test_type = boost::safe_numbers::i128;
using basis_type = test_type::basis_type;

__global__ void cuda_test(const test_type *in, const test_type *in2, test_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;

if (i < numElements)
{
out[i] = in[i] % in2[i];
}
}

int main(void)
{
std::mt19937_64 rng{42};

int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

cuda_managed_ptr<test_type> input_vector(numElements);
cuda_managed_ptr<test_type> input_vector2(numElements);
cuda_managed_ptr<test_type> output_vector(numElements);

boost::random::uniform_int_distribution<basis_type> dist{(std::numeric_limits<basis_type>::min)(), (std::numeric_limits<basis_type>::max)()};
boost::random::uniform_int_distribution<basis_type> dist2{basis_type{1}, (std::numeric_limits<basis_type>::max)()};
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = test_type{dist(rng)};
input_vector2[i] = test_type{dist2(rng)};
}

int threadsPerBlock = 256;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

boost::safe_numbers::device_error_context ctx;
watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(input_vector.get(), input_vector2.get(), output_vector.get(), numElements);
ctx.synchronize();

std::cout << "CUDA kernel done in: " << w.elapsed() << "s" << std::endl;

std::vector<test_type> results;
results.reserve(numElements);
w.reset();
for (int i = 0; i < numElements; ++i)
{
results.push_back(input_vector[i] % input_vector2[i]);
}
double t = w.elapsed();

for (int i = 0; i < numElements; ++i)
{
if (output_vector[i] != results[i])
{
std::cerr << "Result verification failed at element " << i << "!" << std::endl;
return EXIT_FAILURE;
}
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
Loading
Loading