From 4935d1da4f0675b547fc5b72fa5d9cb280444881 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 7 Apr 2026 14:48:25 -0400 Subject: [PATCH 1/5] Add example of what will happen if user uses CUDA API --- ...da_error_handling_without_error_context.cu | 124 ++++++++++++++++++ test/cuda_jamfile | 1 + 2 files changed, 125 insertions(+) create mode 100644 examples/cuda_error_handling_without_error_context.cu diff --git a/examples/cuda_error_handling_without_error_context.cu b/examples/cuda_error_handling_without_error_context.cu new file mode 100644 index 0000000..aeebd43 --- /dev/null +++ b/examples/cuda_error_handling_without_error_context.cu @@ -0,0 +1,124 @@ +// Copyright 2026 Matt Borland +// Distributed under the Boost Software License, Version 1.0. +// https://www.boost.org/LICENSE_1_0.txt + +// This example demonstrates how to catch arithmetic errors that occur +// on a CUDA device using device_error_context. When a safe_numbers +// operation overflows on the GPU, the error is captured in managed +// memory and rethrown with BOOST_THROW_EXCEPTION on the host when +// you call ctx.synchronize(). +// +// The device_error_context manages a dynamically allocated managed +// memory buffer. When an error is detected, synchronize() clears the +// error state and throws. After catching the exception, the same +// context can be reused immediately for new kernel launches. + +#include +#include +#include +#include + +#include + +using test_type = boost::safe_numbers::u32; +using basis_type = test_type::basis_type; + +// This kernel deliberately overflows: it adds 1 to the maximum u32 value +__global__ void overflow_kernel(test_type* out) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i == 0) + { + const test_type max_val {(std::numeric_limits::max)()}; + out[0] = max_val + test_type{1}; // Overflow! + } +} + +// This kernel performs valid arithmetic +__global__ void safe_kernel(const test_type* in, test_type* out, int n) +{ + int i = blockDim.x * blockIdx.x + threadIdx.x; + + if (i < n) + { + out[i] = in[i] + test_type{1}; + } +} + +int main() +{ + // --------------------------------------------------------------- + // Step 1: Launch a kernel that overflows + // + // Instead of using the booost::safe_numbers::cuda_error_context + // we will use standard CUDA machinery in case the user forgets + // or does not want to use it. + // --------------------------------------------------------------- + + test_type* result = nullptr; + cudaMallocManaged(&result, sizeof(test_type)); + cudaDeviceSynchronize(); + + std::cout << "=== Launching kernel that overflows ===" << std::endl; + + overflow_kernel<<<1, 1>>>(result); + auto status = cudaDeviceSynchronize(); + + // The default should have trapped the kernel, leading to failure conditions + // Attempt to reset the device (which we know fails) + if (status != cudaSuccess) + { + const auto error = cudaGetLastError(); + std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl; + cudaDeviceReset(); + std::cerr << "Kernel has been reset via CUDA API" << std::endl; + } + + // --------------------------------------------------------------- + // Step 2: Try to launch another kernel from the same process + // + // Since we have trapped the CUDA device in the background, + // it is unable to be launched again from the same process. + // This error cannot be cleared even though we called cudaDeviceReset() + // --------------------------------------------------------------- + + std::cout << "\n=== Launching kernel with valid arithmetic ===" << std::endl; + + test_type* data = nullptr; + test_type* out = nullptr; + + cudaMallocManaged(&data, 4 * sizeof(test_type)); + cudaMallocManaged(&out, 4 * sizeof(test_type)); + status = cudaDeviceSynchronize(); + + if (status != cudaSuccess) + { + const auto error = cudaGetLastError(); + std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl; + } + + data[0] = test_type{10}; + data[1] = test_type{20}; + data[2] = test_type{30}; + data[3] = test_type{40}; + + safe_kernel<<<1, 4>>>(data, out, 4); + status = cudaDeviceSynchronize(); + + if (status != cudaSuccess) + { + const auto error = cudaGetLastError(); + std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl; + } + + // --------------------------------------------------------------- + // Cleanup + // --------------------------------------------------------------- + + cudaFree(result); + cudaFree(data); + cudaFree(out); + + return 0; +} diff --git a/test/cuda_jamfile b/test/cuda_jamfile index 43241bc..24b7ddd 100644 --- a/test/cuda_jamfile +++ b/test/cuda_jamfile @@ -312,3 +312,4 @@ run test_cuda_u128_midpoint.cu ; # Examples run ../examples/cuda.cu ; run ../examples/cuda_error_handling.cu ; +run ../examples/cuda_error_handling_without_error_context.cu ; From b8cb3720b925fe0526020675cdfcfa5f31d7f807 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 7 Apr 2026 14:54:23 -0400 Subject: [PATCH 2/5] Fix copy paste error --- examples/cuda_error_handling_without_error_context.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/cuda_error_handling_without_error_context.cu b/examples/cuda_error_handling_without_error_context.cu index aeebd43..eab3869 100644 --- a/examples/cuda_error_handling_without_error_context.cu +++ b/examples/cuda_error_handling_without_error_context.cu @@ -70,7 +70,7 @@ int main() if (status != cudaSuccess) { const auto error = cudaGetLastError(); - std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl; + std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl; cudaDeviceReset(); std::cerr << "Kernel has been reset via CUDA API" << std::endl; } @@ -95,7 +95,7 @@ int main() if (status != cudaSuccess) { const auto error = cudaGetLastError(); - std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl; + std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl; } data[0] = test_type{10}; @@ -109,7 +109,7 @@ int main() if (status != cudaSuccess) { const auto error = cudaGetLastError(); - std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl; + std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl; } // --------------------------------------------------------------- From 7642d801348341dd718a171d1fbb49c7179e982f Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 7 Apr 2026 15:01:00 -0400 Subject: [PATCH 3/5] Replace segfault with comment --- ...da_error_handling_without_error_context.cu | 39 ++++++++++++------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/examples/cuda_error_handling_without_error_context.cu b/examples/cuda_error_handling_without_error_context.cu index eab3869..eeb704d 100644 --- a/examples/cuda_error_handling_without_error_context.cu +++ b/examples/cuda_error_handling_without_error_context.cu @@ -97,28 +97,41 @@ int main() const auto error = cudaGetLastError(); std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl; } + else + { + // If we had not checked status, this following code would have terminated with SegFault at data[0] - data[0] = test_type{10}; - data[1] = test_type{20}; - data[2] = test_type{30}; - data[3] = test_type{40}; + data[0] = test_type{10}; + data[1] = test_type{20}; + data[2] = test_type{30}; + data[3] = test_type{40}; - safe_kernel<<<1, 4>>>(data, out, 4); - status = cudaDeviceSynchronize(); + safe_kernel<<<1, 4>>>(data, out, 4); + status = cudaDeviceSynchronize(); - if (status != cudaSuccess) - { - const auto error = cudaGetLastError(); - std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl; + if (status != cudaSuccess) + { + const auto error = cudaGetLastError(); + std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl; + } } // --------------------------------------------------------------- // Cleanup // --------------------------------------------------------------- - cudaFree(result); - cudaFree(data); - cudaFree(out); + if (result != nullptr) + { + cudaFree(result); + } + if (data != nullptr) + { + cudaFree(data); + } + if (out != nullptr) + { + cudaFree(out); + } return 0; } From 8e563c97e5fd93f717ec0ab00b22308f922267df Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 7 Apr 2026 15:05:47 -0400 Subject: [PATCH 4/5] Add this example to examples.adoc --- doc/modules/ROOT/pages/examples.adoc | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc index fb31fb3..8122f13 100644 --- a/doc/modules/ROOT/pages/examples.adoc +++ b/doc/modules/ROOT/pages/examples.adoc @@ -608,3 +608,29 @@ result[2] = 31 result[3] = 41 ---- ==== + +[#examples_cuda_error_handling_without_error_context] +== CUDA Error Handling Without Error Context + +This example demonstrates what happens when a safe_numbers overflow occurs on a CUDA device *without* using `device_error_context`. +The overflow triggers a device-side trap that corrupts the CUDA context, making `cudaDeviceSynchronize()` report an unspecified launch failure. +Even after calling `cudaDeviceReset()`, the process cannot launch any further kernels — all subsequent CUDA operations fail. +This motivates the use of `device_error_context` shown in the previous example. + +.This https://github.com/boostorg/safe_numbers/blob/develop/examples/cuda_error_handling_without_error_context.cu[example] demonstrates the consequences of not using `device_error_context` for GPU error handling. +==== +[source, c++] +---- +include::example$cuda_error_handling_without_error_context.cu[] +---- + +Output: +---- +=== Launching kernel that overflows === +Kernel failed with error: unspecified launch failure +Kernel has been reset via CUDA API + +=== Launching kernel with valid arithmetic === +Kernel failed with error: CUDA-capable device(s) is/are busy or unavailable +---- +==== From ee44630567cc32a836655b38e174ee1145b98712 Mon Sep 17 00:00:00 2001 From: Matt Borland Date: Tue, 7 Apr 2026 15:05:59 -0400 Subject: [PATCH 5/5] Add links to all CUDA related examples --- doc/modules/ROOT/pages/cuda.adoc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/modules/ROOT/pages/cuda.adoc b/doc/modules/ROOT/pages/cuda.adoc index 2d7a0a3..2da5f89 100644 --- a/doc/modules/ROOT/pages/cuda.adoc +++ b/doc/modules/ROOT/pages/cuda.adoc @@ -290,3 +290,8 @@ Be aware that other threads in the kernel may continue executing with incorrect } ---- +== Examples + +* xref:examples.adoc#examples_cuda[CUDA Device Support] — demonstrates that all safe_numbers types and free functions work on a CUDA device. +* xref:examples.adoc#examples_cuda_error_handling[CUDA Error Handling] — shows how to use `device_error_context` to catch device-side overflow on the host and recover gracefully. +* xref:examples.adoc#examples_cuda_error_handling_without_error_context[CUDA Error Handling Without Error Context] — demonstrates what happens when an overflow occurs on the device *without* `device_error_context`: the CUDA context is irrecoverably corrupted and no further kernels can be launched.