From 4935d1da4f0675b547fc5b72fa5d9cb280444881 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Tue, 7 Apr 2026 14:48:25 -0400
Subject: [PATCH 1/5] Add example of what will happen if user uses CUDA API

---
 ...da_error_handling_without_error_context.cu | 124 ++++++++++++++++++
 test/cuda_jamfile                             |   1 +
 2 files changed, 125 insertions(+)
 create mode 100644 examples/cuda_error_handling_without_error_context.cu
diff --git a/examples/cuda_error_handling_without_error_context.cu b/examples/cuda_error_handling_without_error_context.cu
new file mode 100644
index 0000000..aeebd43
--- /dev/null
+++ b/examples/cuda_error_handling_without_error_context.cu
@@ -0,0 +1,124 @@
+// Copyright 2026 Matt Borland
+// Distributed under the Boost Software License, Version 1.0.
+// https://www.boost.org/LICENSE_1_0.txt
+
+// This example demonstrates how to catch arithmetic errors that occur
+// on a CUDA device using device_error_context. When a safe_numbers
+// operation overflows on the GPU, the error is captured in managed
+// memory and rethrown with BOOST_THROW_EXCEPTION on the host when
+// you call ctx.synchronize().
+//
+// The device_error_context manages a dynamically allocated managed
+// memory buffer. When an error is detected, synchronize() clears the
+// error state and throws. After catching the exception, the same
+// context can be reused immediately for new kernel launches.
+
+#include <iostream>
+#include <limits>
+#include <boost/safe_numbers/unsigned_integers.hpp>
+#include <boost/safe_numbers/cuda_error_reporting.hpp>
+
+#include <cuda_runtime.h>
+
+using test_type = boost::safe_numbers::u32;
+using basis_type = test_type::basis_type;
+
+// This kernel deliberately overflows: it adds 1 to the maximum u32 value
+__global__ void overflow_kernel(test_type* out)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i == 0)
+    {
+        const test_type max_val {(std::numeric_limits<basis_type>::max)()};
+        out[0] = max_val + test_type{1};  // Overflow!
+    }
+}
+
+// This kernel performs valid arithmetic
+__global__ void safe_kernel(const test_type* in, test_type* out, int n)
+{
+    int i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i < n)
+    {
+        out[i] = in[i] + test_type{1};
+    }
+}
+
+int main()
+{
+    // ---------------------------------------------------------------
+    // Step 1: Launch a kernel that overflows
+    //
+    // Instead of using the booost::safe_numbers::cuda_error_context
+    // we will use standard CUDA machinery in case the user forgets
+    // or does not want to use it.
+    // ---------------------------------------------------------------
+
+    test_type* result = nullptr;
+    cudaMallocManaged(&result, sizeof(test_type));
+    cudaDeviceSynchronize();
+
+    std::cout << "=== Launching kernel that overflows ===" << std::endl;
+
+    overflow_kernel<<<1, 1>>>(result);
+    auto status = cudaDeviceSynchronize();
+
+    // The default should have trapped the kernel, leading to failure conditions
+    // Attempt to reset the device (which we know fails)
+    if (status != cudaSuccess)
+    {
+        const auto error = cudaGetLastError();
+        std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl;
+        cudaDeviceReset();
+        std::cerr << "Kernel has been reset via CUDA API" << std::endl;
+    }
+
+    // ---------------------------------------------------------------
+    // Step 2: Try to launch another kernel from the same process
+    //
+    // Since we have trapped the CUDA device in the background,
+    // it is unable to be launched again from the same process.
+    // This error cannot be cleared even though we called cudaDeviceReset()
+    // ---------------------------------------------------------------
+
+    std::cout << "\n=== Launching kernel with valid arithmetic ===" << std::endl;
+
+    test_type* data = nullptr;
+    test_type* out = nullptr;
+
+    cudaMallocManaged(&data, 4 * sizeof(test_type));
+    cudaMallocManaged(&out, 4 * sizeof(test_type));
+    status = cudaDeviceSynchronize();
+
+    if (status != cudaSuccess)
+    {
+        const auto error = cudaGetLastError();
+        std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl;
+    }
+
+    data[0] = test_type{10};
+    data[1] = test_type{20};
+    data[2] = test_type{30};
+    data[3] = test_type{40};
+
+    safe_kernel<<<1, 4>>>(data, out, 4);
+    status = cudaDeviceSynchronize();
+
+    if (status != cudaSuccess)
+    {
+        const auto error = cudaGetLastError();
+        std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl;
+    }
+
+    // ---------------------------------------------------------------
+    // Cleanup
+    // ---------------------------------------------------------------
+
+    cudaFree(result);
+    cudaFree(data);
+    cudaFree(out);
+
+    return 0;
+}
diff --git a/test/cuda_jamfile b/test/cuda_jamfile
index 43241bc..24b7ddd 100644
--- a/test/cuda_jamfile
+++ b/test/cuda_jamfile
@@ -312,3 +312,4 @@ run test_cuda_u128_midpoint.cu ;
 # Examples
 run ../examples/cuda.cu ;
 run ../examples/cuda_error_handling.cu ;
+run ../examples/cuda_error_handling_without_error_context.cu ;

From b8cb3720b925fe0526020675cdfcfa5f31d7f807 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Tue, 7 Apr 2026 14:54:23 -0400
Subject: [PATCH 2/5] Fix copy paste error

---
 examples/cuda_error_handling_without_error_context.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/cuda_error_handling_without_error_context.cu b/examples/cuda_error_handling_without_error_context.cu
index aeebd43..eab3869 100644
--- a/examples/cuda_error_handling_without_error_context.cu
+++ b/examples/cuda_error_handling_without_error_context.cu
@@ -70,7 +70,7 @@ int main()
     if (status != cudaSuccess)
     {
         const auto error = cudaGetLastError();
-        std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl;
+        std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl;
         cudaDeviceReset();
         std::cerr << "Kernel has been reset via CUDA API" << std::endl;
     }
@@ -95,7 +95,7 @@ int main()
     if (status != cudaSuccess)
     {
         const auto error = cudaGetLastError();
-        std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl;
+        std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl;
     }
 
     data[0] = test_type{10};
@@ -109,7 +109,7 @@ int main()
     if (status != cudaSuccess)
     {
         const auto error = cudaGetLastError();
-        std::cerr << "Kernel failed with error: " << cudaGetErrorString(err) << std::endl;
+        std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl;
     }
 
     // ---------------------------------------------------------------

From 7642d801348341dd718a171d1fbb49c7179e982f Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Tue, 7 Apr 2026 15:01:00 -0400
Subject: [PATCH 3/5] Replace segfault with comment

---
 ...da_error_handling_without_error_context.cu | 39 ++++++++++++-------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/examples/cuda_error_handling_without_error_context.cu b/examples/cuda_error_handling_without_error_context.cu
index eab3869..eeb704d 100644
--- a/examples/cuda_error_handling_without_error_context.cu
+++ b/examples/cuda_error_handling_without_error_context.cu
@@ -97,28 +97,41 @@ int main()
         const auto error = cudaGetLastError();
         std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl;
     }
+    else
+    {
+        // If we had not checked status, this following code would have terminated with SegFault at data[0]
 
-    data[0] = test_type{10};
-    data[1] = test_type{20};
-    data[2] = test_type{30};
-    data[3] = test_type{40};
+        data[0] = test_type{10};
+        data[1] = test_type{20};
+        data[2] = test_type{30};
+        data[3] = test_type{40};
 
-    safe_kernel<<<1, 4>>>(data, out, 4);
-    status = cudaDeviceSynchronize();
+        safe_kernel<<<1, 4>>>(data, out, 4);
+        status = cudaDeviceSynchronize();
 
-    if (status != cudaSuccess)
-    {
-        const auto error = cudaGetLastError();
-        std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl;
+        if (status != cudaSuccess)
+        {
+            const auto error = cudaGetLastError();
+            std::cerr << "Kernel failed with error: " << cudaGetErrorString(error) << std::endl;
+        }
     }
 
     // ---------------------------------------------------------------
     // Cleanup
     // ---------------------------------------------------------------
 
-    cudaFree(result);
-    cudaFree(data);
-    cudaFree(out);
+    if (result != nullptr)
+    {
+        cudaFree(result);
+    }
+    if (data != nullptr)
+    {
+        cudaFree(data);
+    }
+    if (out != nullptr)
+    {
+        cudaFree(out);
+    }
 
     return 0;
 }

From 8e563c97e5fd93f717ec0ab00b22308f922267df Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Tue, 7 Apr 2026 15:05:47 -0400
Subject: [PATCH 4/5] Add this example to examples.adoc

---
 doc/modules/ROOT/pages/examples.adoc | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/doc/modules/ROOT/pages/examples.adoc b/doc/modules/ROOT/pages/examples.adoc
index fb31fb3..8122f13 100644
--- a/doc/modules/ROOT/pages/examples.adoc
+++ b/doc/modules/ROOT/pages/examples.adoc
@@ -608,3 +608,29 @@ result[2] = 31
 result[3] = 41
 ----
 ====
+
+[#examples_cuda_error_handling_without_error_context]
+== CUDA Error Handling Without Error Context
+
+This example demonstrates what happens when a safe_numbers overflow occurs on a CUDA device *without* using `device_error_context`.
+The overflow triggers a device-side trap that corrupts the CUDA context, making `cudaDeviceSynchronize()` report an unspecified launch failure.
+Even after calling `cudaDeviceReset()`, the process cannot launch any further kernels — all subsequent CUDA operations fail.
+This motivates the use of `device_error_context` shown in the previous example.
+
+.This https://github.com/boostorg/safe_numbers/blob/develop/examples/cuda_error_handling_without_error_context.cu[example] demonstrates the consequences of not using `device_error_context` for GPU error handling.
+====
+[source, c++]
+----
+include::example$cuda_error_handling_without_error_context.cu[]
+----
+
+Output:
+----
+=== Launching kernel that overflows ===
+Kernel failed with error: unspecified launch failure
+Kernel has been reset via CUDA API
+
+=== Launching kernel with valid arithmetic ===
+Kernel failed with error: CUDA-capable device(s) is/are busy or unavailable
+----
+====

From ee44630567cc32a836655b38e174ee1145b98712 Mon Sep 17 00:00:00 2001
From: Matt Borland <matt@mattborland.com>
Date: Tue, 7 Apr 2026 15:05:59 -0400
Subject: [PATCH 5/5] Add links to all CUDA related examples

---
 doc/modules/ROOT/pages/cuda.adoc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/doc/modules/ROOT/pages/cuda.adoc b/doc/modules/ROOT/pages/cuda.adoc
index 2d7a0a3..2da5f89 100644
--- a/doc/modules/ROOT/pages/cuda.adoc
+++ b/doc/modules/ROOT/pages/cuda.adoc
@@ -290,3 +290,8 @@ Be aware that other threads in the kernel may continue executing with incorrect
 }
 ----
 
+== Examples
+
+* xref:examples.adoc#examples_cuda[CUDA Device Support] — demonstrates that all safe_numbers types and free functions work on a CUDA device.
+* xref:examples.adoc#examples_cuda_error_handling[CUDA Error Handling] — shows how to use `device_error_context` to catch device-side overflow on the host and recover gracefully.
+* xref:examples.adoc#examples_cuda_error_handling_without_error_context[CUDA Error Handling Without Error Context] — demonstrates what happens when an overflow occurs on the device *without* `device_error_context`: the CUDA context is irrecoverably corrupted and no further kernels can be launched.