Skip to content

Commit 3a495ce

Browse files
authored
Merge pull request #52 from NVIDIA-developer-blog/printf-alternative
Printf-alternative blog files
2 parents 708ce91 + 6cc699a commit 3a495ce

4 files changed

Lines changed: 515 additions & 0 deletions

File tree

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
/*
2+
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
* SPDX-License-Identifier: BSD-3-Clause
4+
*
5+
* Redistribution and use in source and binary forms, with or without
6+
* modification, are permitted provided that the following conditions are met:
7+
*
8+
* 1. Redistributions of source code must retain the above copyright notice, this
9+
* list of conditions and the following disclaimer.
10+
*
11+
* 2. Redistributions in binary form must reproduce the above copyright notice,
12+
* this list of conditions and the following disclaimer in the documentation
13+
* and/or other materials provided with the distribution.
14+
*
15+
* 3. Neither the name of the copyright holder nor the names of its
16+
* contributors may be used to endorse or promote products derived from
17+
* this software without specific prior written permission.
18+
*
19+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29+
*/
30+
31+
#pragma once
32+
#include <iostream>
33+
#include <stdio.h>
34+
#include <assert.h>
35+
#include "cuda.h"
36+
#include <memory>
37+
#include <cuda/atomic>
38+
39+
namespace CASError {
40+
41+
enum AtomicStatus {
42+
ATOMIC_NO_ERROR = 0,
43+
ATOMIC_ERROR_REPORTED = 1
44+
};
45+
46+
inline
47+
cudaError_t checkCuda(cudaError_t result)
48+
{
49+
#if defined(DEBUG) || defined(_DEBUG)
50+
if (result != cudaSuccess) {
51+
fprintf(stderr, "CUDA Runtime Error: %s\n",
52+
cudaGetErrorString(result));
53+
assert(result == cudaSuccess);
54+
}
55+
#endif
56+
return result;
57+
}
58+
59+
// Allocates system-pinned memory of type ErrorType
60+
template < typename ErrorType>
61+
struct PinnedMemory
62+
{
63+
using data_type = ErrorType;
64+
65+
PinnedMemory() {
66+
checkCuda(cudaMallocHost(&hdata, sizeof(data_type)));
67+
}
68+
69+
~PinnedMemory(){
70+
cudaFreeHost(hdata);
71+
}
72+
data_type *hdata;
73+
};
74+
75+
// DeviceStatus allocates system-pinned memory of StatusType and also allocates corresponding device memory of StatusType
76+
template <typename StatusType>
77+
struct DeviceStatus
78+
{
79+
80+
using status_type = StatusType;
81+
82+
DeviceStatus () {
83+
checkCuda(cudaMallocHost(&host_status, sizeof(status_type), cudaHostAllocMapped));
84+
checkCuda(cudaMalloc(&device_status, sizeof(status_type)));
85+
}
86+
87+
~DeviceStatus() {
88+
checkCuda(cudaFreeHost(host_status));
89+
checkCuda(cudaFree(device_status));
90+
}
91+
92+
status_type __host__ status() {
93+
return static_cast<volatile cuda::std::atomic<StatusType> *>(host_status)->load(cuda::memory_order_acquire);
94+
}
95+
96+
cuda::std::atomic<StatusType> *host_status;
97+
StatusType *device_status;
98+
};
99+
100+
// This struct represents the data accessible and modifiable on the device and contains pointers to relevant information
101+
template <typename ErrorType>
102+
struct MappedErrorTypeDeviceData {
103+
using status_type = AtomicStatus;
104+
// these two members are used so that they can be accessed from the device directly
105+
cuda::std::atomic<status_type> * host_status;
106+
status_type * device_status;
107+
108+
// pointer to pinned data to be accessed from the device directly
109+
ErrorType * host_data;
110+
111+
void inline __device__ synchronizeStatus() {
112+
host_status->store(ATOMIC_ERROR_REPORTED, cuda::memory_order_release);
113+
}
114+
};
115+
116+
117+
/* The MappedErrorType creates system-pinned memory of ErrorType, as well as corresponding DeviceStatus.
118+
* Using the available methods guarantees the necessary memory fences to avoid asynchronous race conditions.
119+
*/
120+
121+
template <typename ErrorType>
122+
struct MappedErrorType
123+
{
124+
// Use same status type as MappedErrorTypeDeviceData
125+
using status_type = typename MappedErrorTypeDeviceData<ErrorType>::status_type;
126+
127+
// System-pinned error payload to be written by supplied function
128+
std::shared_ptr<PinnedMemory<ErrorType>> error_data;
129+
130+
// Error reporting indicator to coordinate asynchronous soft error reporting
131+
std::shared_ptr<DeviceStatus<status_type>> status;
132+
133+
// The necessary device-side pointers needed for proper reporting
134+
MappedErrorTypeDeviceData<ErrorType> deviceData;
135+
136+
MappedErrorType (cudaStream_t stream = 0)
137+
: error_data(new PinnedMemory<ErrorType>()),
138+
status(new DeviceStatus<status_type>()),
139+
deviceData (MappedErrorTypeDeviceData<ErrorType>{.host_status =status->host_status,
140+
.device_status =status->device_status,
141+
.host_data = error_data->hdata})
142+
{
143+
deviceData.host_status->store(ATOMIC_NO_ERROR, cuda::memory_order_release);
144+
checkCuda(cudaMemsetAsync(deviceData.device_status, ATOMIC_NO_ERROR, sizeof(status_type), stream));
145+
}
146+
147+
/// Checks on the host if an error has been reported
148+
bool __host__ checkErrorReported() {
149+
return (status->status() == ATOMIC_ERROR_REPORTED);
150+
}
151+
152+
/** Returns host-pinned error payload
153+
* Note: If error data includes device pointers (e.g. const char*) you will need to properly post-processes these pointers
154+
*/
155+
volatile ErrorType & __host__ get() {
156+
return *static_cast<volatile ErrorType *>(deviceData.host_data);
157+
}
158+
159+
/// Clears both the device-side status and host-side
160+
void __host__ clear(cudaStream_t stream = 0) {
161+
checkCuda(cudaMemsetAsync(deviceData.device_status, ATOMIC_NO_ERROR, sizeof(status_type), stream));
162+
(deviceData.host_status)->store(ATOMIC_NO_ERROR, cuda::memory_order_release);
163+
}
164+
165+
void inline __device__ synchronizeStatus() {
166+
deviceData.synchronizeStatus();
167+
}
168+
169+
};
170+
171+
/// Retrieve variable like __FILE__ when set in device memory to host
172+
std::string getDeviceString(const char * device_string, cudaStream_t stream = 0) {
173+
CUdeviceptr pbase;
174+
std::size_t psize;
175+
cuMemGetAddressRange(&pbase, &psize, reinterpret_cast<CUdeviceptr>(device_string));
176+
std::string str;
177+
str.resize(psize);
178+
cudaMemcpyAsync(str.data(), device_string, psize, cudaMemcpyDeviceToHost, stream);
179+
return str;
180+
}
181+
182+
template <typename ErrorType, typename FunctionType>
183+
inline __device__ void report_first_error(MappedErrorType<ErrorType> & error_dat, FunctionType func){
184+
if (atomicCAS(reinterpret_cast<int*>(error_dat.deviceData.device_status), static_cast<int>(ATOMIC_NO_ERROR), static_cast<int>(ATOMIC_ERROR_REPORTED)) == static_cast<int>(ATOMIC_NO_ERROR) ) {
185+
func(*error_dat.deviceData.host_data);
186+
__threadfence_system();
187+
error_dat.synchronizeStatus();
188+
}
189+
}
190+
191+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
cmake_minimum_required(VERSION 3.16)
2+
project(error_example CXX CUDA)
3+
4+
set(CMAKE_CUDA_ARCHITECTURES "60;70;80")
5+
6+
find_package(CUDAToolkit REQUIRED)
7+
8+
add_executable(error_example main.cu )
9+
target_link_libraries(error_example CUDA::cuda_driver)
10+
target_compile_options(error_example PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:
11+
--generate-line-info
12+
#-G
13+
#-g
14+
--extended-lambda
15+
-Xptxas=-v
16+
>)
17+

posts/printf-alternative/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Printf-alternative
2+
3+
Often times users may be tempted to use `printf` to signal soft warnings; however the compiler may reserve extra registers for `printf` even if the warning rarely occurs. This in turn can affect performance. The `CASerror.h` header-only library implements a performant soft-warning printf-alternative using cuda atomics and system pinned memory. An example of how to use this header-only library in action is provided in `main.cu`.

0 commit comments

Comments
 (0)