Skip to content

Commit 95a4b57

Browse files
author
Jeff Hammond
committed
new implementation of atomics
The new version uses template functions instead of macros; template functions allow type safety to be enforced with static_assert. The old implementation is preserved for posterity, and a header guard was added. Because the old macro names were confusing, new names are used internally, and the old names are mapped onto them in the source so that application code does not need to change.

Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>
1 parent 50a5f3f commit 95a4b57

1 file changed

Lines changed: 157 additions & 1 deletion

File tree

src/AtomicMacro.hh

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#ifndef AtomicMacro_HH_
2+
#define AtomicMacro_HH_
3+
4+
#define USE_MACRO_FUNCTIONS 1
5+
16
//Determine which atomics to use based on platform being compiled for
27
//
38
//If compiling with CUDA
@@ -8,6 +13,153 @@
813
#define USE_OPENMP_ATOMICS
914
#endif
1015

16+
// --------------------------------------------------
// Original name            -> Inline function name
// --------------------------------------------------
// ATOMIC_WRITE( x, v )     -> ATOMIC_WRITE
// ATOMIC_UPDATE( x )       -> ATOMIC_INCREMENT
// ATOMIC_ADD( x, v )       -> ATOMIC_ADD
// ATOMIC_CAPTURE( x, v, p )-> ATOMIC_FETCH_ADD
// --------------------------------------------------
24+
25+
#if defined (USE_MACRO_FUNCTIONS)
26+
27+
// Backward-compatibility shims: the two old macro names whose spelling
// differs from the new inline functions are mapped here so existing
// application source does not need to change.  (ATOMIC_WRITE and
// ATOMIC_ADD keep their original names and need no mapping.)
#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p))
#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x))
29+
30+
#if defined(HAVE_CUDA) && defined(__CUDA_ARCH__)
31+
32+
// CUDA device backend.
//
// NOTE(review): ATOMIC_WRITE is a plain aligned store, exactly as in the
// original macro version; atomicExch would be the strictly atomic
// alternative -- confirm whether racing device writers require it.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v) {
    x = v;
}

// Atomically add one to x via device-wide atomicAdd.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
    // Cast the literal so overload resolution matches T exactly instead of
    // relying on an implicit int -> T conversion of the bare literal 1.
    atomicAdd( &x, static_cast<T>(1) );
}

// Atomically x += v (same type on both sides).
template <typename T>
inline void ATOMIC_ADD(T& x, T v) {
    atomicAdd( &x, v );
}

// Mixed-type x += v.  The width check rejects additions where a narrow
// target would silently truncate a wider addend.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    // Convert explicitly so the atomicAdd overload for T1 is selected.
    atomicAdd( &x, static_cast<T1>(v) );
}

// Fetch-and-add: p receives the value of x before the addition.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) {
    p = atomicAdd( &x, v );
}

// Mixed-type fetch-and-add; capture variable has the target's type.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    p = atomicAdd( &x, static_cast<T1>(v) );
}

// Fully general fetch-and-add: target, addend and capture may all differ.
// Both width checks must hold so no conversion can lose information.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    p = atomicAdd( &x, static_cast<T1>(v) );
}
70+
71+
#elif defined(USE_OPENMP_ATOMICS)
72+
73+
// OpenMP backend.  Each operation is guarded by the matching "omp atomic"
// directive; _Pragma (rather than #pragma) is used so the directive can be
// emitted from inside a template body.

// Atomically store v into x.
template <typename ValT>
inline void ATOMIC_WRITE(ValT & x, ValT v) {
    _Pragma("omp atomic write")
    x = v;
}

// Atomically add one to x.
template <typename ValT>
inline void ATOMIC_INCREMENT(ValT& x) {
    _Pragma("omp atomic update")
    x++;
}

// Atomically x += v, same type on both sides.
template <typename ValT>
inline void ATOMIC_ADD(ValT& x, ValT v) {
    _Pragma("omp atomic")
    x += v;
}

// Atomically x += v with differing types; the width check rejects an
// addend wider than the target.
template <typename DstT, typename SrcT>
inline void ATOMIC_ADD(DstT& x, SrcT v) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    _Pragma("omp atomic")
    x += v;
}

// Fetch-and-add: p receives the pre-addition value of x.  The structured
// block is the exact form the "omp atomic capture" construct requires.
template <typename ValT>
inline void ATOMIC_FETCH_ADD(ValT& x, ValT v, ValT& p) {
    _Pragma("omp atomic capture")
    {p = x; x = x + v;}
}

// Mixed-type fetch-and-add; the capture variable shares the target's type.
template <typename DstT, typename SrcT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, DstT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    _Pragma("omp atomic capture")
    {p = x; x = x + v;}
}

// Fully general fetch-and-add: target, addend and capture may all differ,
// with both width checks enforced.
template <typename DstT, typename SrcT, typename CapT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, CapT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    static_assert( sizeof(CapT) >= sizeof(DstT), "Unsafe: small := large");
    _Pragma("omp atomic capture")
    {p = x; x = x + v;}
}
118+
119+
#else // SEQUENTIAL
120+
121+
// Sequential fallback backend: with no threads in play, no synchronization
// is needed, so every operation is just the plain scalar statement.

// Store v into x.
template <typename ValT>
inline void ATOMIC_WRITE(ValT & x, ValT v) {
    x = v;
}

// Add one to x.
template <typename ValT>
inline void ATOMIC_INCREMENT(ValT& x) {
    x++;
}

// x += v, same type on both sides.
template <typename ValT>
inline void ATOMIC_ADD(ValT& x, ValT v) {
    x += v;
}

// x += v with differing types; the width check rejects an addend wider
// than the target.
template <typename DstT, typename SrcT>
inline void ATOMIC_ADD(DstT& x, SrcT v) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    x += v;
}

// Fetch-and-add: p receives the pre-addition value of x.
template <typename ValT>
inline void ATOMIC_FETCH_ADD(ValT& x, ValT v, ValT& p) {
    p = x;
    x = x + v;
}

// Mixed-type fetch-and-add; the capture variable shares the target's type.
template <typename DstT, typename SrcT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, DstT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    p = x;
    x = x + v;
}

// Fully general fetch-and-add: target, addend and capture may all differ,
// with both width checks enforced.
template <typename DstT, typename SrcT, typename CapT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, CapT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    static_assert( sizeof(CapT) >= sizeof(DstT), "Unsafe: small := large");
    p = x;
    x = x + v;
}
159+
160+
#endif // BACKENDS
161+
162+
#else // ! USE_MACRO_FUNCTIONS
11163

12164
#if defined (HAVE_CUDA)
13165

@@ -97,4 +249,8 @@
97249
#define ATOMIC_CAPTURE( x, v, p ) \
98250
{p = x; x = x + v;}
99251

100-
#endif
252+
#endif // BACKENDS
253+
254+
#endif // USE_MACRO_FUNCTIONS
255+
256+
#endif // AtomicMacro_HH_

0 commit comments

Comments
 (0)