Skip to content

Commit 95a4b57

Browse files
author
Jeff Hammond
committed
new implementation of atomics
The new version uses template functions instead of macros; template functions allow type safety to be enforced with static_assert. The old implementation is preserved for posterity, and a header guard was added. Because the old macro names were confusing, new names are used internally, and the old names are mapped onto them in the source so that application code does not need to change.

Signed-off-by: Jeff Hammond <jeff.r.hammond@intel.com>
1 parent 50a5f3f commit 95a4b57

1 file changed

Lines changed: 157 additions & 1 deletion

File tree

src/AtomicMacro.hh

Lines changed: 157 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
#ifndef AtomicMacro_HH_
2+
#define AtomicMacro_HH_
3+
4+
#define USE_MACRO_FUNCTIONS 1
5+
16
//Determine which atomics to use based on platform being compiled for
27
//
38
//If compiling with CUDA
@@ -8,6 +13,153 @@
813
#define USE_OPENMP_ATOMICS
914
#endif
1015

16+
// --------------------------------------------------
// Original name            -> Inline function name
// --------------------------------------------------
// ATOMIC_WRITE( x, v )     -> ATOMIC_WRITE
// ATOMIC_UPDATE( x )       -> ATOMIC_INCREMENT
// ATOMIC_ADD( x, v )       -> ATOMIC_ADD
// ATOMIC_CAPTURE( x, v, p )-> ATOMIC_FETCH_ADD
// --------------------------------------------------
24+
25+
#if defined (USE_MACRO_FUNCTIONS)
26+
27+
// Backward-compatibility shims: the two old macro names whose spelling
// differs from the new inline functions are mapped here so existing
// application source does not need to change.  (ATOMIC_WRITE and
// ATOMIC_ADD keep their original names and need no mapping.)
#define ATOMIC_CAPTURE( x, v, p ) ATOMIC_FETCH_ADD((x),(v),(p))
#define ATOMIC_UPDATE( x ) ATOMIC_INCREMENT((x))
29+
30+
#if defined(HAVE_CUDA) && defined(__CUDA_ARCH__)
31+
32+
// CUDA device backend.
//
// NOTE(review): ATOMIC_WRITE is a plain aligned store, exactly as in the
// original macro version; atomicExch would be the strictly atomic
// alternative -- confirm whether racing device writers require it.
template <typename T>
inline void ATOMIC_WRITE(T & x, T v) {
    x = v;
}

// Atomically add one to x via device-wide atomicAdd.
template <typename T>
inline void ATOMIC_INCREMENT(T& x) {
    // Cast the literal so overload resolution matches T exactly instead of
    // relying on an implicit int -> T conversion of the bare literal 1.
    atomicAdd( &x, static_cast<T>(1) );
}

// Atomically x += v (same type on both sides).
template <typename T>
inline void ATOMIC_ADD(T& x, T v) {
    atomicAdd( &x, v );
}

// Mixed-type x += v.  The width check rejects additions where a narrow
// target would silently truncate a wider addend.
template <typename T1, typename T2>
inline void ATOMIC_ADD(T1& x, T2 v) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    // Convert explicitly so the atomicAdd overload for T1 is selected.
    atomicAdd( &x, static_cast<T1>(v) );
}

// Fetch-and-add: p receives the value of x before the addition.
template <typename T>
inline void ATOMIC_FETCH_ADD(T& x, T v, T& p) {
    p = atomicAdd( &x, v );
}

// Mixed-type fetch-and-add; capture variable has the target's type.
template <typename T1, typename T2>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T1& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    p = atomicAdd( &x, static_cast<T1>(v) );
}

// Fully general fetch-and-add: target, addend and capture may all differ.
// Both width checks must hold so no conversion can lose information.
template <typename T1, typename T2, typename T3>
inline void ATOMIC_FETCH_ADD(T1& x, T2 v, T3& p) {
    static_assert( sizeof(T1) >= sizeof(T2), "Unsafe: small += large");
    static_assert( sizeof(T3) >= sizeof(T1), "Unsafe: small := large");
    p = atomicAdd( &x, static_cast<T1>(v) );
}
70+
71+
#elif defined(USE_OPENMP_ATOMICS)
72+
73+
// OpenMP backend.  Each operation is guarded by the matching "omp atomic"
// directive; _Pragma (rather than #pragma) is used so the directive can be
// emitted from inside a template body.

// Atomically store v into x.
template <typename ValT>
inline void ATOMIC_WRITE(ValT & x, ValT v) {
    _Pragma("omp atomic write")
    x = v;
}

// Atomically add one to x.
template <typename ValT>
inline void ATOMIC_INCREMENT(ValT& x) {
    _Pragma("omp atomic update")
    x++;
}

// Atomically x += v, same type on both sides.
template <typename ValT>
inline void ATOMIC_ADD(ValT& x, ValT v) {
    _Pragma("omp atomic")
    x += v;
}

// Atomically x += v with differing types; the width check rejects an
// addend wider than the target.
template <typename DstT, typename SrcT>
inline void ATOMIC_ADD(DstT& x, SrcT v) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    _Pragma("omp atomic")
    x += v;
}

// Fetch-and-add: p receives the pre-addition value of x.  The structured
// block is the exact form the "omp atomic capture" construct requires.
template <typename ValT>
inline void ATOMIC_FETCH_ADD(ValT& x, ValT v, ValT& p) {
    _Pragma("omp atomic capture")
    {p = x; x = x + v;}
}

// Mixed-type fetch-and-add; the capture variable shares the target's type.
template <typename DstT, typename SrcT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, DstT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    _Pragma("omp atomic capture")
    {p = x; x = x + v;}
}

// Fully general fetch-and-add: target, addend and capture may all differ,
// with both width checks enforced.
template <typename DstT, typename SrcT, typename CapT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, CapT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    static_assert( sizeof(CapT) >= sizeof(DstT), "Unsafe: small := large");
    _Pragma("omp atomic capture")
    {p = x; x = x + v;}
}
118+
119+
#else // SEQUENTIAL
120+
121+
// Sequential fallback backend: with no threads in play, no synchronization
// is needed, so every operation is just the plain scalar statement.

// Store v into x.
template <typename ValT>
inline void ATOMIC_WRITE(ValT & x, ValT v) {
    x = v;
}

// Add one to x.
template <typename ValT>
inline void ATOMIC_INCREMENT(ValT& x) {
    x++;
}

// x += v, same type on both sides.
template <typename ValT>
inline void ATOMIC_ADD(ValT& x, ValT v) {
    x += v;
}

// x += v with differing types; the width check rejects an addend wider
// than the target.
template <typename DstT, typename SrcT>
inline void ATOMIC_ADD(DstT& x, SrcT v) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    x += v;
}

// Fetch-and-add: p receives the pre-addition value of x.
template <typename ValT>
inline void ATOMIC_FETCH_ADD(ValT& x, ValT v, ValT& p) {
    p = x;
    x = x + v;
}

// Mixed-type fetch-and-add; the capture variable shares the target's type.
template <typename DstT, typename SrcT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, DstT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    p = x;
    x = x + v;
}

// Fully general fetch-and-add: target, addend and capture may all differ,
// with both width checks enforced.
template <typename DstT, typename SrcT, typename CapT>
inline void ATOMIC_FETCH_ADD(DstT& x, SrcT v, CapT& p) {
    static_assert( sizeof(DstT) >= sizeof(SrcT), "Unsafe: small += large");
    static_assert( sizeof(CapT) >= sizeof(DstT), "Unsafe: small := large");
    p = x;
    x = x + v;
}
159+
160+
#endif // BACKENDS
161+
162+
#else // ! USE_MACRO_FUNCTIONS
11163

12164
#if defined (HAVE_CUDA)
13165

@@ -97,4 +249,8 @@
97249
#define ATOMIC_CAPTURE( x, v, p ) \
98250
{p = x; x = x + v;}
99251

100-
#endif
252+
#endif // BACKENDS
253+
254+
#endif // USE_MACRO_FUNCTIONS
255+
256+
#endif // AtomicMacro_HH_

0 commit comments

Comments
 (0)