Implement AtomicInterfaceControl/Execution with free functions

Now that we have atomic free functions (e.g. `vtkm::AtomicAdd()`), we no
longer need special implementations for control and each execution
device. (Well, technically we do have special implementations for each,
but they are handled with compiler directives in the free functions.)
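
For illustration, here is a minimal sketch of that dispatch pattern; the function name is hypothetical and the real `vtkm/Atomic.h` is more involved:

```cpp
#include <vtkm/Types.h>

// Hypothetical sketch only (not the actual vtkm/Atomic.h source): one free
// function that hides per-device implementations behind compiler directives.
VTKM_EXEC_CONT inline vtkm::UInt32 AtomicAddSketch(vtkm::UInt32* addr, vtkm::UInt32 arg)
{
#if defined(__CUDA_ARCH__)
  // Compiling CUDA device code: use the CUDA intrinsic.
  return atomicAdd(addr, arg);
#elif defined(VTKM_MSVC)
  // MSVC host code: use the Interlocked intrinsics.
  return static_cast<vtkm::UInt32>(_InterlockedExchangeAdd(
    reinterpret_cast<volatile long*>(addr), static_cast<long>(arg)));
#else
  // GCC/Clang host code: use the __atomic builtins.
  return __atomic_fetch_add(addr, arg, __ATOMIC_SEQ_CST);
#endif
}
```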

Convert the old atomic interface classes (`AtomicInterfaceControl` and
`AtomicInterfaceExecution`) to use the new atomic free functions. This
will allow us to test the new atomic functions everywhere that atomics
are used in VTK-m.
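
In practice, the conversion makes each method of the interface classes a thin forwarder to the corresponding free function, roughly like this (abbreviated from the diff below):

```cpp
// Abbreviated form of the converted AtomicInterfaceControl: templated
// wrappers over the vtkm:: atomic free functions.
struct AtomicInterfaceControl
{
  using WordTypes = vtkm::AtomicTypesSupported;
  using WordTypePreferred = vtkm::AtomicTypePreferred;

  template <typename T>
  VTKM_EXEC_CONT static T Add(T* addr, T arg)
  {
    return vtkm::AtomicAdd(addr, arg);
  }

  template <typename T>
  VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
  {
    // Note the argument order: the old interface takes (addr, newWord, expected)
    // while the free function takes (addr, expected, newWord).
    return vtkm::AtomicCompareAndSwap(addr, expected, newWord);
  }

  // Load, Store, Not, And, Or, and Xor forward the same way.
};
```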

Once verified, we can deprecate the old atomic interface classes.
Kenneth Moreland 2020-08-19 09:48:12 -06:00
parent ebbebd7369
commit d3503bfaba
19 changed files with 77 additions and 627 deletions

@@ -722,14 +722,6 @@ public:
#endif
};
/// \brief Class providing a device-specific support for atomic operations.
///
/// AtomicInterfaceControl provides atomic operations for the control
/// environment, and may be subclassed to implement the device interface when
/// appropriate for a CPU-based device.
template <typename DeviceTag>
class AtomicInterfaceExecution;
/// \brief Class providing a device-specific support for selecting the optimal
/// Task type for a given worklet.
///

@@ -1,143 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
#define vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/List.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <>
class AtomicInterfaceExecution<DeviceAdapterTagCuda>
{
public:
// Note: There are 64-bit atomics available, but not on all devices. Stick
// with 32-bit only until we require compute capability 3.5+
using WordTypes = vtkm::List<vtkm::UInt32>;
using WordTypePreferred = vtkm::UInt32;
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Load(const type* addr) \
{ \
const volatile type* vaddr = addr; /* volatile to bypass cache*/ \
const type value = *vaddr; \
/* fence to ensure that dependent reads are correctly ordered */ \
__threadfence(); \
return value; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static void Store(type* addr, type value) \
{ \
volatile type* vaddr = addr; /* volatile to bypass cache */ \
/* fence to ensure that previous non-atomic stores are visible to other threads */ \
__threadfence(); \
*vaddr = value; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Add(type* addr, type arg) \
{ \
__threadfence(); \
auto result = atomicAdd(addr, arg); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Not(type* addr) \
{ \
return AtomicInterfaceExecution::Xor(addr, static_cast<type>(~type{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type And(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicAnd(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Or(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicOr(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type Xor(type* addr, type mask) \
{ \
__threadfence(); \
auto result = atomicXor(addr, mask); \
__threadfence(); \
return result; \
} \
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static type CompareAndSwap( \
type* addr, type newWord, type expected) \
{ \
__threadfence(); \
auto result = atomicCAS(addr, expected, newWord); \
__threadfence(); \
return result; \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
// We also support Load, Add & CAS for 64-bit unsigned ints in order to
// support AtomicArray usecases. We can't generally support UInt64 without
// bumping our minimum device req to compute capability 3.5 (though we could
// just use CAS for everything if this becomes a need). All of our supported
// devices do support add / CAS on UInt64, just not all the bit stuff.
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static vtkm::UInt64 Load(const vtkm::UInt64* addr)
{
const volatile vtkm::UInt64* vaddr = addr; /* volatile to bypass cache*/
const vtkm::UInt64 value = *vaddr;
/* fence to ensure that dependent reads are correctly ordered */
__threadfence();
return value;
}
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static void Store(vtkm::UInt64* addr, vtkm::UInt64 value)
{
volatile vtkm::UInt64* vaddr = addr; /* volatile to bypass cache */
/* fence to ensure that previous non-atomic stores are visible to other threads */
__threadfence();
*vaddr = value;
}
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static vtkm::UInt64 Add(vtkm::UInt64* addr,
vtkm::UInt64 arg)
{
__threadfence();
auto result = atomicAdd(addr, arg);
__threadfence();
return result;
}
VTKM_SUPPRESS_EXEC_WARNINGS __device__ static vtkm::UInt64 CompareAndSwap(vtkm::UInt64* addr,
vtkm::UInt64 newWord,
vtkm::UInt64 expected)
{
__threadfence();
auto result = atomicCAS(addr, expected, newWord);
__threadfence();
return result;
}
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_cuda_internal_AtomicInterfaceExecutionCuda_h

@@ -10,7 +10,6 @@
set(headers
ArrayManagerExecutionCuda.h
AtomicInterfaceExecutionCuda.h
CudaAllocator.h
DeviceAdapterAlgorithmCuda.h
DeviceAdapterMemoryManagerCuda.h

@@ -27,8 +27,6 @@
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <vtkm/cont/cuda/ErrorCuda.h>
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTimerImplementationCuda.h>

@@ -10,19 +10,7 @@
#ifndef vtk_m_cont_internal_AtomicInterfaceControl_h
#define vtk_m_cont_internal_AtomicInterfaceControl_h
#include <vtkm/internal/Configure.h>
#include <vtkm/internal/Windows.h>
#include <vtkm/List.h>
#include <vtkm/Types.h>
#if defined(VTKM_MSVC) && !defined(VTKM_CUDA)
#include <intrin.h> // For MSVC atomics
#endif
#include <atomic>
#include <cstdint>
#include <cstring>
#include <vtkm/Atomic.h>
namespace vtkm
{
@@ -31,195 +19,58 @@ namespace cont
namespace internal
{
/**
* Implementation of AtomicInterfaceDevice that uses control-side atomics.
*/
class AtomicInterfaceControl
struct AtomicInterfaceControl
{
public:
using WordTypes = vtkm::List<vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64>;
using WordTypes = vtkm::AtomicTypesSupported;
using WordTypePreferred = vtkm::AtomicTypePreferred;
// TODO These support UInt64, too. This should be benchmarked to see which
// is faster.
using WordTypePreferred = vtkm::UInt32;
#ifdef VTKM_MSVC
private:
template <typename To, typename From>
VTKM_EXEC_CONT static To BitCast(const From& src)
template <typename T>
VTKM_EXEC_CONT static T Load(const T* addr)
{
// The memcpy should be removed by the compiler when possible, but this
// works around a host of issues with bitcasting using reinterpret_cast.
VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
To dst;
std::memcpy(&dst, &src, sizeof(From));
return dst;
return vtkm::AtomicLoad(addr);
}
template <typename T>
VTKM_EXEC_CONT static T BitCast(T&& src)
VTKM_EXEC_CONT static void Store(T* addr, T value)
{
return std::forward<T>(src);
vtkm::AtomicStore(addr, value);
}
public:
// Note about Load and Store implementations:
//
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
// operations"
//
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
// atomic on 32-bit Windows."
//
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
// guaranteed to be atomic on any platform."
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt8 Load(const vtkm::UInt8* addr)
template <typename T>
VTKM_EXEC_CONT static T Add(T* addr, T arg)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 8-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
return vtkm::AtomicAdd(addr, arg);
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt16 Load(const vtkm::UInt16* addr)
template <typename T>
VTKM_EXEC_CONT static T Not(T* addr)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 16-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
return vtkm::AtomicNot(addr);
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt32 Load(const vtkm::UInt32* addr)
template <typename T>
VTKM_EXEC_CONT static T And(T* addr, T mask)
{
auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
return vtkm::AtomicAnd(addr, mask);
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkm::UInt64 Load(const vtkm::UInt64* addr)
template <typename T>
VTKM_EXEC_CONT static T Or(T* addr, T mask)
{
auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
std::atomic_thread_fence(std::memory_order_acquire);
return result;
return vtkm::AtomicOr(addr, mask);
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt8* addr, vtkm::UInt8 val)
template <typename T>
VTKM_EXEC_CONT static T Xor(T* addr, T mask)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
return vtkm::AtomicXor(addr, mask);
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt16* addr, vtkm::UInt16 val)
template <typename T>
VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
return vtkm::AtomicCompareAndSwap(addr, expected, newWord);
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt32* addr, vtkm::UInt32 val)
{
std::atomic_thread_fence(std::memory_order_release);
*addr = val;
}
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(vtkm::UInt64* addr, vtkm::UInt64 val)
{
std::atomic_thread_fence(std::memory_order_release);
*addr = val;
}
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Add(vtkmType* addr, vtkmType arg) \
{ \
return BitCast<vtkmType>(_InterlockedExchangeAdd##suffix( \
reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Not(vtkmType* addr) \
{ \
return Xor(addr, static_cast<vtkmType>(~vtkmType{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType And(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedAnd##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Or(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedOr##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType Xor(vtkmType* addr, vtkmType mask) \
{ \
return BitCast<vtkmType>( \
_InterlockedXor##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(mask))); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static vtkmType CompareAndSwap( \
vtkmType* addr, vtkmType newWord, vtkmType expected) \
{ \
return BitCast<vtkmType>( \
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
BitCast<winType>(newWord), \
BitCast<winType>(expected))); \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#else // gcc/clang
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Load(const type* addr) \
{ \
return __atomic_load_n(addr, __ATOMIC_ACQUIRE); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static void Store(type* addr, type value) \
{ \
return __atomic_store_n(addr, value, __ATOMIC_RELEASE); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Add(type* addr, type arg) \
{ \
return __atomic_fetch_add(addr, arg, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Not(type* addr) \
{ \
return Xor(addr, static_cast<type>(~type{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type And(type* addr, type mask) \
{ \
return __atomic_fetch_and(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Or(type* addr, type mask) \
{ \
return __atomic_fetch_or(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type Xor(type* addr, type mask) \
{ \
return __atomic_fetch_xor(addr, mask, __ATOMIC_SEQ_CST); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC_CONT static type CompareAndSwap( \
type* addr, type newWord, type expected) \
{ \
__atomic_compare_exchange_n( \
addr, &expected, newWord, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
return expected; \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#endif
};
}
}

@@ -10,7 +10,7 @@
#ifndef vtk_m_cont_internal_AtomicInterfaceExecution_h
#define vtk_m_cont_internal_AtomicInterfaceExecution_h
#include <vtkm/Types.h>
#include <vtkm/Atomic.h>
namespace vtkm
{
@@ -19,104 +19,60 @@ namespace cont
namespace internal
{
/// Class template that provides a collection of static methods that perform
/// atomic operations on raw addresses. It is the responsibility of the caller
/// to ensure that the addresses are properly aligned.
///
/// The class defines a WordTypePreferred member that is the fastest available
/// for bitwise operations of the given device. At minimum, the interface must
/// support operations on WordTypePreferred and vtkm::WordTypeDefault, which may
/// be the same. A full list of supported word types is advertised in the type
/// list @a WordTypes.
///
/// In addition, each atomic interface must support Add and CompareAndSwap on
/// UInt32 and UInt64, as these are required for the AtomicArray implementation.
///
/// To implement this on devices that share the control environment, subclass
/// vtkm::cont::internal::AtomicInterfaceControl, which may also be used
/// directly from control-side code.
template <typename DeviceTag>
class AtomicInterfaceExecution
#ifdef VTKM_DOXYGEN_ONLY
struct AtomicInterfaceExecution
{
/// The preferred word type for the target device for bitwise atomic
/// operations.
using WordTypePreferred = FastestWordTypeForDevice;
using WordTypes = vtkm::AtomicTypesSupported;
using WordTypePreferred = vtkm::AtomicTypePreferred;
using WordTypes = vtkm::List<vtkm::WordTypeDefault, WordTypePreferred>;
template <typename T>
VTKM_EXEC_CONT static T Load(const T* addr)
{
return vtkm::AtomicLoad(addr);
}
/// Atomically load a value from memory while enforcing, at minimum, "acquire"
/// memory ordering.
VTKM_EXEC static vtkm::WordTypeDefault Load(vtkm::WordTypeDefault* addr);
VTKM_EXEC static WordTypePreferred Load(WordTypePreferred* addr);
template <typename T>
VTKM_EXEC_CONT static void Store(T* addr, T value)
{
vtkm::AtomicStore(addr, value);
}
/// Atomically write a value to memory while enforcing, at minimum, "release"
/// memory ordering.
VTKM_EXEC static void Store(vtkm::WordTypeDefault* addr, vtkm::WordTypeDefault value);
VTKM_EXEC static void Store(WordTypePreferred* addr, WordTypePreferred value);
template <typename T>
VTKM_EXEC_CONT static T Add(T* addr, T arg)
{
return vtkm::AtomicAdd(addr, arg);
}
/// Perform an atomic integer add operation on the word at @a addr, adding
/// @arg. This operation performs a full memory barrier around the atomic
/// access.
///
/// The value at @a addr prior to the addition is returned.
///
/// @note Overflow behavior is not defined for this operation.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Add(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault arg);
VTKM_EXEC static WordTypePreferred Add(WordTypePreferred* addr, WordTypePreferred arg);
/// @}
template <typename T>
VTKM_EXEC_CONT static T Not(T* addr)
{
return vtkm::AtomicNot(addr);
}
/// Perform a bitwise atomic not operation on the word at @a addr.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Not(vtkm::WordTypeDefault* addr);
VTKM_EXEC static WordTypePreferred Not(WordTypePreferred* addr);
/// @}
template <typename T>
VTKM_EXEC_CONT static T And(T* addr, T mask)
{
return vtkm::AtomicAnd(addr, mask);
}
/// Perform a bitwise atomic and operation on the word at @a addr.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault And(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault mask);
VTKM_EXEC static WordTypePreferred And(WordTypePreferred* addr, WordTypePreferred mask);
/// @}
template <typename T>
VTKM_EXEC_CONT static T Or(T* addr, T mask)
{
return vtkm::AtomicOr(addr, mask);
}
/// Perform a bitwise atomic or operation on the word at @a addr.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Or(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault mask);
VTKM_EXEC static WordTypePreferred Or(WordTypePreferred* addr, WordTypePreferred mask);
/// @}
template <typename T>
VTKM_EXEC_CONT static T Xor(T* addr, T mask)
{
return vtkm::AtomicXor(addr, mask);
}
/// Perform a bitwise atomic xor operation on the word at @a addr.
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault Xor(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault mask);
VTKM_EXEC static WordTypePreferred Xor(WordTypePreferred* addr, WordTypePreferred mask);
/// @}
/// Perform an atomic CAS operation on the word at @a addr.
///
/// If the value at @a addr equals @a expected, @a addr will be set to
/// @a newWord and @a expected is returned. Otherwise, the value at @a addr
/// is returned and not modified.
///
/// This operation performs a full memory barrier around the atomic access.
/// @{
VTKM_EXEC static vtkm::WordTypeDefault CompareAndSwap(vtkm::WordTypeDefault* addr,
vtkm::WordTypeDefault newWord,
vtkm::WordTypeDefault expected);
VTKM_EXEC static WordTypePreferred CompareAndSwap(WordTypePreferred* addr,
WordTypePreferred newWord,
WordTypePreferred expected);
/// @}
}
#endif // VTKM_DOXYGEN_ONLY
;
template <typename T>
VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
{
return vtkm::AtomicCompareAndSwap(addr, expected, newWord);
}
};
}
}
} // end namespace vtkm::cont::internal

@@ -17,7 +17,6 @@
#if !defined(VTKM_KOKKOS_CUDA) || defined(VTKM_CUDA)
#include <vtkm/cont/kokkos/internal/ArrayManagerExecutionKokkos.h>
#include <vtkm/cont/kokkos/internal/AtomicInterfaceExecutionKokkos.h>
#include <vtkm/cont/kokkos/internal/DeviceAdapterAlgorithmKokkos.h>
#include <vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.h>
#include <vtkm/cont/kokkos/internal/DeviceAdapterRuntimeDetectorKokkos.h>

@@ -1,85 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_kokkos_internal_AtomicInterfaceExecutionKokkos_h
#define vtk_m_cont_kokkos_internal_AtomicInterfaceExecutionKokkos_h
#include <vtkm/cont/kokkos/internal/DeviceAdapterTagKokkos.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/List.h>
#include <vtkm/Types.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#include <Kokkos_Core.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm
{
namespace cont
{
namespace internal
{
template <>
class AtomicInterfaceExecution<DeviceAdapterTagKokkos>
{
public:
// Note: There are 64-bit atomics available, but not on all devices. Stick
// with 32-bit only until we require compute capability 3.5+
using WordTypes = vtkm::List<vtkm::UInt32, vtkm::UInt64>;
using WordTypePreferred = vtkm::UInt32;
#define VTKM_ATOMIC_OPS_FOR_TYPE(type) \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type Load(const type* addr) \
{ \
return Kokkos::Impl::atomic_load(addr); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static void Store(type* addr, type value) \
{ \
Kokkos::Impl::atomic_store(addr, value); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type Add(type* addr, type arg) \
{ \
return Kokkos::atomic_fetch_add(addr, arg); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type Not(type* addr) \
{ \
return Kokkos::atomic_fetch_xor(addr, static_cast<type>(~type{ 0u })); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type And(type* addr, type mask) \
{ \
return Kokkos::atomic_fetch_and(addr, mask); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type Or(type* addr, type mask) \
{ \
return Kokkos::atomic_fetch_or(addr, mask); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type Xor(type* addr, type mask) \
{ \
return Kokkos::atomic_fetch_xor(addr, mask); \
} \
VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC static type CompareAndSwap( \
type* addr, type newWord, type expected) \
{ \
return Kokkos::atomic_compare_exchange(addr, expected, newWord); \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_kokkos_internal_AtomicInterfaceExecutionKokkos_h

@@ -10,7 +10,6 @@
set(headers
ArrayManagerExecutionKokkos.h
AtomicInterfaceExecutionKokkos.h
DeviceAdapterAlgorithmKokkos.h
DeviceAdapterMemoryManagerKokkos.h
DeviceAdapterRuntimeDetectorKokkos.h

@@ -16,7 +16,6 @@
#ifdef VTKM_ENABLE_OPENMP
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/AtomicInterfaceExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterMemoryManagerOpenMP.h>
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>

@@ -1,35 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
#define vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <>
class AtomicInterfaceExecution<DeviceAdapterTagOpenMP> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_openmp_internal_AtomicInterfaceExecutionOpenMP_h

@@ -14,7 +14,6 @@ set(headers
DeviceAdapterMemoryManagerOpenMP.h
DeviceAdapterRuntimeDetectorOpenMP.h
DeviceAdapterTagOpenMP.h
AtomicInterfaceExecutionOpenMP.h
FunctorsOpenMP.h
ParallelQuickSortOpenMP.h
ParallelRadixSortOpenMP.h

@@ -15,7 +15,6 @@
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterRuntimeDetectorSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterMemoryManagerSerial.h>
#include <vtkm/cont/serial/internal/AtomicInterfaceExecutionSerial.h>
#include <vtkm/cont/serial/internal/ArrayManagerExecutionSerial.h>
#include <vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h>
#include <vtkm/cont/serial/internal/VirtualObjectTransferSerial.h>

@@ -1,35 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
#define vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h
#include <vtkm/cont/serial/internal/DeviceAdapterTagSerial.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <>
class AtomicInterfaceExecution<DeviceAdapterTagSerial> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_serial_internal_AtomicInterfaceExecutionSerial_h

@@ -10,7 +10,6 @@
set(headers
ArrayManagerExecutionSerial.h
AtomicInterfaceExecutionSerial.h
DeviceAdapterAlgorithmSerial.h
DeviceAdapterMemoryManagerSerial.h
DeviceAdapterRuntimeDetectorSerial.h

@@ -15,7 +15,6 @@
#ifdef VTKM_ENABLE_TBB
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/cont/tbb/internal/AtomicInterfaceExecutionTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterMemoryManagerTBB.h>
#include <vtkm/cont/tbb/internal/VirtualObjectTransferTBB.h>

@@ -1,35 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
#define vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/internal/AtomicInterfaceControl.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <>
class AtomicInterfaceExecution<DeviceAdapterTagTBB> : public AtomicInterfaceControl
{
};
}
}
} // end namespace vtkm::cont::internal
#endif // vtk_m_cont_tbb_internal_AtomicInterfaceExecutionTBB_h

@@ -10,7 +10,6 @@
set(headers
ArrayManagerExecutionTBB.h
AtomicInterfaceExecutionTBB.h
DeviceAdapterAlgorithmTBB.h
DeviceAdapterMemoryManagerTBB.h
DeviceAdapterRuntimeDetectorTBB.h

@@ -101,11 +101,6 @@ public:
}
};
template <>
class AtomicInterfaceExecution<DeviceAdapterTagTestAlgorithmGeneral> : public AtomicInterfaceControl
{
};
template <typename TargetClass>
struct VirtualObjectTransfer<TargetClass, vtkm::cont::DeviceAdapterTagTestAlgorithmGeneral> final
: public VirtualObjectTransferShareWithControl<TargetClass>