diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index bfa2707eb..d58144a25 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -32,9 +32,10 @@
 #     - tbb
 #     - openmp
 #     - mpich2
-#   * .gitlab/ci/docker/ubuntu1804/cuda10.1/
+#   * .gitlab/ci/docker/ubuntu1804/cuda11.1/
 #     - cuda
-#     - gcc 7.4
+#     - gcc 7
+#     - gcc 8
 #     - tbb
 #     - openmp
 #     - mpich2
@@ -49,52 +50,52 @@
     GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci
 
 .centos7: &centos7
-  image: "kitware/vtkm:ci-centos7_cuda10.2-20200820"
+  image: "kitware/vtkm:ci-centos7_cuda10.2-20201016"
   extends:
     - .docker_image
 
 .centos8: &centos8
-  image: "kitware/vtkm:ci-centos8-20200820"
+  image: "kitware/vtkm:ci-centos8-20201016"
   extends:
     - .docker_image
 
 .rhel8: &rhel8
-  image: "kitware/vtkm:ci-rhel8_cuda10.2-20200820"
+  image: "kitware/vtkm:ci-rhel8_cuda10.2-20201016"
   extends:
     - .docker_image
 
 .ubuntu1604: &ubuntu1604
-  image: "kitware/vtkm:ci-ubuntu1604-20200820"
+  image: "kitware/vtkm:ci-ubuntu1604-20201016"
   extends:
     - .docker_image
 
 .ubuntu1604_cuda: &ubuntu1604_cuda
-  image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200820"
+  image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20201016"
   extends:
     - .docker_image
 
 .ubuntu1804: &ubuntu1804
-  image: "kitware/vtkm:ci-ubuntu1804-20200820"
+  image: "kitware/vtkm:ci-ubuntu1804-20201016"
   extends:
     - .docker_image
 
 .ubuntu1804_cuda: &ubuntu1804_cuda
-  image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200820"
+  image: "kitware/vtkm:ci-ubuntu1804_cuda11.1-20201016"
   extends:
     - .docker_image
 
 .ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
-  image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20200820"
+  image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016"
   extends:
     - .docker_image
 
 .ubuntu2004_doxygen: &ubuntu2004_doxygen
-  image: "kitware/vtkm:ci-doxygen-20200820"
+  image: "kitware/vtkm:ci-doxygen-20201016"
   extends:
     - .docker_image
 
 .ubuntu2004_kokkos: &ubuntu2004_kokkos
-  image: "kitware/vtkm:ci-ubuntu2004_kokkos-20200820"
+  image: "kitware/vtkm:ci-ubuntu2004_kokkos-20201016"
   extends:
     - .docker_image
 
diff --git a/.gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile b/.gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile
similarity index 96%
rename from .gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile
rename to .gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile
index c50cb358a..fbdd44a7d 100644
--- a/.gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile
@@ -1,10 +1,10 @@
-FROM nvidia/cuda:10.1-devel-ubuntu18.04
+FROM nvidia/cuda:11.1-devel-ubuntu18.04
 LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
 
 # Base dependencies for building VTK-m projects
 RUN apt-get update && apt-get install -y --no-install-recommends \
       curl \
-      g++ \
+      g++-8 \
       clang-8 \
       git \
       git-lfs \
diff --git a/.gitlab/ci/docker/update_all.sh b/.gitlab/ci/docker/update_all.sh
index 644061af5..0acb769d9 100755
--- a/.gitlab/ci/docker/update_all.sh
+++ b/.gitlab/ci/docker/update_all.sh
@@ -30,8 +30,8 @@ cd ubuntu1804/base
 sudo docker build -t kitware/vtkm:ci-ubuntu1804-$date .
 cd ../..
 
-cd ubuntu1804/cuda10.1
-sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date .
+cd ubuntu1804/cuda11.1
+sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11.1-$date .
 cd ../..
 
 cd ubuntu1804/kokkos-cuda
diff --git a/.gitlab/ci/ubuntu1804.yml b/.gitlab/ci/ubuntu1804.yml
index 41c4858f8..d62b0895e 100644
--- a/.gitlab/ci/ubuntu1804.yml
+++ b/.gitlab/ci/ubuntu1804.yml
@@ -38,7 +38,7 @@ test:ubuntu1804_gcc9:
     - build:ubuntu1804_gcc9
 
 # Build on ubuntu1804 with CUDA + MPI and test on ubuntu1804
-# Uses gcc 7.4
+# Uses gcc 7
 # Uses MPICH2
 build:ubuntu1804_gcc7:
   tags:
@@ -55,6 +55,7 @@ build:ubuntu1804_gcc7:
   variables:
     CC: "gcc-7"
     CXX: "g++-7"
+    CUDAHOSTCXX: "g++-7"
     VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats+no_virtual"
 
 test:ubuntu1804_gcc7:
diff --git a/README.md b/README.md
index c7b9dd494..b1c6de3aa 100644
--- a/README.md
+++ b/README.md
@@ -76,8 +76,8 @@ VTK-m Requires:
 Optional dependencies are:
 
   + CUDA Device Adapter
-      + [Cuda Toolkit 9.2+](https://developer.nvidia.com/cuda-toolkit)
-      + Note CUDA >= 10.1 is required on Windows
+      + [Cuda Toolkit 9.2, >= 10.2](https://developer.nvidia.com/cuda-toolkit)
+      + Note CUDA >= 10.2 is required on Windows
   + TBB Device Adapter
       + [TBB](https://www.threadingbuildingblocks.org/)
   + OpenMP Device Adapter
@@ -105,12 +105,12 @@ VTK-m has been tested on the following configurations:c
   + On Linux
       + GCC 4.8.5, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
       + CMake 3.12, 3.13, 3.16, 3.17
-      + CUDA 9.2.148, 10.0.130, 10.1.105, 10.2.89
+      + CUDA 9.2, 10.2, 11.0, 11.1 
       + TBB 4.4 U2, 2017 U7
   + On Windows
       + Visual Studio 2015, 2017
       + CMake 3.12, 3.17
-      + CUDA 10.1
+      + CUDA 10.2
       + TBB 2017 U3, 2018 U2
   + On MacOS
       + AppleClang 9.1
diff --git a/benchmarking/BenchmarkAtomicArray.cxx b/benchmarking/BenchmarkAtomicArray.cxx
index e32205867..965c307b0 100644
--- a/benchmarking/BenchmarkAtomicArray.cxx
+++ b/benchmarking/BenchmarkAtomicArray.cxx
@@ -260,7 +260,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(
     ->ArgNames({ "Values", "Ops", "Stride" }),
   vtkm::cont::AtomicArrayTypeList);
 
-// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent
+// Benchmarks AtomicArray::CompareExchange such that each work index writes to adjacent
 // indices.
 struct CASSeqWorker : public vtkm::worklet::WorkletMapField
 {
@@ -273,12 +273,8 @@ struct CASSeqWorker : public vtkm::worklet::WorkletMapField
     const vtkm::Id idx = i % portal.GetNumberOfValues();
     const T val = static_cast<T>(i) + in;
     T oldVal = portal.Get(idx);
-    T assumed = static_cast<T>(0);
-    do
-    {
-      assumed = oldVal;
-      oldVal = portal.CompareAndSwap(idx, assumed + val, assumed);
-    } while (assumed != oldVal);
+    while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
+      ;
   }
 };
 
@@ -371,7 +367,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeqBaseline,
                                 ->ArgNames({ "Values", "Ops" }),
                               vtkm::cont::AtomicArrayTypeList);
 
-// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
+// Benchmarks AtomicArray::CompareExchange such that each work index writes to
 // a strided index:
 // ( floor(i / stride) + stride * (i % stride)
 struct CASStrideWorker : public vtkm::worklet::WorkletMapField
@@ -393,12 +389,8 @@ struct CASStrideWorker : public vtkm::worklet::WorkletMapField
     const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
     const T val = static_cast<T>(i) + in;
     T oldVal = portal.Get(idx);
-    T assumed = static_cast<T>(0);
-    do
-    {
-      assumed = oldVal;
-      oldVal = portal.CompareAndSwap(idx, assumed + val, assumed);
-    } while (assumed != oldVal);
+    while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
+      ;
   }
 };
 
diff --git a/vtkm/Algorithms.h b/vtkm/Algorithms.h
index 1686b9f78..2c53a4bd9 100644
--- a/vtkm/Algorithms.h
+++ b/vtkm/Algorithms.h
@@ -90,7 +90,7 @@ VTKM_EXEC_CONT vtkm::Id BinarySearch(const PortalT& portal, const T& val)
 template <typename IterT, typename T, typename Comp>
 VTKM_EXEC_CONT IterT LowerBound(IterT first, IterT last, const T& val, Comp comp)
 {
-#ifdef VTKM_CUDA
+#if defined(VTKM_CUDA) || defined(VTKM_HIP)
   auto len = last - first;
   while (len != 0)
   {
@@ -107,9 +107,9 @@ VTKM_EXEC_CONT IterT LowerBound(IterT first, IterT last, const T& val, Comp comp
     }
   }
   return first;
-#else  // VTKM_CUDA
+#else  // VTKM_CUDA || VTKM_HIP
   return std::lower_bound(first, last, val, std::move(comp));
-#endif // VTKM_CUDA
+#endif // VTKM_CUDA || VTKM_HIP
 }
 
 template <typename IterT, typename T>
@@ -139,7 +139,7 @@ VTKM_EXEC_CONT vtkm::Id LowerBound(const PortalT& portal, const T& val)
 template <typename IterT, typename T, typename Comp>
 VTKM_EXEC_CONT IterT UpperBound(IterT first, IterT last, const T& val, Comp comp)
 {
-#ifdef VTKM_CUDA
+#if defined(VTKM_CUDA) || defined(VTKM_HIP)
   auto len = last - first;
   while (len != 0)
   {
@@ -156,9 +156,9 @@ VTKM_EXEC_CONT IterT UpperBound(IterT first, IterT last, const T& val, Comp comp
     }
   }
   return first;
-#else  // VTKM_CUDA
+#else  // VTKM_CUDA || VTKM_HIP
   return std::upper_bound(first, last, val, std::move(comp));
-#endif // VTKM_CUDA
+#endif // VTKM_CUDA || VTKM_HIP
 }
 
 template <typename IterT, typename T>
diff --git a/vtkm/Atomic.h b/vtkm/Atomic.h
index 67d3c5413..9a0fc2292 100644
--- a/vtkm/Atomic.h
+++ b/vtkm/Atomic.h
@@ -142,9 +142,9 @@ VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
 }
 
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
 {
-  const volatile T* vaddr = addr; /* volatile to bypass cache*/
+  volatile T* const vaddr = addr; /* volatile to bypass cache*/
   if (order == vtkm::MemoryOrder::SequentiallyConsistent)
   {
     __threadfence();
@@ -207,15 +207,23 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
 }
 
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
-                                                 T desired,
-                                                 T expected,
-                                                 vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
+                                                     T* expected,
+                                                     T desired,
+                                                     vtkm::MemoryOrder order)
 {
   AtomicStoreFence(order);
-  auto result = atomicCAS(addr, expected, desired);
+  auto result = atomicCAS(addr, *expected, desired);
   AtomicLoadFence(order);
-  return result;
+  if (result == *expected)
+  {
+    return true;
+  }
+  else
+  {
+    *expected = result;
+    return false;
+  }
 }
 }
 } // namespace vtkm::detail
@@ -239,9 +247,14 @@ VTKM_THIRDPARTY_PRE_INCLUDE
 #if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
 #undef KOKKOS_ENABLE_CUDA
 #endif
+
+#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
+#undef KOKKOS_ENABLE_HIP
+#endif
+
 #endif //KOKKOS_MACROS_HPP not loaded
 
-#include <Kokkos_Core.hpp>
+#include <Kokkos_Atomic.hpp>
 VTKM_THIRDPARTY_POST_INCLUDE
 
 namespace vtkm
@@ -270,7 +283,7 @@ VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
 }
 
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
 {
   switch (order)
   {
@@ -350,15 +363,23 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
 }
 
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
-                                                 T desired,
-                                                 T expected,
-                                                 vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
+                                                     T* expected,
+                                                     T desired,
+                                                     vtkm::MemoryOrder order)
 {
   AtomicStoreFence(order);
-  T result = Kokkos::atomic_compare_exchange(addr, expected, desired);
+  T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
   AtomicLoadFence(order);
-  return result;
+  if (oldValue == *expected)
+  {
+    return true;
+  }
+  else
+  {
+    *expected = oldValue;
+    return false;
+  }
 }
 }
 } // namespace vtkm::detail
@@ -407,35 +428,35 @@ VTKM_EXEC_CONT inline T BitCast(T&& src)
 //
 // https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
 
-VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(const vtkm::UInt8* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order)
 {
   // This assumes that the memory interface is smart enough to load a 32-bit
   // word atomically and a properly aligned 8-bit word from it.
   // We could build address masks and do shifts to perform this manually if
   // this assumption is incorrect.
-  auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
+  auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
   std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
   return result;
 }
-VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(const vtkm::UInt16* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order)
 {
   // This assumes that the memory interface is smart enough to load a 32-bit
   // word atomically and a properly aligned 16-bit word from it.
   // We could build address masks and do shifts to perform this manually if
   // this assumption is incorrect.
-  auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
+  auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
   std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
   return result;
 }
-VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(const vtkm::UInt32* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order)
 {
-  auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
+  auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
   std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
   return result;
 }
-VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(const vtkm::UInt64* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order)
 {
-  auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
+  auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
   std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
   return result;
 }
@@ -487,13 +508,22 @@ VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
   {                                                                                     \
     return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order);          \
   }                                                                                     \
-  VTKM_EXEC_CONT inline vtkmType AtomicCompareAndSwapImpl(                              \
-    vtkmType* addr, vtkmType desired, vtkmType expected, vtkm::MemoryOrder order)       \
+  VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(                                 \
+    vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder order)      \
   {                                                                                     \
-    return BitCast<vtkmType>(                                                           \
+    vtkmType result = BitCast<vtkmType>(                                                \
       _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr),    \
                                           BitCast<winType>(desired),                    \
-                                          BitCast<winType>(expected)));                 \
+                                          BitCast<winType>(*expected)));                \
+    if (result == *expected)                                                            \
+    {                                                                                   \
+      return true;                                                                      \
+    }                                                                                   \
+    else                                                                                \
+    {                                                                                   \
+      *expected = result;                                                               \
+      return false;                                                                     \
+    }                                                                                   \
   }
 
 VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
@@ -538,7 +568,7 @@ VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
 }
 
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
 {
   return __atomic_load_n(addr, GccAtomicMemOrder(order));
 }
@@ -580,14 +610,13 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
 }
 
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
-                                                 T desired,
-                                                 T expected,
-                                                 vtkm::MemoryOrder order)
+VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
+                                                     T* expected,
+                                                     T desired,
+                                                     vtkm::MemoryOrder order)
 {
-  __atomic_compare_exchange_n(
-    addr, &expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
-  return expected;
+  return __atomic_compare_exchange_n(
+    addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
 }
 }
 } // namespace vtkm::detail
@@ -627,7 +656,7 @@ using AtomicTypesSupported = vtkm::List<vtkm::UInt32, vtkm::UInt64>;
 /// or after that write.
 ///
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicLoad(const T* pointer,
+VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer,
                                    vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
 {
   return detail::AtomicLoadImpl(pointer, order);
@@ -796,24 +825,34 @@ VTKM_EXEC_CONT inline T AtomicNot(
 
 /// \brief Atomic function that replaces a value given a condition.
 ///
-/// Given a pointer, a new desired value, and an expected value, replaces the value at the
-/// pointer if it is the same as the expected value with the new desired value. If the original
-/// value in the pointer does not equal the expected value, then the memory at the pointer
-/// remains unchanged. In either case, the function returns the _old_ original value that
-/// was at the pointer.
+/// Given a pointer to a `shared` value, a pointer holding the `expected` value at that shared
+/// location, and a new `desired` value, `AtomicCompareExchange` compares the existing `shared`
+/// value to the `expected` value, and then conditionally replaces the `shared` value with
+/// the provided `desired` value. Otherwise, the `expected` value gets replaced with the
+/// `shared` value. Note that in either case, the function returns with `expected` replaced
+/// with the value _originally_ in `shared` at the start of the call.
 ///
-/// If multiple threads call `AtomicCompareAndSwap` simultaneously, the result will be consistent
-/// as if one was called before the other (although it is indeterminate which will be applied
-/// first).
+/// If the `shared` value and `expected` value are the same, then `shared` gets set to
+/// `desired`, and `AtomicCompareAndExchange` returns `true`.
+///
+/// If the `shared` value and `expected` value are different, then `expected` gets set
+/// to `shared`, and `AtomicCompareAndExchange` returns `false`. The value at `shared`
+/// is _not_ changed in this case.
+///
+/// If multiple threads call `AtomicCompareExchange` simultaneously with the same `shared`
+/// pointer, the result will be consistent as if one was called before the other (although
+/// it is indeterminate which will be applied first). Note that the `expected` pointer should
+/// _not_ be shared among threads. The `expected` pointer should be thread-local (often
+/// pointing to an object on the stack).
 ///
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicCompareAndSwap(
-  T* pointer,
+VTKM_EXEC_CONT inline T AtomicCompareExchange(
+  T* shared,
+  T* expected,
   T desired,
-  T expected,
   vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
 {
-  return detail::AtomicCompareAndSwapImpl(pointer, desired, expected, order);
+  return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
 }
 
 } // namespace vtkm
diff --git a/vtkm/Math.h b/vtkm/Math.h
index 7525d2dc9..fcdabfc94 100644
--- a/vtkm/Math.h
+++ b/vtkm/Math.h
@@ -2381,7 +2381,8 @@ static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numer
                                                              QType& quotient)
 {
   int iQuotient;
-#ifdef VTKM_CUDA
+  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
+#if defined(VTKM_CUDA) || defined(VTKM_HIP)
   const vtkm::Float32 result =
     VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient);
 #else
@@ -2411,11 +2412,20 @@ static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numer
 ///
 static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral)
 {
+  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
+#if defined(VTKM_CUDA) || defined(VTKM_HIP)
+  return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral);
+#else
   return std::modf(x, &integral);
+#endif
 }
 static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral)
 {
+#if defined(VTKM_CUDA)
+  return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral);
+#else
   return std::modf(x, &integral);
+#endif
 }
 
 //-----------------------------------------------------------------------------
diff --git a/vtkm/Math.h.in b/vtkm/Math.h.in
index 3fb1f30de..4dd6dcbdc 100644
--- a/vtkm/Math.h.in
+++ b/vtkm/Math.h.in
@@ -1011,7 +1011,8 @@ static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numer
                                                              QType& quotient)
 {
   int iQuotient;
-#ifdef VTKM_CUDA
+  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
+#if defined(VTKM_CUDA) || defined(VTKM_HIP)
   const vtkm::Float32 result =
     VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient);
 #else
@@ -1041,11 +1042,20 @@ static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numer
 ///
 static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral)
 {
+  // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
+#if defined(VTKM_CUDA) || defined(VTKM_HIP)
+  return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral);
+#else
   return std::modf(x, &integral);
+#endif
 }
 static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral)
 {
+#if defined(VTKM_CUDA)
+  return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral);
+#else
   return std::modf(x, &integral);
+#endif
 }
 
 //-----------------------------------------------------------------------------
diff --git a/vtkm/cont/ArrayHandle.h b/vtkm/cont/ArrayHandle.h
index c55472045..b5060a168 100644
--- a/vtkm/cont/ArrayHandle.h
+++ b/vtkm/cont/ArrayHandle.h
@@ -1225,7 +1225,7 @@ public:
   /// Releases any resources being used in the execution environment (that are
   /// not being shared by the control environment).
   ///
-  VTKM_CONT void ReleaseResourcesExecution()
+  VTKM_CONT void ReleaseResourcesExecution() const
   {
     detail::ArrayHandleReleaseResourcesExecution(this->Buffers);
   }
@@ -1294,27 +1294,23 @@ public:
     return StorageType::CreateWritePortal(this->GetBuffers(), device, token);
   }
 
-  template <typename DeviceAdapterTag>
   VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInput now requires a vtkm::cont::Token object.")
-    typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(DeviceAdapterTag) const
+    ReadPortalType PrepareForInput(vtkm::cont::DeviceAdapterId device) const
   {
     vtkm::cont::Token token;
-    return this->PrepareForInput(DeviceAdapterTag{}, token);
+    return this->PrepareForInput(device, token);
   }
-  template <typename DeviceAdapterTag>
   VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForOutput now requires a vtkm::cont::Token object.")
-    typename ExecutionTypes<DeviceAdapterTag>::Portal
-    PrepareForOutput(vtkm::Id numberOfValues, DeviceAdapterTag)
+    WritePortalType PrepareForOutput(vtkm::Id numberOfValues, vtkm::cont::DeviceAdapterId device)
   {
     vtkm::cont::Token token;
-    return this->PrepareForOutput(numberOfValues, DeviceAdapterTag{}, token);
+    return this->PrepareForOutput(numberOfValues, device, token);
   }
-  template <typename DeviceAdapterTag>
   VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInPlace now requires a vtkm::cont::Token object.")
-    typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(DeviceAdapterTag)
+    WritePortalType PrepareForInPlace(vtkm::cont::DeviceAdapterId device) const
   {
     vtkm::cont::Token token;
-    return this->PrepareForInPlace(DeviceAdapterTag{}, token);
+    return this->PrepareForInPlace(device, token);
   }
 
   /// Returns true if the ArrayHandle's data is on the given device. If the data are on the given
diff --git a/vtkm/cont/BitField.h b/vtkm/cont/BitField.h
index 1b9f2d2da..e7786515c 100644
--- a/vtkm/cont/BitField.h
+++ b/vtkm/cont/BitField.h
@@ -430,56 +430,74 @@ public:
   }
 
   /// Perform an atomic compare-and-swap operation on the bit at @a bitIdx.
-  /// If the value in memory is equal to @a expectedBit, it is replaced with
-  /// the value of @a newBit and the original value of the bit is returned as a
-  /// boolean. This method implements a full memory barrier around the atomic
+  /// If the value in memory is equal to @a oldBit, it is replaced with
+  /// the value of @a newBit and true is returned. If the value in memory is
+  /// not equal to @oldBit, @oldBit is changed to that value and false is
+  /// returned. This method implements a full memory barrier around the atomic
   /// operation.
   VTKM_EXEC_CONT
-  bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
+  bool CompareExchangeBitAtomic(vtkm::Id bitIdx, bool* oldBit, bool newBit) const
   {
     VTKM_STATIC_ASSERT_MSG(!IsConst, "Attempt to modify const BitField portal.");
     using WordType = WordTypePreferred;
     const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
     const auto bitmask = WordType(1) << coord.BitOffset;
 
-    WordType oldWord;
-    WordType newWord;
+    WordType oldWord = this->GetWord<WordType>(coord.WordIndex);
     do
     {
-      oldWord = this->GetWord<WordType>(coord.WordIndex);
-      bool oldBitSet = (oldWord & bitmask) != WordType(0);
-      if (oldBitSet != expectedBit)
+      bool actualBit = (oldWord & bitmask) != WordType(0);
+      if (actualBit != *oldBit)
       { // The bit-of-interest does not match what we expected.
-        return oldBitSet;
+        *oldBit = actualBit;
+        return false;
       }
-      else if (oldBitSet == newBit)
+      else if (actualBit == newBit)
       { // The bit hasn't changed, but also already matches newVal. We're done.
-        return expectedBit;
+        return true;
       }
 
-      // Compute the new word
-      newWord = oldWord ^ bitmask;
-    } // CAS loop to resolve any conflicting changes to other bits in the word.
-    while (this->CompareAndSwapWordAtomic(coord.WordIndex, newWord, oldWord) != oldWord);
+      // Attempt to update the word with a compare-exchange in the loop condition.
+      // If the old word changed since last queried, oldWord will get updated and
+      // the loop will continue until it succeeds.
+    } while (!this->CompareExchangeWordAtomic(coord.WordIndex, &oldWord, oldWord ^ bitmask));
 
+    return true;
+  }
+
+  VTKM_DEPRECATED(1.6, "Use CompareExchangeBitAtomic. (Note the changed interface.)")
+  VTKM_EXEC_CONT bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
+  {
+    this->CompareExchangeBitAtomic(bitIdx, &expectedBit, newBit);
     return expectedBit;
   }
 
-  /// Perform an atomic compare-and-swap operation on the word at @a wordIdx.
-  /// If the word in memory is equal to @a expectedWord, it is replaced with
-  /// the value of @a newWord and the original word is returned. This method
-  /// implements a full memory barrier around the atomic operation.
+  /// Perform an atomic compare-exchange operation on the word at @a wordIdx.
+  /// If the word in memory is equal to @a oldWord, it is replaced with
+  /// the value of @a newWord and true returned. If the word in memory is not
+  /// equal to @oldWord, @oldWord is set to the word in memory and false is
+  /// returned. This method implements a full memory barrier around the atomic
+  /// operation.
   template <typename WordType = WordTypePreferred>
-  VTKM_EXEC_CONT WordType CompareAndSwapWordAtomic(vtkm::Id wordIdx,
-                                                   WordType newWord,
-                                                   WordType expected) const
+  VTKM_EXEC_CONT bool CompareExchangeWordAtomic(vtkm::Id wordIdx,
+                                                WordType* oldWord,
+                                                WordType newWord) const
   {
     VTKM_STATIC_ASSERT_MSG(!IsConst, "Attempt to modify const BitField portal.");
     VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
                            "Requested WordType does not support atomic"
                            " operations on target execution platform.");
     WordType* addr = this->GetWordAddress<WordType>(wordIdx);
-    return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
+    return vtkm::AtomicCompareExchange(addr, oldWord, newWord);
+  }
+
+  template <typename WordType = WordTypePreferred>
+  VTKM_DEPRECATED(1.6, "Use CompareExchangeWordAtomic. (Note the changed interface.)")
+  VTKM_EXEC_CONT WordType
+    CompareAndSwapWordAtomic(vtkm::Id wordIdx, WordType newWord, WordType expected) const
+  {
+    this->CompareExchangeWordAtomic(wordIdx, &expected, newWord);
+    return expected;
   }
 
 private:
diff --git a/vtkm/cont/internal/AtomicInterfaceControl.h b/vtkm/cont/internal/AtomicInterfaceControl.h
index 0b2bfdf00..42c2b4368 100644
--- a/vtkm/cont/internal/AtomicInterfaceControl.h
+++ b/vtkm/cont/internal/AtomicInterfaceControl.h
@@ -70,7 +70,8 @@ struct VTKM_DEPRECATED(1.6, "Use the functions in vtkm/Atomic.h.") AtomicInterfa
   template <typename T>
   VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
   {
-    return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
+    vtkm::AtomicCompareExchange(addr, &expected, newWord);
+    return expected;
   }
 };
 }
diff --git a/vtkm/cont/internal/AtomicInterfaceExecution.h b/vtkm/cont/internal/AtomicInterfaceExecution.h
index 58c7403fe..c179522a2 100644
--- a/vtkm/cont/internal/AtomicInterfaceExecution.h
+++ b/vtkm/cont/internal/AtomicInterfaceExecution.h
@@ -71,7 +71,8 @@ struct VTKM_DEPRECATED(1.6, "Use the functions in vtkm/Atomic.h.") AtomicInterfa
   template <typename T>
   VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
   {
-    return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
+    vtkm::AtomicCompareExchange(addr, &expected, newWord);
+    return expected;
   }
 };
 }
diff --git a/vtkm/cont/internal/FunctorsGeneral.h b/vtkm/cont/internal/FunctorsGeneral.h
index 7cf127375..17bb06d29 100644
--- a/vtkm/cont/internal/FunctorsGeneral.h
+++ b/vtkm/cont/internal/FunctorsGeneral.h
@@ -657,6 +657,23 @@ VTKM_EXEC static inline vtkm::Id IteratorDistanceImpl(const Iterator& from,
   return static_cast<vtkm::Id>(to - from);
 }
 
+#if defined(VTKM_HIP)
+
+template <typename Iterator>
+__host__ static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to)
+{
+  return static_cast<vtkm::Id>(std::distance(from, to));
+}
+
+template <typename Iterator>
+__device__ static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to)
+{
+  return IteratorDistanceImpl(
+    from, to, typename std::iterator_traits<Iterator>::iterator_category{});
+}
+
+#else
+
 template <typename Iterator>
 VTKM_EXEC static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to)
 {
@@ -668,6 +685,8 @@ VTKM_EXEC static inline vtkm::Id IteratorDistance(const Iterator& from, const It
 #endif
 }
 
+#endif
+
 template <class InputPortalType, class ValuesPortalType, class OutputPortalType>
 struct LowerBoundsKernel
 {
diff --git a/vtkm/cont/testing/TestingBitField.h b/vtkm/cont/testing/TestingBitField.h
index 26b8c2cd4..495251eb4 100644
--- a/vtkm/cont/testing/TestingBitField.h
+++ b/vtkm/cont/testing/TestingBitField.h
@@ -203,11 +203,19 @@ struct TestingBitField
     DEVICE_ASSERT(testValues("XorBitAtomic"));
 
     const auto notBit = !bit;
-    bool casResult = portal.CompareAndSwapBitAtomic(i, bit, notBit);
-    DEVICE_ASSERT(casResult == bit);
+    // A compare-exchange that should fail
+    auto expectedBit = notBit;
+    bool cxResult = portal.CompareExchangeBitAtomic(i, &expectedBit, bit);
+    DEVICE_ASSERT(!cxResult);
+    DEVICE_ASSERT(expectedBit != notBit);
+    DEVICE_ASSERT(portal.GetBit(i) == expectedBit);
     DEVICE_ASSERT(portal.GetBit(i) == bit);
-    casResult = portal.CompareAndSwapBitAtomic(i, notBit, bit);
-    DEVICE_ASSERT(casResult == bit);
+
+    // A compare-exchange that should succeed.
+    expectedBit = bit;
+    cxResult = portal.CompareExchangeBitAtomic(i, &expectedBit, notBit);
+    DEVICE_ASSERT(cxResult);
+    DEVICE_ASSERT(expectedBit == bit);
     DEVICE_ASSERT(portal.GetBit(i) == notBit);
 
     return true;
@@ -258,12 +266,20 @@ struct TestingBitField
     portal.XorWordAtomic(i, mod);
     DEVICE_ASSERT(testValues("XorWordAtomic"));
 
+    // Compare-exchange that should fail
     const WordType notWord = static_cast<WordType>(~word);
-    auto casResult = portal.CompareAndSwapWordAtomic(i, word, notWord);
-    DEVICE_ASSERT(casResult == word);
+    WordType expectedWord = notWord;
+    bool cxResult = portal.CompareExchangeWordAtomic(i, &expectedWord, word);
+    DEVICE_ASSERT(!cxResult);
+    DEVICE_ASSERT(expectedWord != notWord);
+    DEVICE_ASSERT(portal.template GetWord<WordType>(i) == expectedWord);
     DEVICE_ASSERT(portal.template GetWord<WordType>(i) == word);
-    casResult = portal.CompareAndSwapWordAtomic(i, notWord, word);
-    DEVICE_ASSERT(casResult == word);
+
+    // Compare-exchange that should succeed
+    expectedWord = word;
+    cxResult = portal.CompareExchangeWordAtomic(i, &expectedWord, notWord);
+    DEVICE_ASSERT(cxResult);
+    DEVICE_ASSERT(expectedWord == word);
     DEVICE_ASSERT(portal.template GetWord<WordType>(i) == notWord);
 
     return true;
diff --git a/vtkm/cont/testing/TestingDeviceAdapter.h b/vtkm/cont/testing/TestingDeviceAdapter.h
index 96f19919d..2ccb8d393 100644
--- a/vtkm/cont/testing/TestingDeviceAdapter.h
+++ b/vtkm/cont/testing/TestingDeviceAdapter.h
@@ -327,14 +327,9 @@ public:
       T value = (T)index;
       //Get the old value from the array
       T oldValue = this->AArray.Get(0);
-      //This creates an atomic add using the CAS operatoin
-      T assumed = T(0);
-      do
-      {
-        assumed = oldValue;
-        oldValue = this->AArray.CompareAndSwap(0, (assumed + value), assumed);
-
-      } while (assumed != oldValue);
+      //Use atomic compare-exchange to atomically add value
+      while (!this->AArray.CompareExchange(0, &oldValue, oldValue + value))
+        ;
     }
 
     VTKM_CONT void SetErrorMessageBuffer(const vtkm::exec::internal::ErrorMessageBuffer&) {}
diff --git a/vtkm/exec/AtomicArrayExecutionObject.h b/vtkm/exec/AtomicArrayExecutionObject.h
index 9a183a5b8..3ac66919a 100644
--- a/vtkm/exec/AtomicArrayExecutionObject.h
+++ b/vtkm/exec/AtomicArrayExecutionObject.h
@@ -106,7 +106,7 @@ public:
     // We'll cast the signed types to unsigned to work around this.
     using APIType = typename detail::MakeUnsigned<ValueType>::type;
 
-    return static_cast<T>(vtkm::AtomicLoad(reinterpret_cast<const APIType*>(this->Data + index)));
+    return static_cast<T>(vtkm::AtomicLoad(reinterpret_cast<APIType*>(this->Data + index)));
   }
 
   /// \brief Peform an atomic addition with sequentially consistent memory
@@ -158,47 +158,42 @@ public:
     vtkm::AtomicStore(reinterpret_cast<APIType*>(this->Data + index), static_cast<APIType>(value));
   }
 
-  /// \brief Perform an atomic CAS operation with sequentially consistent
+  /// \brief Perform an atomic compare and exchange operation with sequentially consistent
   /// memory ordering.
   /// \param index The index of the array element that will be atomically
   /// modified.
+  /// \param oldValue A pointer to the expected value of the indexed element.
   /// \param newValue The value to replace the indexed element with.
-  /// \param oldValue The expected value of the indexed element.
-  /// \return If the operation is successful, \a oldValue is returned. Otherwise
-  /// the current value of the indexed element is returned, and the element is
-  /// not modified.
+  /// \return If the operation is successful, \a true is returned. Otherwise,
+  /// \a oldValue is replaced with the current value of the indexed element,
+  /// the element is not modified, and \a false is returned. In either case, \a oldValue
+  /// becomes the value that was originally in the indexed element.
   ///
   /// This operation is typically used in a loop. For example usage,
-  /// an atomic multiplication may be implemented using CAS as follows:
+  /// an atomic multiplication may be implemented using compare-exchange as follows:
   ///
   /// ```
   /// AtomicArrayExecutionObject<vtkm::Int32, ...> arr = ...;
   ///
-  /// // CAS multiplication:
-  /// vtkm::Int32 cur = arr->Get(idx); // Load the current value at idx
-  /// vtkm::Int32 newVal; // will hold the result of the multiplication
-  /// vtkm::Int32 expect; // will hold the expected value before multiplication
+  /// // Compare-exchange multiplication:
+  /// vtkm::Int32 current = arr->Get(idx); // Load the current value at idx
   /// do {
-  ///   expect = cur; // Used to ensure the value hasn't changed since reading
-  ///   newVal = cur * multFactor; // the actual multiplication
-  /// }
-  /// while ((cur = arr->CompareAndSwap(idx, newVal, expect)) == expect);
+  ///   vtkm::Int32 newVal = current * multFactor; // the actual multiplication
+  /// } while (!arr->CompareExchange(idx, &current, newVal));
   /// ```
   ///
-  /// The while condition here updates \a cur with the pre-CAS value of the
-  /// operation (the return from CompareAndSwap), and compares this to the
-  /// expected value. If the values match, the operation was successful and the
+  /// The while condition here updates \a newVal what the proper multiplication
+  /// is given the expected current value. It then compares this to the
+  /// value in the array. If the values match, the operation was successful and the
   /// loop exits. If the values do not match, the value at \a idx was changed
-  /// by another thread since the initial Get, and the CAS operation failed --
-  /// the target element was not modified by the CAS call. If this happens, the
-  /// loop body re-executes using the new value of \a cur and tries again until
+  /// by another thread since the initial Get, and the compare-exchange operation failed --
+  /// the target element was not modified by the compare-exchange call. If this happens, the
+  /// loop body re-executes using the new value of \a current and tries again until
   /// it succeeds.
   ///
   VTKM_SUPPRESS_EXEC_WARNINGS
   VTKM_EXEC
-  ValueType CompareAndSwap(vtkm::Id index,
-                           const ValueType& newValue,
-                           const ValueType& oldValue) const
+  bool CompareExchange(vtkm::Id index, ValueType* oldValue, const ValueType& newValue) const
   {
     // We only support 32/64 bit signed/unsigned ints, and vtkm::Atomic
     // currently only provides API for unsigned types.
@@ -207,9 +202,18 @@ public:
     // is how overflow works, and signed overflow is already undefined.
     using APIType = typename detail::MakeUnsigned<ValueType>::type;
 
-    return static_cast<T>(vtkm::AtomicCompareAndSwap(reinterpret_cast<APIType*>(this->Data + index),
-                                                     static_cast<APIType>(newValue),
-                                                     static_cast<APIType>(oldValue)));
+    return vtkm::AtomicCompareExchange(reinterpret_cast<APIType*>(this->Data + index),
+                                       reinterpret_cast<APIType*>(oldValue),
+                                       static_cast<APIType>(newValue));
+  }
+
+  VTKM_DEPRECATED(1.6, "Use CompareExchange. (Note the changed interface.)")
+  VTKM_EXEC ValueType CompareAndSwap(vtkm::Id index,
+                                     const ValueType& newValue,
+                                     ValueType oldValue) const
+  {
+    this->CompareExchange(index, &oldValue, newValue);
+    return oldValue;
   }
 
 private:
diff --git a/vtkm/filter/GhostCellRemove.hxx b/vtkm/filter/GhostCellRemove.hxx
index c64e0bd1d..7cbb1fc82 100644
--- a/vtkm/filter/GhostCellRemove.hxx
+++ b/vtkm/filter/GhostCellRemove.hxx
@@ -106,21 +106,21 @@ public:
   template <typename Atomic>
   VTKM_EXEC void Max(Atomic& atom, const vtkm::Id& val, const vtkm::Id& index) const
   {
-    vtkm::Id old = -1;
-    do
+    vtkm::Id old = atom.Get(index);
+    while (old < val)
     {
-      old = atom.CompareAndSwap(index, val, old);
-    } while (old < val);
+      atom.CompareExchange(index, &old, val);
+    }
   }
 
   template <typename Atomic>
   VTKM_EXEC void Min(Atomic& atom, const vtkm::Id& val, const vtkm::Id& index) const
   {
-    vtkm::Id old = 1000000000;
-    do
+    vtkm::Id old = atom.Get(index);
+    while (old > val)
     {
-      old = atom.CompareAndSwap(index, val, old);
-    } while (old > val);
+      atom.CompareExchange(index, &old, val);
+    }
   }
 
   template <typename T, typename AtomicType>
diff --git a/vtkm/rendering/Wireframer.h b/vtkm/rendering/Wireframer.h
index 09335d181..fbb1883aa 100644
--- a/vtkm/rendering/Wireframer.h
+++ b/vtkm/rendering/Wireframer.h
@@ -362,7 +362,7 @@ private:
       blendedColor[2] = color[2] * intensity + srcColor[2] * alpha;
       blendedColor[3] = alpha + intensity;
       next.Ints.Color = PackColor(blendedColor);
-      current.Raw = FrameBuffer.CompareAndSwap(index, next.Raw, current.Raw);
+      FrameBuffer.CompareExchange(index, &current.Raw, next.Raw);
     } while (current.Floats.Depth > next.Floats.Depth);
   }
 
diff --git a/vtkm/testing/UnitTestAtomic.cxx b/vtkm/testing/UnitTestAtomic.cxx
index 9ad04724f..e84c52b1a 100644
--- a/vtkm/testing/UnitTestAtomic.cxx
+++ b/vtkm/testing/UnitTestAtomic.cxx
@@ -277,7 +277,7 @@ struct AtomicTests
     }
   }
 
-  struct CompareAndSwapFunctor : vtkm::worklet::WorkletMapField
+  struct CompareExchangeFunctor : vtkm::worklet::WorkletMapField
   {
     using ControlSignature = void(FieldIn ignored, ExecObject);
     using ExecutionSignature = void(WorkIndex, _2);
@@ -288,8 +288,8 @@ struct AtomicTests
       bool success = false;
       for (T overlapIndex = 0; overlapIndex < static_cast<T>(OVERLAP); ++overlapIndex)
       {
-        T oldValue = vtkm::AtomicCompareAndSwap(data + arrayIndex, overlapIndex + 1, overlapIndex);
-        if (oldValue == overlapIndex)
+        T expectedValue = overlapIndex;
+        if (vtkm::AtomicCompareExchange(data + arrayIndex, &expectedValue, overlapIndex + 1))
         {
           success = true;
           break;
@@ -303,9 +303,9 @@ struct AtomicTests
     }
   };
 
-  VTKM_CONT void TestCompareAndSwap()
+  VTKM_CONT void TestCompareExchange()
   {
-    std::cout << "AtomicCompareAndSwap" << std::endl;
+    std::cout << "AtomicCompareExchange" << std::endl;
     vtkm::cont::ArrayHandleBasic<T> array;
     vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleConstant<T>(0, ARRAY_SIZE), array);
     array.Allocate(ARRAY_SIZE);
@@ -331,7 +331,7 @@ struct AtomicTests
     TestOr();
     TestXor();
     TestNot();
-    TestCompareAndSwap();
+    TestCompareExchange();
   }
 };
 
diff --git a/vtkm/worklet/OrientPointNormals.h b/vtkm/worklet/OrientPointNormals.h
index d875d8765..5efef6039 100644
--- a/vtkm/worklet/OrientPointNormals.h
+++ b/vtkm/worklet/OrientPointNormals.h
@@ -151,9 +151,8 @@ public:
       for (vtkm::IdComponent c = 0; c < cells.GetNumberOfComponents(); ++c)
       {
         const vtkm::Id cellId = cells[c];
-        const bool alreadyVisited = visitedCells.CompareAndSwapBitAtomic(cellId, true, false);
-
-        if (!alreadyVisited)
+        bool checkNotVisited = false;
+        if (visitedCells.CompareExchangeBitAtomic(cellId, &checkNotVisited, true))
         { // This thread is first to visit cell
           activeCells.SetBitAtomic(cellId, true);
         }
@@ -216,8 +215,8 @@ public:
         const bool alreadyVisited = visitedPoints.GetBit(pointId);
         if (!alreadyVisited)
         {
-          const bool alreadyActive = activePoints.CompareAndSwapBitAtomic(pointId, true, false);
-          if (!alreadyActive)
+          bool checkNotActive = false;
+          if (activePoints.CompareExchangeBitAtomic(pointId, &checkNotActive, true))
           { // If we're the first thread to mark point active, set ref point:
             refPoints.Set(pointId, refPtId);
           }
diff --git a/vtkm/worklet/connectivities/UnionFind.h b/vtkm/worklet/connectivities/UnionFind.h
index cb65cfb94..cff053e60 100644
--- a/vtkm/worklet/connectivities/UnionFind.h
+++ b/vtkm/worklet/connectivities/UnionFind.h
@@ -106,19 +106,16 @@ public:
     // We can use this return "new root" as is without calling findRoot() to
     // find the "new root". The while loop terminates when both u and v have
     // the same root (thus united).
-    auto root_u = UnionFind::findRoot(parents, u);
-    auto root_v = UnionFind::findRoot(parents, v);
+    vtkm::Id root_u = UnionFind::findRoot(parents, u);
+    vtkm::Id root_v = UnionFind::findRoot(parents, v);
 
     while (root_u != root_v)
     {
       // FIXME: we might be executing the loop one extra time than necessary.
-      // Nota Bene: VTKm's CompareAndSwap has a different order of parameters
-      // than common practice, it is (index, new, expected) rather than
-      // (index, expected, new).
       if (root_u < root_v)
-        root_v = parents.CompareAndSwap(root_v, root_u, root_v);
+        parents.CompareExchange(root_v, &root_v, root_u);
       else if (root_u > root_v)
-        root_u = parents.CompareAndSwap(root_u, root_v, root_u);
+        parents.CompareExchange(root_u, &root_u, root_v);
     }
   }
 
diff --git a/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h b/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h
index 59c670431..e8d9613a3 100644
--- a/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h
+++ b/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h
@@ -264,14 +264,9 @@ public:
     //Id writeValue = op(vertexValue, parentValue);
 
     auto cur = minMaxIndexPortal.Get(parent); // Load the current value at idx
-    vtkm::Id newVal;                          // will hold the result of the multiplication
-    vtkm::Id expect;                          // will hold the expected value before multiplication
-
-    do
-    {
-      expect = cur;                        // Used to ensure the value hasn't changed since reading
-      newVal = this->Op(cur, vertexValue); // the actual multiplication
-    } while ((cur = minMaxIndexPortal.CompareAndSwap(parent, newVal, expect)) != expect);
+    // Use a compare-exchange loop to ensure the operation gets applied atomically
+    while (!minMaxIndexPortal.CompareExchange(parent, &cur, this->Op(cur, vertexValue)))
+      ;
 
     //minMaxIndexPortal.Set(parent, writeValue);
   }