diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index bfa2707eb..d58144a25 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -32,9 +32,10 @@ # - tbb # - openmp # - mpich2 -# * .gitlab/ci/docker/ubuntu1804/cuda10.1/ +# * .gitlab/ci/docker/ubuntu1804/cuda11.1/ # - cuda -# - gcc 7.4 +# - gcc 7 +# - gcc 8 # - tbb # - openmp # - mpich2 @@ -49,52 +50,52 @@ GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci .centos7: ¢os7 - image: "kitware/vtkm:ci-centos7_cuda10.2-20200820" + image: "kitware/vtkm:ci-centos7_cuda10.2-20201016" extends: - .docker_image .centos8: ¢os8 - image: "kitware/vtkm:ci-centos8-20200820" + image: "kitware/vtkm:ci-centos8-20201016" extends: - .docker_image .rhel8: &rhel8 - image: "kitware/vtkm:ci-rhel8_cuda10.2-20200820" + image: "kitware/vtkm:ci-rhel8_cuda10.2-20201016" extends: - .docker_image .ubuntu1604: &ubuntu1604 - image: "kitware/vtkm:ci-ubuntu1604-20200820" + image: "kitware/vtkm:ci-ubuntu1604-20201016" extends: - .docker_image .ubuntu1604_cuda: &ubuntu1604_cuda - image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200820" + image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20201016" extends: - .docker_image .ubuntu1804: &ubuntu1804 - image: "kitware/vtkm:ci-ubuntu1804-20200820" + image: "kitware/vtkm:ci-ubuntu1804-20201016" extends: - .docker_image .ubuntu1804_cuda: &ubuntu1804_cuda - image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200820" + image: "kitware/vtkm:ci-ubuntu1804_cuda11.1-20201016" extends: - .docker_image .ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos - image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20200820" + image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016" extends: - .docker_image .ubuntu2004_doxygen: &ubuntu2004_doxygen - image: "kitware/vtkm:ci-doxygen-20200820" + image: "kitware/vtkm:ci-doxygen-20201016" extends: - .docker_image .ubuntu2004_kokkos: &ubuntu2004_kokkos - image: "kitware/vtkm:ci-ubuntu2004_kokkos-20200820" + image: "kitware/vtkm:ci-ubuntu2004_kokkos-20201016" extends: - .docker_image diff --git a/.gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile b/.gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile similarity index 96% rename from .gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile rename to .gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile index c50cb358a..fbdd44a7d 100644 --- a/.gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile +++ b/.gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile @@ -1,10 +1,10 @@ -FROM nvidia/cuda:10.1-devel-ubuntu18.04 +FROM nvidia/cuda:11.1-devel-ubuntu18.04 LABEL maintainer "Robert Maynard" # Base dependencies for building VTK-m projects RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ - g++ \ + g++-8 \ clang-8 \ git \ git-lfs \ diff --git a/.gitlab/ci/docker/update_all.sh b/.gitlab/ci/docker/update_all.sh index 644061af5..0acb769d9 100755 --- a/.gitlab/ci/docker/update_all.sh +++ b/.gitlab/ci/docker/update_all.sh @@ -30,8 +30,8 @@ cd ubuntu1804/base sudo docker build -t kitware/vtkm:ci-ubuntu1804-$date . cd ../.. -cd ubuntu1804/cuda10.1 -sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date . +cd ubuntu1804/cuda11.1 +sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11.1-$date . cd ../.. cd ubuntu1804/kokkos-cuda diff --git a/.gitlab/ci/ubuntu1804.yml b/.gitlab/ci/ubuntu1804.yml index 41c4858f8..d62b0895e 100644 --- a/.gitlab/ci/ubuntu1804.yml +++ b/.gitlab/ci/ubuntu1804.yml @@ -38,7 +38,7 @@ test:ubuntu1804_gcc9: - build:ubuntu1804_gcc9 # Build on ubuntu1804 with CUDA + MPI and test on ubuntu1804 -# Uses gcc 7.4 +# Uses gcc 7 # Uses MPICH2 build:ubuntu1804_gcc7: tags: @@ -55,6 +55,7 @@ build:ubuntu1804_gcc7: variables: CC: "gcc-7" CXX: "g++-7" + CUDAHOSTCXX: "g++-7" VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats+no_virtual" test:ubuntu1804_gcc7: diff --git a/README.md b/README.md index c7b9dd494..b1c6de3aa 100644 --- a/README.md +++ b/README.md @@ -76,8 +76,8 @@ VTK-m Requires: Optional dependencies are: + CUDA Device Adapter - + [Cuda Toolkit 9.2+](https://developer.nvidia.com/cuda-toolkit) - + Note CUDA >= 10.1 is required on Windows + + [Cuda Toolkit 9.2, >= 10.2](https://developer.nvidia.com/cuda-toolkit) + + Note CUDA >= 10.2 is required on Windows + TBB Device Adapter + [TBB](https://www.threadingbuildingblocks.org/) + OpenMP Device Adapter @@ -105,12 +105,12 @@ VTK-m has been tested on the following configurations:c + On Linux + GCC 4.8.5, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0 + CMake 3.12, 3.13, 3.16, 3.17 - + CUDA 9.2.148, 10.0.130, 10.1.105, 10.2.89 + + CUDA 9.2, 10.2, 11.0, 11.1 + TBB 4.4 U2, 2017 U7 + On Windows + Visual Studio 2015, 2017 + CMake 3.12, 3.17 - + CUDA 10.1 + + CUDA 10.2 + TBB 2017 U3, 2018 U2 + On MacOS + AppleClang 9.1 diff --git a/benchmarking/BenchmarkAtomicArray.cxx b/benchmarking/BenchmarkAtomicArray.cxx index e32205867..965c307b0 100644 --- a/benchmarking/BenchmarkAtomicArray.cxx +++ b/benchmarking/BenchmarkAtomicArray.cxx @@ -260,7 +260,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS( ->ArgNames({ "Values", "Ops", "Stride" }), vtkm::cont::AtomicArrayTypeList); -// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent +// Benchmarks AtomicArray::CompareExchange such that each work index writes to adjacent // indices. struct CASSeqWorker : public vtkm::worklet::WorkletMapField { @@ -273,12 +273,8 @@ struct CASSeqWorker : public vtkm::worklet::WorkletMapField const vtkm::Id idx = i % portal.GetNumberOfValues(); const T val = static_cast(i) + in; T oldVal = portal.Get(idx); - T assumed = static_cast(0); - do - { - assumed = oldVal; - oldVal = portal.CompareAndSwap(idx, assumed + val, assumed); - } while (assumed != oldVal); + while (!portal.CompareExchange(idx, &oldVal, oldVal + val)) + ; } }; @@ -371,7 +367,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeqBaseline, ->ArgNames({ "Values", "Ops" }), vtkm::cont::AtomicArrayTypeList); -// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to +// Benchmarks AtomicArray::CompareExchange such that each work index writes to // a strided index: // ( floor(i / stride) + stride * (i % stride) struct CASStrideWorker : public vtkm::worklet::WorkletMapField @@ -393,12 +389,8 @@ struct CASStrideWorker : public vtkm::worklet::WorkletMapField const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals; const T val = static_cast(i) + in; T oldVal = portal.Get(idx); - T assumed = static_cast(0); - do - { - assumed = oldVal; - oldVal = portal.CompareAndSwap(idx, assumed + val, assumed); - } while (assumed != oldVal); + while (!portal.CompareExchange(idx, &oldVal, oldVal + val)) + ; } }; diff --git a/vtkm/Algorithms.h b/vtkm/Algorithms.h index 1686b9f78..2c53a4bd9 100644 --- a/vtkm/Algorithms.h +++ b/vtkm/Algorithms.h @@ -90,7 +90,7 @@ VTKM_EXEC_CONT vtkm::Id BinarySearch(const PortalT& portal, const T& val) template VTKM_EXEC_CONT IterT LowerBound(IterT first, IterT last, const T& val, Comp comp) { -#ifdef VTKM_CUDA +#if defined(VTKM_CUDA) || defined(VTKM_HIP) auto len = last - first; while (len != 0) { @@ -107,9 +107,9 @@ VTKM_EXEC_CONT IterT LowerBound(IterT first, IterT last, const T& val, Comp comp } } return first; -#else // VTKM_CUDA +#else // VTKM_CUDA || VTKM_HIP return std::lower_bound(first, last, val, std::move(comp)); -#endif // VTKM_CUDA +#endif // VTKM_CUDA || VTKM_HIP } template @@ -139,7 +139,7 @@ VTKM_EXEC_CONT vtkm::Id LowerBound(const PortalT& portal, const T& val) template VTKM_EXEC_CONT IterT UpperBound(IterT first, IterT last, const T& val, Comp comp) { -#ifdef VTKM_CUDA +#if defined(VTKM_CUDA) || defined(VTKM_HIP) auto len = last - first; while (len != 0) { @@ -156,9 +156,9 @@ VTKM_EXEC_CONT IterT UpperBound(IterT first, IterT last, const T& val, Comp comp } } return first; -#else // VTKM_CUDA +#else // VTKM_CUDA || VTKM_HIP return std::upper_bound(first, last, val, std::move(comp)); -#endif // VTKM_CUDA +#endif // VTKM_CUDA || VTKM_HIP } template diff --git a/vtkm/Atomic.h b/vtkm/Atomic.h index 67d3c5413..9a0fc2292 100644 --- a/vtkm/Atomic.h +++ b/vtkm/Atomic.h @@ -142,9 +142,9 @@ VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order) } template -VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order) { - const volatile T* vaddr = addr; /* volatile to bypass cache*/ + volatile T* const vaddr = addr; /* volatile to bypass cache*/ if (order == vtkm::MemoryOrder::SequentiallyConsistent) { __threadfence(); @@ -207,15 +207,23 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order) } template -VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr, - T desired, - T expected, - vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, + T* expected, + T desired, + vtkm::MemoryOrder order) { AtomicStoreFence(order); - auto result = atomicCAS(addr, expected, desired); + auto result = atomicCAS(addr, *expected, desired); AtomicLoadFence(order); - return result; + if (result == *expected) + { + return true; + } + else + { + *expected = result; + return false; + } } } } // namespace vtkm::detail @@ -239,9 +247,14 @@ VTKM_THIRDPARTY_PRE_INCLUDE #if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA) #undef KOKKOS_ENABLE_CUDA #endif + +#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP) +#undef KOKKOS_ENABLE_HIP +#endif + #endif //KOKKOS_MACROS_HPP not loaded -#include +#include VTKM_THIRDPARTY_POST_INCLUDE namespace vtkm @@ -270,7 +283,7 @@ VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order) } template -VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order) { switch (order) { @@ -350,15 +363,23 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order) } template -VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr, - T desired, - T expected, - vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, + T* expected, + T desired, + vtkm::MemoryOrder order) { AtomicStoreFence(order); - T result = Kokkos::atomic_compare_exchange(addr, expected, desired); + T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired); AtomicLoadFence(order); - return result; + if (oldValue == *expected) + { + return true; + } + else + { + *expected = oldValue; + return false; + } } } } // namespace vtkm::detail @@ -407,35 +428,35 @@ VTKM_EXEC_CONT inline T BitCast(T&& src) // // https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access -VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(const vtkm::UInt8* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order) { // This assumes that the memory interface is smart enough to load a 32-bit // word atomically and a properly aligned 8-bit word from it. // We could build address masks and do shifts to perform this manually if // this assumption is incorrect. - auto result = *static_cast(addr); + auto result = *static_cast(addr); std::atomic_thread_fence(internal::StdAtomicMemOrder(order)); return result; } -VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(const vtkm::UInt16* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order) { // This assumes that the memory interface is smart enough to load a 32-bit // word atomically and a properly aligned 16-bit word from it. // We could build address masks and do shifts to perform this manually if // this assumption is incorrect. - auto result = *static_cast(addr); + auto result = *static_cast(addr); std::atomic_thread_fence(internal::StdAtomicMemOrder(order)); return result; } -VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(const vtkm::UInt32* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order) { - auto result = *static_cast(addr); + auto result = *static_cast(addr); std::atomic_thread_fence(internal::StdAtomicMemOrder(order)); return result; } -VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(const vtkm::UInt64* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order) { - auto result = *static_cast(addr); + auto result = *static_cast(addr); std::atomic_thread_fence(internal::StdAtomicMemOrder(order)); return result; } @@ -487,13 +508,22 @@ VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr, { \ return AtomicXorImpl(addr, static_cast(~vtkmType{ 0u }), order); \ } \ - VTKM_EXEC_CONT inline vtkmType AtomicCompareAndSwapImpl( \ - vtkmType* addr, vtkmType desired, vtkmType expected, vtkm::MemoryOrder order) \ + VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \ + vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder order) \ { \ - return BitCast( \ + vtkmType result = BitCast( \ _InterlockedCompareExchange##suffix(reinterpret_cast(addr), \ BitCast(desired), \ - BitCast(expected))); \ + BitCast(*expected))); \ + if (result == *expected) \ + { \ + return true; \ + } \ + else \ + { \ + *expected = result; \ + return false; \ + } \ } VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8) @@ -538,7 +568,7 @@ VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order) } template -VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order) { return __atomic_load_n(addr, GccAtomicMemOrder(order)); } @@ -580,14 +610,13 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order) } template -VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr, - T desired, - T expected, - vtkm::MemoryOrder order) +VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr, + T* expected, + T desired, + vtkm::MemoryOrder order) { - __atomic_compare_exchange_n( - addr, &expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order)); - return expected; + return __atomic_compare_exchange_n( + addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order)); } } } // namespace vtkm::detail @@ -627,7 +656,7 @@ using AtomicTypesSupported = vtkm::List; /// or after that write. /// template -VTKM_EXEC_CONT inline T AtomicLoad(const T* pointer, +VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer, vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire) { return detail::AtomicLoadImpl(pointer, order); @@ -796,24 +825,34 @@ VTKM_EXEC_CONT inline T AtomicNot( /// \brief Atomic function that replaces a value given a condition. /// -/// Given a pointer, a new desired value, and an expected value, replaces the value at the -/// pointer if it is the same as the expected value with the new desired value. If the original -/// value in the pointer does not equal the expected value, then the memory at the pointer -/// remains unchanged. In either case, the function returns the _old_ original value that -/// was at the pointer. +/// Given a pointer to a `shared` value, a pointer holding the `expected` value at that shared +/// location, and a new `desired` value, `AtomicCompareExchange` compares the existing `shared` +/// value to the `expected` value, and then conditionally replaces the `shared` value with +/// the provided `desired` value. Otherwise, the `expected` value gets replaced with the +/// `shared` value. Note that in either case, the function returns with `expected` replaced +/// with the value _originally_ in `shared` at the start of the call. /// -/// If multiple threads call `AtomicCompareAndSwap` simultaneously, the result will be consistent -/// as if one was called before the other (although it is indeterminate which will be applied -/// first). +/// If the `shared` value and `expected` value are the same, then `shared` gets set to +/// `desired`, and `AtomicCompareAndExchange` returns `true`. +/// +/// If the `shared` value and `expected` value are different, then `expected` gets set +/// to `shared`, and `AtomicCompareAndExchange` returns `false`. The value at `shared` +/// is _not_ changed in this case. +/// +/// If multiple threads call `AtomicCompareExchange` simultaneously with the same `shared` +/// pointer, the result will be consistent as if one was called before the other (although +/// it is indeterminate which will be applied first). Note that the `expected` pointer should +/// _not_ be shared among threads. The `expected` pointer should be thread-local (often +/// pointing to an object on the stack). /// template -VTKM_EXEC_CONT inline T AtomicCompareAndSwap( - T* pointer, +VTKM_EXEC_CONT inline T AtomicCompareExchange( + T* shared, + T* expected, T desired, - T expected, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent) { - return detail::AtomicCompareAndSwapImpl(pointer, desired, expected, order); + return detail::AtomicCompareExchangeImpl(shared, expected, desired, order); } } // namespace vtkm diff --git a/vtkm/Math.h b/vtkm/Math.h index 7525d2dc9..fcdabfc94 100644 --- a/vtkm/Math.h +++ b/vtkm/Math.h @@ -2381,7 +2381,8 @@ static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numer QType& quotient) { int iQuotient; -#ifdef VTKM_CUDA + // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169 +#if defined(VTKM_CUDA) || defined(VTKM_HIP) const vtkm::Float32 result = VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient); #else @@ -2411,11 +2412,20 @@ static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numer /// static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral) { + // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169 +#if defined(VTKM_CUDA) || defined(VTKM_HIP) + return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral); +#else return std::modf(x, &integral); +#endif } static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral) { +#if defined(VTKM_CUDA) + return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral); +#else return std::modf(x, &integral); +#endif } //----------------------------------------------------------------------------- diff --git a/vtkm/Math.h.in b/vtkm/Math.h.in index 3fb1f30de..4dd6dcbdc 100644 --- a/vtkm/Math.h.in +++ b/vtkm/Math.h.in @@ -1011,7 +1011,8 @@ static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numer QType& quotient) { int iQuotient; -#ifdef VTKM_CUDA + // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169 +#if defined(VTKM_CUDA) || defined(VTKM_HIP) const vtkm::Float32 result = VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient); #else @@ -1041,11 +1042,20 @@ static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numer /// static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral) { + // See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169 +#if defined(VTKM_CUDA) || defined(VTKM_HIP) + return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral); +#else return std::modf(x, &integral); +#endif } static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral) { +#if defined(VTKM_CUDA) + return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral); +#else return std::modf(x, &integral); +#endif } //----------------------------------------------------------------------------- diff --git a/vtkm/cont/ArrayHandle.h b/vtkm/cont/ArrayHandle.h index c55472045..b5060a168 100644 --- a/vtkm/cont/ArrayHandle.h +++ b/vtkm/cont/ArrayHandle.h @@ -1225,7 +1225,7 @@ public: /// Releases any resources being used in the execution environment (that are /// not being shared by the control environment). /// - VTKM_CONT void ReleaseResourcesExecution() + VTKM_CONT void ReleaseResourcesExecution() const { detail::ArrayHandleReleaseResourcesExecution(this->Buffers); } @@ -1294,27 +1294,23 @@ public: return StorageType::CreateWritePortal(this->GetBuffers(), device, token); } - template VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInput now requires a vtkm::cont::Token object.") - typename ExecutionTypes::PortalConst PrepareForInput(DeviceAdapterTag) const + ReadPortalType PrepareForInput(vtkm::cont::DeviceAdapterId device) const { vtkm::cont::Token token; - return this->PrepareForInput(DeviceAdapterTag{}, token); + return this->PrepareForInput(device, token); } - template VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForOutput now requires a vtkm::cont::Token object.") - typename ExecutionTypes::Portal - PrepareForOutput(vtkm::Id numberOfValues, DeviceAdapterTag) + WritePortalType PrepareForOutput(vtkm::Id numberOfValues, vtkm::cont::DeviceAdapterId device) { vtkm::cont::Token token; - return this->PrepareForOutput(numberOfValues, DeviceAdapterTag{}, token); + return this->PrepareForOutput(numberOfValues, device, token); } - template VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInPlace now requires a vtkm::cont::Token object.") - typename ExecutionTypes::Portal PrepareForInPlace(DeviceAdapterTag) + WritePortalType PrepareForInPlace(vtkm::cont::DeviceAdapterId device) const { vtkm::cont::Token token; - return this->PrepareForInPlace(DeviceAdapterTag{}, token); + return this->PrepareForInPlace(device, token); } /// Returns true if the ArrayHandle's data is on the given device. If the data are on the given diff --git a/vtkm/cont/BitField.h b/vtkm/cont/BitField.h index 1b9f2d2da..e7786515c 100644 --- a/vtkm/cont/BitField.h +++ b/vtkm/cont/BitField.h @@ -430,56 +430,74 @@ public: } /// Perform an atomic compare-and-swap operation on the bit at @a bitIdx. - /// If the value in memory is equal to @a expectedBit, it is replaced with - /// the value of @a newBit and the original value of the bit is returned as a - /// boolean. This method implements a full memory barrier around the atomic + /// If the value in memory is equal to @a oldBit, it is replaced with + /// the value of @a newBit and true is returned. If the value in memory is + /// not equal to @oldBit, @oldBit is changed to that value and false is + /// returned. This method implements a full memory barrier around the atomic /// operation. VTKM_EXEC_CONT - bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const + bool CompareExchangeBitAtomic(vtkm::Id bitIdx, bool* oldBit, bool newBit) const { VTKM_STATIC_ASSERT_MSG(!IsConst, "Attempt to modify const BitField portal."); using WordType = WordTypePreferred; const auto coord = this->GetBitCoordinateFromIndex(bitIdx); const auto bitmask = WordType(1) << coord.BitOffset; - WordType oldWord; - WordType newWord; + WordType oldWord = this->GetWord(coord.WordIndex); do { - oldWord = this->GetWord(coord.WordIndex); - bool oldBitSet = (oldWord & bitmask) != WordType(0); - if (oldBitSet != expectedBit) + bool actualBit = (oldWord & bitmask) != WordType(0); + if (actualBit != *oldBit) { // The bit-of-interest does not match what we expected. - return oldBitSet; + *oldBit = actualBit; + return false; } - else if (oldBitSet == newBit) + else if (actualBit == newBit) { // The bit hasn't changed, but also already matches newVal. We're done. - return expectedBit; + return true; } - // Compute the new word - newWord = oldWord ^ bitmask; - } // CAS loop to resolve any conflicting changes to other bits in the word. - while (this->CompareAndSwapWordAtomic(coord.WordIndex, newWord, oldWord) != oldWord); + // Attempt to update the word with a compare-exchange in the loop condition. + // If the old word changed since last queried, oldWord will get updated and + // the loop will continue until it succeeds. + } while (!this->CompareExchangeWordAtomic(coord.WordIndex, &oldWord, oldWord ^ bitmask)); + return true; + } + + VTKM_DEPRECATED(1.6, "Use CompareExchangeBitAtomic. (Note the changed interface.)") + VTKM_EXEC_CONT bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const + { + this->CompareExchangeBitAtomic(bitIdx, &expectedBit, newBit); return expectedBit; } - /// Perform an atomic compare-and-swap operation on the word at @a wordIdx. - /// If the word in memory is equal to @a expectedWord, it is replaced with - /// the value of @a newWord and the original word is returned. This method - /// implements a full memory barrier around the atomic operation. + /// Perform an atomic compare-exchange operation on the word at @a wordIdx. + /// If the word in memory is equal to @a oldWord, it is replaced with + /// the value of @a newWord and true returned. If the word in memory is not + /// equal to @oldWord, @oldWord is set to the word in memory and false is + /// returned. This method implements a full memory barrier around the atomic + /// operation. template - VTKM_EXEC_CONT WordType CompareAndSwapWordAtomic(vtkm::Id wordIdx, - WordType newWord, - WordType expected) const + VTKM_EXEC_CONT bool CompareExchangeWordAtomic(vtkm::Id wordIdx, + WordType* oldWord, + WordType newWord) const { VTKM_STATIC_ASSERT_MSG(!IsConst, "Attempt to modify const BitField portal."); VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic::value, "Requested WordType does not support atomic" " operations on target execution platform."); WordType* addr = this->GetWordAddress(wordIdx); - return vtkm::AtomicCompareAndSwap(addr, newWord, expected); + return vtkm::AtomicCompareExchange(addr, oldWord, newWord); + } + + template + VTKM_DEPRECATED(1.6, "Use CompareExchangeWordAtomic. (Note the changed interface.)") + VTKM_EXEC_CONT WordType + CompareAndSwapWordAtomic(vtkm::Id wordIdx, WordType newWord, WordType expected) const + { + this->CompareExchangeWordAtomic(wordIdx, &expected, newWord); + return expected; } private: diff --git a/vtkm/cont/internal/AtomicInterfaceControl.h b/vtkm/cont/internal/AtomicInterfaceControl.h index 0b2bfdf00..42c2b4368 100644 --- a/vtkm/cont/internal/AtomicInterfaceControl.h +++ b/vtkm/cont/internal/AtomicInterfaceControl.h @@ -70,7 +70,8 @@ struct VTKM_DEPRECATED(1.6, "Use the functions in vtkm/Atomic.h.") AtomicInterfa template VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected) { - return vtkm::AtomicCompareAndSwap(addr, newWord, expected); + vtkm::AtomicCompareExchange(addr, &expected, newWord); + return expected; } }; } diff --git a/vtkm/cont/internal/AtomicInterfaceExecution.h b/vtkm/cont/internal/AtomicInterfaceExecution.h index 58c7403fe..c179522a2 100644 --- a/vtkm/cont/internal/AtomicInterfaceExecution.h +++ b/vtkm/cont/internal/AtomicInterfaceExecution.h @@ -71,7 +71,8 @@ struct VTKM_DEPRECATED(1.6, "Use the functions in vtkm/Atomic.h.") AtomicInterfa template VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected) { - return vtkm::AtomicCompareAndSwap(addr, newWord, expected); + vtkm::AtomicCompareExchange(addr, &expected, newWord); + return expected; } }; } diff --git a/vtkm/cont/internal/FunctorsGeneral.h b/vtkm/cont/internal/FunctorsGeneral.h index 7cf127375..17bb06d29 100644 --- a/vtkm/cont/internal/FunctorsGeneral.h +++ b/vtkm/cont/internal/FunctorsGeneral.h @@ -657,6 +657,23 @@ VTKM_EXEC static inline vtkm::Id IteratorDistanceImpl(const Iterator& from, return static_cast(to - from); } +#if defined(VTKM_HIP) + +template +__host__ static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to) +{ + return static_cast(std::distance(from, to)); +} + +template +__device__ static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to) +{ + return IteratorDistanceImpl( + from, to, typename std::iterator_traits::iterator_category{}); +} + +#else + template VTKM_EXEC static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to) { @@ -668,6 +685,8 @@ VTKM_EXEC static inline vtkm::Id IteratorDistance(const Iterator& from, const It #endif } +#endif + template struct LowerBoundsKernel { diff --git a/vtkm/cont/testing/TestingBitField.h b/vtkm/cont/testing/TestingBitField.h index 26b8c2cd4..495251eb4 100644 --- a/vtkm/cont/testing/TestingBitField.h +++ b/vtkm/cont/testing/TestingBitField.h @@ -203,11 +203,19 @@ struct TestingBitField DEVICE_ASSERT(testValues("XorBitAtomic")); const auto notBit = !bit; - bool casResult = portal.CompareAndSwapBitAtomic(i, bit, notBit); - DEVICE_ASSERT(casResult == bit); + // A compare-exchange that should fail + auto expectedBit = notBit; + bool cxResult = portal.CompareExchangeBitAtomic(i, &expectedBit, bit); + DEVICE_ASSERT(!cxResult); + DEVICE_ASSERT(expectedBit != notBit); + DEVICE_ASSERT(portal.GetBit(i) == expectedBit); DEVICE_ASSERT(portal.GetBit(i) == bit); - casResult = portal.CompareAndSwapBitAtomic(i, notBit, bit); - DEVICE_ASSERT(casResult == bit); + + // A compare-exchange that should succeed. + expectedBit = bit; + cxResult = portal.CompareExchangeBitAtomic(i, &expectedBit, notBit); + DEVICE_ASSERT(cxResult); + DEVICE_ASSERT(expectedBit == bit); DEVICE_ASSERT(portal.GetBit(i) == notBit); return true; @@ -258,12 +266,20 @@ struct TestingBitField portal.XorWordAtomic(i, mod); DEVICE_ASSERT(testValues("XorWordAtomic")); + // Compare-exchange that should fail const WordType notWord = static_cast(~word); - auto casResult = portal.CompareAndSwapWordAtomic(i, word, notWord); - DEVICE_ASSERT(casResult == word); + WordType expectedWord = notWord; + bool cxResult = portal.CompareExchangeWordAtomic(i, &expectedWord, word); + DEVICE_ASSERT(!cxResult); + DEVICE_ASSERT(expectedWord != notWord); + DEVICE_ASSERT(portal.template GetWord(i) == expectedWord); DEVICE_ASSERT(portal.template GetWord(i) == word); - casResult = portal.CompareAndSwapWordAtomic(i, notWord, word); - DEVICE_ASSERT(casResult == word); + + // Compare-exchange that should succeed + expectedWord = word; + cxResult = portal.CompareExchangeWordAtomic(i, &expectedWord, notWord); + DEVICE_ASSERT(cxResult); + DEVICE_ASSERT(expectedWord == word); DEVICE_ASSERT(portal.template GetWord(i) == notWord); return true; diff --git a/vtkm/cont/testing/TestingDeviceAdapter.h b/vtkm/cont/testing/TestingDeviceAdapter.h index 96f19919d..2ccb8d393 100644 --- a/vtkm/cont/testing/TestingDeviceAdapter.h +++ b/vtkm/cont/testing/TestingDeviceAdapter.h @@ -327,14 +327,9 @@ public: T value = (T)index; //Get the old value from the array T oldValue = this->AArray.Get(0); - //This creates an atomic add using the CAS operatoin - T assumed = T(0); - do - { - assumed = oldValue; - oldValue = this->AArray.CompareAndSwap(0, (assumed + value), assumed); - - } while (assumed != oldValue); + //Use atomic compare-exchange to atomically add value + while (!this->AArray.CompareExchange(0, &oldValue, oldValue + value)) + ; } VTKM_CONT void SetErrorMessageBuffer(const vtkm::exec::internal::ErrorMessageBuffer&) {} diff --git a/vtkm/exec/AtomicArrayExecutionObject.h b/vtkm/exec/AtomicArrayExecutionObject.h index 9a183a5b8..3ac66919a 100644 --- a/vtkm/exec/AtomicArrayExecutionObject.h +++ b/vtkm/exec/AtomicArrayExecutionObject.h @@ -106,7 +106,7 @@ public: // We'll cast the signed types to unsigned to work around this. using APIType = typename detail::MakeUnsigned::type; - return static_cast(vtkm::AtomicLoad(reinterpret_cast(this->Data + index))); + return static_cast(vtkm::AtomicLoad(reinterpret_cast(this->Data + index))); } /// \brief Peform an atomic addition with sequentially consistent memory @@ -158,47 +158,42 @@ public: vtkm::AtomicStore(reinterpret_cast(this->Data + index), static_cast(value)); } - /// \brief Perform an atomic CAS operation with sequentially consistent + /// \brief Perform an atomic compare and exchange operation with sequentially consistent /// memory ordering. /// \param index The index of the array element that will be atomically /// modified. + /// \param oldValue A pointer to the expected value of the indexed element. /// \param newValue The value to replace the indexed element with. - /// \param oldValue The expected value of the indexed element. - /// \return If the operation is successful, \a oldValue is returned. Otherwise - /// the current value of the indexed element is returned, and the element is - /// not modified. + /// \return If the operation is successful, \a true is returned. Otherwise, + /// \a oldValue is replaced with the current value of the indexed element, + /// the element is not modified, and \a false is returned. In either case, \a oldValue + /// becomes the value that was originally in the indexed element. /// /// This operation is typically used in a loop. For example usage, - /// an atomic multiplication may be implemented using CAS as follows: + /// an atomic multiplication may be implemented using compare-exchange as follows: /// /// ``` /// AtomicArrayExecutionObject arr = ...; /// - /// // CAS multiplication: - /// vtkm::Int32 cur = arr->Get(idx); // Load the current value at idx - /// vtkm::Int32 newVal; // will hold the result of the multiplication - /// vtkm::Int32 expect; // will hold the expected value before multiplication + /// // Compare-exchange multiplication: + /// vtkm::Int32 current = arr->Get(idx); // Load the current value at idx /// do { - /// expect = cur; // Used to ensure the value hasn't changed since reading - /// newVal = cur * multFactor; // the actual multiplication - /// } - /// while ((cur = arr->CompareAndSwap(idx, newVal, expect)) == expect); + /// vtkm::Int32 newVal = current * multFactor; // the actual multiplication + /// } while (!arr->CompareExchange(idx, ¤t, newVal)); /// ``` /// - /// The while condition here updates \a cur with the pre-CAS value of the - /// operation (the return from CompareAndSwap), and compares this to the - /// expected value. If the values match, the operation was successful and the + /// The while condition here updates \a newVal what the proper multiplication + /// is given the expected current value. It then compares this to the + /// value in the array. If the values match, the operation was successful and the /// loop exits. If the values do not match, the value at \a idx was changed - /// by another thread since the initial Get, and the CAS operation failed -- - /// the target element was not modified by the CAS call. If this happens, the - /// loop body re-executes using the new value of \a cur and tries again until + /// by another thread since the initial Get, and the compare-exchange operation failed -- + /// the target element was not modified by the compare-exchange call. If this happens, the + /// loop body re-executes using the new value of \a current and tries again until /// it succeeds. /// VTKM_SUPPRESS_EXEC_WARNINGS VTKM_EXEC - ValueType CompareAndSwap(vtkm::Id index, - const ValueType& newValue, - const ValueType& oldValue) const + bool CompareExchange(vtkm::Id index, ValueType* oldValue, const ValueType& newValue) const { // We only support 32/64 bit signed/unsigned ints, and vtkm::Atomic // currently only provides API for unsigned types. @@ -207,9 +202,18 @@ public: // is how overflow works, and signed overflow is already undefined. using APIType = typename detail::MakeUnsigned::type; - return static_cast(vtkm::AtomicCompareAndSwap(reinterpret_cast(this->Data + index), - static_cast(newValue), - static_cast(oldValue))); + return vtkm::AtomicCompareExchange(reinterpret_cast(this->Data + index), + reinterpret_cast(oldValue), + static_cast(newValue)); + } + + VTKM_DEPRECATED(1.6, "Use CompareExchange. (Note the changed interface.)") + VTKM_EXEC ValueType CompareAndSwap(vtkm::Id index, + const ValueType& newValue, + ValueType oldValue) const + { + this->CompareExchange(index, &oldValue, newValue); + return oldValue; } private: diff --git a/vtkm/filter/GhostCellRemove.hxx b/vtkm/filter/GhostCellRemove.hxx index c64e0bd1d..7cbb1fc82 100644 --- a/vtkm/filter/GhostCellRemove.hxx +++ b/vtkm/filter/GhostCellRemove.hxx @@ -106,21 +106,21 @@ public: template VTKM_EXEC void Max(Atomic& atom, const vtkm::Id& val, const vtkm::Id& index) const { - vtkm::Id old = -1; - do + vtkm::Id old = atom.Get(index); + while (old < val) { - old = atom.CompareAndSwap(index, val, old); - } while (old < val); + atom.CompareExchange(index, &old, val); + } } template VTKM_EXEC void Min(Atomic& atom, const vtkm::Id& val, const vtkm::Id& index) const { - vtkm::Id old = 1000000000; - do + vtkm::Id old = atom.Get(index); + while (old > val) { - old = atom.CompareAndSwap(index, val, old); - } while (old > val); + atom.CompareExchange(index, &old, val); + } } template diff --git a/vtkm/rendering/Wireframer.h b/vtkm/rendering/Wireframer.h index 09335d181..fbb1883aa 100644 --- a/vtkm/rendering/Wireframer.h +++ b/vtkm/rendering/Wireframer.h @@ -362,7 +362,7 @@ private: blendedColor[2] = color[2] * intensity + srcColor[2] * alpha; blendedColor[3] = alpha + intensity; next.Ints.Color = PackColor(blendedColor); - current.Raw = FrameBuffer.CompareAndSwap(index, next.Raw, current.Raw); + FrameBuffer.CompareExchange(index, ¤t.Raw, next.Raw); } while (current.Floats.Depth > next.Floats.Depth); } diff --git a/vtkm/testing/UnitTestAtomic.cxx b/vtkm/testing/UnitTestAtomic.cxx index 9ad04724f..e84c52b1a 100644 --- a/vtkm/testing/UnitTestAtomic.cxx +++ b/vtkm/testing/UnitTestAtomic.cxx @@ -277,7 +277,7 @@ struct AtomicTests } } - struct CompareAndSwapFunctor : vtkm::worklet::WorkletMapField + struct CompareExchangeFunctor : vtkm::worklet::WorkletMapField { using ControlSignature = void(FieldIn ignored, ExecObject); using ExecutionSignature = void(WorkIndex, _2); @@ -288,8 +288,8 @@ struct AtomicTests bool success = false; for (T overlapIndex = 0; overlapIndex < static_cast(OVERLAP); ++overlapIndex) { - T oldValue = vtkm::AtomicCompareAndSwap(data + arrayIndex, overlapIndex + 1, overlapIndex); - if (oldValue == overlapIndex) + T expectedValue = overlapIndex; + if (vtkm::AtomicCompareExchange(data + arrayIndex, &expectedValue, overlapIndex + 1)) { success = true; break; @@ -303,9 +303,9 @@ struct AtomicTests } }; - VTKM_CONT void TestCompareAndSwap() + VTKM_CONT void TestCompareExchange() { - std::cout << "AtomicCompareAndSwap" << std::endl; + std::cout << "AtomicCompareExchange" << std::endl; vtkm::cont::ArrayHandleBasic array; vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleConstant(0, ARRAY_SIZE), array); array.Allocate(ARRAY_SIZE); @@ -331,7 +331,7 @@ struct AtomicTests TestOr(); TestXor(); TestNot(); - TestCompareAndSwap(); + TestCompareExchange(); } }; diff --git a/vtkm/worklet/OrientPointNormals.h b/vtkm/worklet/OrientPointNormals.h index d875d8765..5efef6039 100644 --- a/vtkm/worklet/OrientPointNormals.h +++ b/vtkm/worklet/OrientPointNormals.h @@ -151,9 +151,8 @@ public: for (vtkm::IdComponent c = 0; c < cells.GetNumberOfComponents(); ++c) { const vtkm::Id cellId = cells[c]; - const bool alreadyVisited = visitedCells.CompareAndSwapBitAtomic(cellId, true, false); - - if (!alreadyVisited) + bool checkNotVisited = false; + if (visitedCells.CompareExchangeBitAtomic(cellId, &checkNotVisited, true)) { // This thread is first to visit cell activeCells.SetBitAtomic(cellId, true); } @@ -216,8 +215,8 @@ public: const bool alreadyVisited = visitedPoints.GetBit(pointId); if (!alreadyVisited) { - const bool alreadyActive = activePoints.CompareAndSwapBitAtomic(pointId, true, false); - if (!alreadyActive) + bool checkNotActive = false; + if (activePoints.CompareExchangeBitAtomic(pointId, &checkNotActive, true)) { // If we're the first thread to mark point active, set ref point: refPoints.Set(pointId, refPtId); } diff --git a/vtkm/worklet/connectivities/UnionFind.h b/vtkm/worklet/connectivities/UnionFind.h index cb65cfb94..cff053e60 100644 --- a/vtkm/worklet/connectivities/UnionFind.h +++ b/vtkm/worklet/connectivities/UnionFind.h @@ -106,19 +106,16 @@ public: // We can use this return "new root" as is without calling findRoot() to // find the "new root". The while loop terminates when both u and v have // the same root (thus united). - auto root_u = UnionFind::findRoot(parents, u); - auto root_v = UnionFind::findRoot(parents, v); + vtkm::Id root_u = UnionFind::findRoot(parents, u); + vtkm::Id root_v = UnionFind::findRoot(parents, v); while (root_u != root_v) { // FIXME: we might be executing the loop one extra time than necessary. - // Nota Bene: VTKm's CompareAndSwap has a different order of parameters - // than common practice, it is (index, new, expected) rather than - // (index, expected, new). if (root_u < root_v) - root_v = parents.CompareAndSwap(root_v, root_u, root_v); + parents.CompareExchange(root_v, &root_v, root_u); else if (root_u > root_v) - root_u = parents.CompareAndSwap(root_u, root_v, root_u); + parents.CompareExchange(root_u, &root_u, root_v); } } diff --git a/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h b/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h index 59c670431..e8d9613a3 100644 --- a/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h +++ b/vtkm/worklet/contourtree_augmented/processcontourtree/HypersweepWorklets.h @@ -264,14 +264,9 @@ public: //Id writeValue = op(vertexValue, parentValue); auto cur = minMaxIndexPortal.Get(parent); // Load the current value at idx - vtkm::Id newVal; // will hold the result of the multiplication - vtkm::Id expect; // will hold the expected value before multiplication - - do - { - expect = cur; // Used to ensure the value hasn't changed since reading - newVal = this->Op(cur, vertexValue); // the actual multiplication - } while ((cur = minMaxIndexPortal.CompareAndSwap(parent, newVal, expect)) != expect); + // Use a compare-exchange loop to ensure the operation gets applied atomically + while (!minMaxIndexPortal.CompareExchange(parent, &cur, this->Op(cur, vertexValue))) + ; //minMaxIndexPortal.Set(parent, writeValue); }