Merge branch 'master' of https://gitlab.kitware.com/vtk/vtk-m into ghost_streamlines

This commit is contained in:
Dave Pugmire 2020-10-21 12:01:31 -04:00
commit ec37279b01
23 changed files with 294 additions and 196 deletions

@ -32,9 +32,10 @@
# - tbb
# - openmp
# - mpich2
# * .gitlab/ci/docker/ubuntu1804/cuda10.1/
# * .gitlab/ci/docker/ubuntu1804/cuda11.1/
# - cuda
# - gcc 7.4
# - gcc 7
# - gcc 8
# - tbb
# - openmp
# - mpich2
@ -49,52 +50,52 @@
GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci
.centos7: &centos7
image: "kitware/vtkm:ci-centos7_cuda10.2-20200820"
image: "kitware/vtkm:ci-centos7_cuda10.2-20201016"
extends:
- .docker_image
.centos8: &centos8
image: "kitware/vtkm:ci-centos8-20200820"
image: "kitware/vtkm:ci-centos8-20201016"
extends:
- .docker_image
.rhel8: &rhel8
image: "kitware/vtkm:ci-rhel8_cuda10.2-20200820"
image: "kitware/vtkm:ci-rhel8_cuda10.2-20201016"
extends:
- .docker_image
.ubuntu1604: &ubuntu1604
image: "kitware/vtkm:ci-ubuntu1604-20200820"
image: "kitware/vtkm:ci-ubuntu1604-20201016"
extends:
- .docker_image
.ubuntu1604_cuda: &ubuntu1604_cuda
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200820"
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20201016"
extends:
- .docker_image
.ubuntu1804: &ubuntu1804
image: "kitware/vtkm:ci-ubuntu1804-20200820"
image: "kitware/vtkm:ci-ubuntu1804-20201016"
extends:
- .docker_image
.ubuntu1804_cuda: &ubuntu1804_cuda
image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200820"
image: "kitware/vtkm:ci-ubuntu1804_cuda11.1-20201016"
extends:
- .docker_image
.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20200820"
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016"
extends:
- .docker_image
.ubuntu2004_doxygen: &ubuntu2004_doxygen
image: "kitware/vtkm:ci-doxygen-20200820"
image: "kitware/vtkm:ci-doxygen-20201016"
extends:
- .docker_image
.ubuntu2004_kokkos: &ubuntu2004_kokkos
image: "kitware/vtkm:ci-ubuntu2004_kokkos-20200820"
image: "kitware/vtkm:ci-ubuntu2004_kokkos-20201016"
extends:
- .docker_image

@ -1,10 +1,10 @@
FROM nvidia/cuda:10.1-devel-ubuntu18.04
FROM nvidia/cuda:11.1-devel-ubuntu18.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
g++ \
g++-8 \
clang-8 \
git \
git-lfs \

@ -30,8 +30,8 @@ cd ubuntu1804/base
sudo docker build -t kitware/vtkm:ci-ubuntu1804-$date .
cd ../..
cd ubuntu1804/cuda10.1
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date .
cd ubuntu1804/cuda11.1
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11.1-$date .
cd ../..
cd ubuntu1804/kokkos-cuda

@ -38,7 +38,7 @@ test:ubuntu1804_gcc9:
- build:ubuntu1804_gcc9
# Build on ubuntu1804 with CUDA + MPI and test on ubuntu1804
# Uses gcc 7.4
# Uses gcc 7
# Uses MPICH2
build:ubuntu1804_gcc7:
tags:
@ -55,6 +55,7 @@ build:ubuntu1804_gcc7:
variables:
CC: "gcc-7"
CXX: "g++-7"
CUDAHOSTCXX: "g++-7"
VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats+no_virtual"
test:ubuntu1804_gcc7:

@ -76,8 +76,8 @@ VTK-m Requires:
Optional dependencies are:
+ CUDA Device Adapter
+ [Cuda Toolkit 9.2+](https://developer.nvidia.com/cuda-toolkit)
+ Note CUDA >= 10.1 is required on Windows
+ [Cuda Toolkit 9.2, >= 10.2](https://developer.nvidia.com/cuda-toolkit)
+ Note CUDA >= 10.2 is required on Windows
+ TBB Device Adapter
+ [TBB](https://www.threadingbuildingblocks.org/)
+ OpenMP Device Adapter
@ -105,12 +105,12 @@ VTK-m has been tested on the following configurations:c
+ On Linux
+ GCC 4.8.5, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
+ CMake 3.12, 3.13, 3.16, 3.17
+ CUDA 9.2.148, 10.0.130, 10.1.105, 10.2.89
+ CUDA 9.2, 10.2, 11.0, 11.1
+ TBB 4.4 U2, 2017 U7
+ On Windows
+ Visual Studio 2015, 2017
+ CMake 3.12, 3.17
+ CUDA 10.1
+ CUDA 10.2
+ TBB 2017 U3, 2018 U2
+ On MacOS
+ AppleClang 9.1

@ -260,7 +260,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(
->ArgNames({ "Values", "Ops", "Stride" }),
vtkm::cont::AtomicArrayTypeList);
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent
// Benchmarks AtomicArray::CompareExchange such that each work index writes to adjacent
// indices.
struct CASSeqWorker : public vtkm::worklet::WorkletMapField
{
@ -273,12 +273,8 @@ struct CASSeqWorker : public vtkm::worklet::WorkletMapField
const vtkm::Id idx = i % portal.GetNumberOfValues();
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
T assumed = static_cast<T>(0);
do
{
assumed = oldVal;
oldVal = portal.CompareAndSwap(idx, assumed + val, assumed);
} while (assumed != oldVal);
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
;
}
};
@ -371,7 +367,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeqBaseline,
->ArgNames({ "Values", "Ops" }),
vtkm::cont::AtomicArrayTypeList);
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
// Benchmarks AtomicArray::CompareExchange such that each work index writes to
// a strided index:
// ( floor(i / stride) + stride * (i % stride)
struct CASStrideWorker : public vtkm::worklet::WorkletMapField
@ -393,12 +389,8 @@ struct CASStrideWorker : public vtkm::worklet::WorkletMapField
const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
T assumed = static_cast<T>(0);
do
{
assumed = oldVal;
oldVal = portal.CompareAndSwap(idx, assumed + val, assumed);
} while (assumed != oldVal);
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
;
}
};

@ -90,7 +90,7 @@ VTKM_EXEC_CONT vtkm::Id BinarySearch(const PortalT& portal, const T& val)
template <typename IterT, typename T, typename Comp>
VTKM_EXEC_CONT IterT LowerBound(IterT first, IterT last, const T& val, Comp comp)
{
#ifdef VTKM_CUDA
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
auto len = last - first;
while (len != 0)
{
@ -107,9 +107,9 @@ VTKM_EXEC_CONT IterT LowerBound(IterT first, IterT last, const T& val, Comp comp
}
}
return first;
#else // VTKM_CUDA
#else // VTKM_CUDA || VTKM_HIP
return std::lower_bound(first, last, val, std::move(comp));
#endif // VTKM_CUDA
#endif // VTKM_CUDA || VTKM_HIP
}
template <typename IterT, typename T>
@ -139,7 +139,7 @@ VTKM_EXEC_CONT vtkm::Id LowerBound(const PortalT& portal, const T& val)
template <typename IterT, typename T, typename Comp>
VTKM_EXEC_CONT IterT UpperBound(IterT first, IterT last, const T& val, Comp comp)
{
#ifdef VTKM_CUDA
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
auto len = last - first;
while (len != 0)
{
@ -156,9 +156,9 @@ VTKM_EXEC_CONT IterT UpperBound(IterT first, IterT last, const T& val, Comp comp
}
}
return first;
#else // VTKM_CUDA
#else // VTKM_CUDA || VTKM_HIP
return std::upper_bound(first, last, val, std::move(comp));
#endif // VTKM_CUDA
#endif // VTKM_CUDA || VTKM_HIP
}
template <typename IterT, typename T>

@ -142,9 +142,9 @@ VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
const volatile T* vaddr = addr; /* volatile to bypass cache*/
volatile T* const vaddr = addr; /* volatile to bypass cache*/
if (order == vtkm::MemoryOrder::SequentiallyConsistent)
{
__threadfence();
@ -207,15 +207,23 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
T desired,
T expected,
vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
T* expected,
T desired,
vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
auto result = atomicCAS(addr, expected, desired);
auto result = atomicCAS(addr, *expected, desired);
AtomicLoadFence(order);
return result;
if (result == *expected)
{
return true;
}
else
{
*expected = result;
return false;
}
}
}
} // namespace vtkm::detail
@ -239,9 +247,14 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
#undef KOKKOS_ENABLE_CUDA
#endif
#if defined(KOKKOS_ENABLE_HIP) && !defined(VTKM_HIP)
#undef KOKKOS_ENABLE_HIP
#endif
#endif //KOKKOS_MACROS_HPP not loaded
#include <Kokkos_Core.hpp>
#include <Kokkos_Atomic.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm
@ -270,7 +283,7 @@ VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
switch (order)
{
@ -350,15 +363,23 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
T desired,
T expected,
vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
T* expected,
T desired,
vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
T result = Kokkos::atomic_compare_exchange(addr, expected, desired);
T oldValue = Kokkos::atomic_compare_exchange(addr, *expected, desired);
AtomicLoadFence(order);
return result;
if (oldValue == *expected)
{
return true;
}
else
{
*expected = oldValue;
return false;
}
}
}
} // namespace vtkm::detail
@ -407,35 +428,35 @@ VTKM_EXEC_CONT inline T BitCast(T&& src)
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(const vtkm::UInt8* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(vtkm::UInt8* const addr, vtkm::MemoryOrder order)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 8-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
auto result = *static_cast<volatile vtkm::UInt8* const>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(const vtkm::UInt16* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(vtkm::UInt16* const addr, vtkm::MemoryOrder order)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 16-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
auto result = *static_cast<volatile vtkm::UInt16* const>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(const vtkm::UInt32* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(vtkm::UInt32* const addr, vtkm::MemoryOrder order)
{
auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
auto result = *static_cast<volatile vtkm::UInt32* const>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(const vtkm::UInt64* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm::MemoryOrder order)
{
auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
auto result = *static_cast<volatile vtkm::UInt64* const>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
@ -487,13 +508,22 @@ VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
{ \
return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
} \
VTKM_EXEC_CONT inline vtkmType AtomicCompareAndSwapImpl( \
vtkmType* addr, vtkmType desired, vtkmType expected, vtkm::MemoryOrder order) \
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder order) \
{ \
return BitCast<vtkmType>( \
vtkmType result = BitCast<vtkmType>( \
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
BitCast<winType>(desired), \
BitCast<winType>(expected))); \
BitCast<winType>(*expected))); \
if (result == *expected) \
{ \
return true; \
} \
else \
{ \
*expected = result; \
return false; \
} \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
@ -538,7 +568,7 @@ VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline T AtomicLoadImpl(T* const addr, vtkm::MemoryOrder order)
{
return __atomic_load_n(addr, GccAtomicMemOrder(order));
}
@ -580,14 +610,13 @@ VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
T desired,
T expected,
vtkm::MemoryOrder order)
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
T* expected,
T desired,
vtkm::MemoryOrder order)
{
__atomic_compare_exchange_n(
addr, &expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
return expected;
return __atomic_compare_exchange_n(
addr, expected, desired, false, GccAtomicMemOrder(order), GccAtomicMemOrder(order));
}
}
} // namespace vtkm::detail
@ -627,7 +656,7 @@ using AtomicTypesSupported = vtkm::List<vtkm::UInt32, vtkm::UInt64>;
/// or after that write.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoad(const T* pointer,
VTKM_EXEC_CONT inline T AtomicLoad(T* const pointer,
vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
{
return detail::AtomicLoadImpl(pointer, order);
@ -796,24 +825,34 @@ VTKM_EXEC_CONT inline T AtomicNot(
/// \brief Atomic function that replaces a value given a condition.
///
/// Given a pointer, a new desired value, and an expected value, replaces the value at the
/// pointer if it is the same as the expected value with the new desired value. If the original
/// value in the pointer does not equal the expected value, then the memory at the pointer
/// remains unchanged. In either case, the function returns the _old_ original value that
/// was at the pointer.
/// Given a pointer to a `shared` value, a pointer holding the `expected` value at that shared
/// location, and a new `desired` value, `AtomicCompareExchange` compares the existing `shared`
/// value to the `expected` value, and then conditionally replaces the `shared` value with
/// the provided `desired` value. Otherwise, the `expected` value gets replaced with the
/// `shared` value. Note that in either case, the function returns with `expected` replaced
/// with the value _originally_ in `shared` at the start of the call.
///
/// If multiple threads call `AtomicCompareAndSwap` simultaneously, the result will be consistent
/// as if one was called before the other (although it is indeterminate which will be applied
/// first).
/// If the `shared` value and `expected` value are the same, then `shared` gets set to
/// `desired`, and `AtomicCompareAndExchange` returns `true`.
///
/// If the `shared` value and `expected` value are different, then `expected` gets set
/// to `shared`, and `AtomicCompareAndExchange` returns `false`. The value at `shared`
/// is _not_ changed in this case.
///
/// If multiple threads call `AtomicCompareExchange` simultaneously with the same `shared`
/// pointer, the result will be consistent as if one was called before the other (although
/// it is indeterminate which will be applied first). Note that the `expected` pointer should
/// _not_ be shared among threads. The `expected` pointer should be thread-local (often
/// pointing to an object on the stack).
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwap(
T* pointer,
VTKM_EXEC_CONT inline T AtomicCompareExchange(
T* shared,
T* expected,
T desired,
T expected,
vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
return detail::AtomicCompareAndSwapImpl(pointer, desired, expected, order);
return detail::AtomicCompareExchangeImpl(shared, expected, desired, order);
}
} // namespace vtkm

@ -2381,7 +2381,8 @@ static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numer
QType& quotient)
{
int iQuotient;
#ifdef VTKM_CUDA
// See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
const vtkm::Float32 result =
VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient);
#else
@ -2411,11 +2412,20 @@ static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numer
///
static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral)
{
// See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral);
#else
return std::modf(x, &integral);
#endif
}
static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral)
{
#if defined(VTKM_CUDA)
return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral);
#else
return std::modf(x, &integral);
#endif
}
//-----------------------------------------------------------------------------

@ -1011,7 +1011,8 @@ static inline VTKM_EXEC_CONT vtkm::Float32 RemainderQuotient(vtkm::Float32 numer
QType& quotient)
{
int iQuotient;
#ifdef VTKM_CUDA
// See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
const vtkm::Float32 result =
VTKM_CUDA_MATH_FUNCTION_32(remquo)(numerator, denominator, &iQuotient);
#else
@ -1041,11 +1042,20 @@ static inline VTKM_EXEC_CONT vtkm::Float64 RemainderQuotient(vtkm::Float64 numer
///
static inline VTKM_EXEC_CONT vtkm::Float32 ModF(vtkm::Float32 x, vtkm::Float32& integral)
{
// See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
return VTKM_CUDA_MATH_FUNCTION_32(modf)(x, &integral);
#else
return std::modf(x, &integral);
#endif
}
static inline VTKM_EXEC_CONT vtkm::Float64 ModF(vtkm::Float64 x, vtkm::Float64& integral)
{
#if defined(VTKM_CUDA)
return VTKM_CUDA_MATH_FUNCTION_64(modf)(x, &integral);
#else
return std::modf(x, &integral);
#endif
}
//-----------------------------------------------------------------------------

@ -430,56 +430,74 @@ public:
}
/// Perform an atomic compare-and-swap operation on the bit at @a bitIdx.
/// If the value in memory is equal to @a expectedBit, it is replaced with
/// the value of @a newBit and the original value of the bit is returned as a
/// boolean. This method implements a full memory barrier around the atomic
/// If the value in memory is equal to @a oldBit, it is replaced with
/// the value of @a newBit and true is returned. If the value in memory is
/// not equal to @oldBit, @oldBit is changed to that value and false is
/// returned. This method implements a full memory barrier around the atomic
/// operation.
VTKM_EXEC_CONT
bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
bool CompareExchangeBitAtomic(vtkm::Id bitIdx, bool* oldBit, bool newBit) const
{
VTKM_STATIC_ASSERT_MSG(!IsConst, "Attempt to modify const BitField portal.");
using WordType = WordTypePreferred;
const auto coord = this->GetBitCoordinateFromIndex<WordType>(bitIdx);
const auto bitmask = WordType(1) << coord.BitOffset;
WordType oldWord;
WordType newWord;
WordType oldWord = this->GetWord<WordType>(coord.WordIndex);
do
{
oldWord = this->GetWord<WordType>(coord.WordIndex);
bool oldBitSet = (oldWord & bitmask) != WordType(0);
if (oldBitSet != expectedBit)
bool actualBit = (oldWord & bitmask) != WordType(0);
if (actualBit != *oldBit)
{ // The bit-of-interest does not match what we expected.
return oldBitSet;
*oldBit = actualBit;
return false;
}
else if (oldBitSet == newBit)
else if (actualBit == newBit)
{ // The bit hasn't changed, but also already matches newVal. We're done.
return expectedBit;
return true;
}
// Compute the new word
newWord = oldWord ^ bitmask;
} // CAS loop to resolve any conflicting changes to other bits in the word.
while (this->CompareAndSwapWordAtomic(coord.WordIndex, newWord, oldWord) != oldWord);
// Attempt to update the word with a compare-exchange in the loop condition.
// If the old word changed since last queried, oldWord will get updated and
// the loop will continue until it succeeds.
} while (!this->CompareExchangeWordAtomic(coord.WordIndex, &oldWord, oldWord ^ bitmask));
return true;
}
VTKM_DEPRECATED(1.6, "Use CompareExchangeBitAtomic. (Note the changed interface.)")
VTKM_EXEC_CONT bool CompareAndSwapBitAtomic(vtkm::Id bitIdx, bool newBit, bool expectedBit) const
{
this->CompareExchangeBitAtomic(bitIdx, &expectedBit, newBit);
return expectedBit;
}
/// Perform an atomic compare-and-swap operation on the word at @a wordIdx.
/// If the word in memory is equal to @a expectedWord, it is replaced with
/// the value of @a newWord and the original word is returned. This method
/// implements a full memory barrier around the atomic operation.
/// Perform an atomic compare-exchange operation on the word at @a wordIdx.
/// If the word in memory is equal to @a oldWord, it is replaced with
/// the value of @a newWord and true returned. If the word in memory is not
/// equal to @oldWord, @oldWord is set to the word in memory and false is
/// returned. This method implements a full memory barrier around the atomic
/// operation.
template <typename WordType = WordTypePreferred>
VTKM_EXEC_CONT WordType CompareAndSwapWordAtomic(vtkm::Id wordIdx,
WordType newWord,
WordType expected) const
VTKM_EXEC_CONT bool CompareExchangeWordAtomic(vtkm::Id wordIdx,
WordType* oldWord,
WordType newWord) const
{
VTKM_STATIC_ASSERT_MSG(!IsConst, "Attempt to modify const BitField portal.");
VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
"Requested WordType does not support atomic"
" operations on target execution platform.");
WordType* addr = this->GetWordAddress<WordType>(wordIdx);
return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
return vtkm::AtomicCompareExchange(addr, oldWord, newWord);
}
template <typename WordType = WordTypePreferred>
VTKM_DEPRECATED(1.6, "Use CompareExchangeWordAtomic. (Note the changed interface.)")
VTKM_EXEC_CONT WordType
CompareAndSwapWordAtomic(vtkm::Id wordIdx, WordType newWord, WordType expected) const
{
this->CompareExchangeWordAtomic(wordIdx, &expected, newWord);
return expected;
}
private:

@ -70,7 +70,8 @@ struct VTKM_DEPRECATED(1.6, "Use the functions in vtkm/Atomic.h.") AtomicInterfa
template <typename T>
VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
{
return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
vtkm::AtomicCompareExchange(addr, &expected, newWord);
return expected;
}
};
}

@ -71,7 +71,8 @@ struct VTKM_DEPRECATED(1.6, "Use the functions in vtkm/Atomic.h.") AtomicInterfa
template <typename T>
VTKM_EXEC_CONT static T CompareAndSwap(T* addr, T newWord, T expected)
{
return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
vtkm::AtomicCompareExchange(addr, &expected, newWord);
return expected;
}
};
}

@ -657,6 +657,23 @@ VTKM_EXEC static inline vtkm::Id IteratorDistanceImpl(const Iterator& from,
return static_cast<vtkm::Id>(to - from);
}
#if defined(VTKM_HIP)
template <typename Iterator>
__host__ static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to)
{
return static_cast<vtkm::Id>(std::distance(from, to));
}
template <typename Iterator>
__device__ static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to)
{
return IteratorDistanceImpl(
from, to, typename std::iterator_traits<Iterator>::iterator_category{});
}
#else
template <typename Iterator>
VTKM_EXEC static inline vtkm::Id IteratorDistance(const Iterator& from, const Iterator& to)
{
@ -668,6 +685,8 @@ VTKM_EXEC static inline vtkm::Id IteratorDistance(const Iterator& from, const It
#endif
}
#endif
template <class InputPortalType, class ValuesPortalType, class OutputPortalType>
struct LowerBoundsKernel
{

@ -203,11 +203,19 @@ struct TestingBitField
DEVICE_ASSERT(testValues("XorBitAtomic"));
const auto notBit = !bit;
bool casResult = portal.CompareAndSwapBitAtomic(i, bit, notBit);
DEVICE_ASSERT(casResult == bit);
// A compare-exchange that should fail
auto expectedBit = notBit;
bool cxResult = portal.CompareExchangeBitAtomic(i, &expectedBit, bit);
DEVICE_ASSERT(!cxResult);
DEVICE_ASSERT(expectedBit != notBit);
DEVICE_ASSERT(portal.GetBit(i) == expectedBit);
DEVICE_ASSERT(portal.GetBit(i) == bit);
casResult = portal.CompareAndSwapBitAtomic(i, notBit, bit);
DEVICE_ASSERT(casResult == bit);
// A compare-exchange that should succeed.
expectedBit = bit;
cxResult = portal.CompareExchangeBitAtomic(i, &expectedBit, notBit);
DEVICE_ASSERT(cxResult);
DEVICE_ASSERT(expectedBit == bit);
DEVICE_ASSERT(portal.GetBit(i) == notBit);
return true;
@ -258,12 +266,20 @@ struct TestingBitField
portal.XorWordAtomic(i, mod);
DEVICE_ASSERT(testValues("XorWordAtomic"));
// Compare-exchange that should fail
const WordType notWord = static_cast<WordType>(~word);
auto casResult = portal.CompareAndSwapWordAtomic(i, word, notWord);
DEVICE_ASSERT(casResult == word);
WordType expectedWord = notWord;
bool cxResult = portal.CompareExchangeWordAtomic(i, &expectedWord, word);
DEVICE_ASSERT(!cxResult);
DEVICE_ASSERT(expectedWord != notWord);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == expectedWord);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == word);
casResult = portal.CompareAndSwapWordAtomic(i, notWord, word);
DEVICE_ASSERT(casResult == word);
// Compare-exchange that should succeed
expectedWord = word;
cxResult = portal.CompareExchangeWordAtomic(i, &expectedWord, notWord);
DEVICE_ASSERT(cxResult);
DEVICE_ASSERT(expectedWord == word);
DEVICE_ASSERT(portal.template GetWord<WordType>(i) == notWord);
return true;

@ -327,14 +327,9 @@ public:
T value = (T)index;
//Get the old value from the array
T oldValue = this->AArray.Get(0);
//This creates an atomic add using the CAS operatoin
T assumed = T(0);
do
{
assumed = oldValue;
oldValue = this->AArray.CompareAndSwap(0, (assumed + value), assumed);
} while (assumed != oldValue);
//Use atomic compare-exchange to atomically add value
while (!this->AArray.CompareExchange(0, &oldValue, oldValue + value))
;
}
VTKM_CONT void SetErrorMessageBuffer(const vtkm::exec::internal::ErrorMessageBuffer&) {}

@ -106,7 +106,7 @@ public:
// We'll cast the signed types to unsigned to work around this.
using APIType = typename detail::MakeUnsigned<ValueType>::type;
return static_cast<T>(vtkm::AtomicLoad(reinterpret_cast<const APIType*>(this->Data + index)));
return static_cast<T>(vtkm::AtomicLoad(reinterpret_cast<APIType*>(this->Data + index)));
}
/// \brief Peform an atomic addition with sequentially consistent memory
@ -158,47 +158,42 @@ public:
vtkm::AtomicStore(reinterpret_cast<APIType*>(this->Data + index), static_cast<APIType>(value));
}
/// \brief Perform an atomic CAS operation with sequentially consistent
/// \brief Perform an atomic compare and exchange operation with sequentially consistent
/// memory ordering.
/// \param index The index of the array element that will be atomically
/// modified.
/// \param oldValue A pointer to the expected value of the indexed element.
/// \param newValue The value to replace the indexed element with.
/// \param oldValue The expected value of the indexed element.
/// \return If the operation is successful, \a oldValue is returned. Otherwise
/// the current value of the indexed element is returned, and the element is
/// not modified.
/// \return If the operation is successful, \a true is returned. Otherwise,
/// \a oldValue is replaced with the current value of the indexed element,
/// the element is not modified, and \a false is returned. In either case, \a oldValue
/// becomes the value that was originally in the indexed element.
///
/// This operation is typically used in a loop. For example usage,
/// an atomic multiplication may be implemented using CAS as follows:
/// an atomic multiplication may be implemented using compare-exchange as follows:
///
/// ```
/// AtomicArrayExecutionObject<vtkm::Int32, ...> arr = ...;
///
/// // CAS multiplication:
/// vtkm::Int32 cur = arr->Get(idx); // Load the current value at idx
/// vtkm::Int32 newVal; // will hold the result of the multiplication
/// vtkm::Int32 expect; // will hold the expected value before multiplication
/// // Compare-exchange multiplication:
/// vtkm::Int32 current = arr->Get(idx); // Load the current value at idx
/// do {
/// expect = cur; // Used to ensure the value hasn't changed since reading
/// newVal = cur * multFactor; // the actual multiplication
/// }
/// while ((cur = arr->CompareAndSwap(idx, newVal, expect)) == expect);
/// vtkm::Int32 newVal = current * multFactor; // the actual multiplication
/// } while (!arr->CompareExchange(idx, &current, newVal));
/// ```
///
/// The while condition here updates \a cur with the pre-CAS value of the
/// operation (the return from CompareAndSwap), and compares this to the
/// expected value. If the values match, the operation was successful and the
/// The while condition here updates \a newVal what the proper multiplication
/// is given the expected current value. It then compares this to the
/// value in the array. If the values match, the operation was successful and the
/// loop exits. If the values do not match, the value at \a idx was changed
/// by another thread since the initial Get, and the CAS operation failed --
/// the target element was not modified by the CAS call. If this happens, the
/// loop body re-executes using the new value of \a cur and tries again until
/// by another thread since the initial Get, and the compare-exchange operation failed --
/// the target element was not modified by the compare-exchange call. If this happens, the
/// loop body re-executes using the new value of \a current and tries again until
/// it succeeds.
///
VTKM_SUPPRESS_EXEC_WARNINGS
VTKM_EXEC
ValueType CompareAndSwap(vtkm::Id index,
const ValueType& newValue,
const ValueType& oldValue) const
bool CompareExchange(vtkm::Id index, ValueType* oldValue, const ValueType& newValue) const
{
// We only support 32/64 bit signed/unsigned ints, and vtkm::Atomic
// currently only provides API for unsigned types.
@ -207,9 +202,18 @@ public:
// is how overflow works, and signed overflow is already undefined.
using APIType = typename detail::MakeUnsigned<ValueType>::type;
return static_cast<T>(vtkm::AtomicCompareAndSwap(reinterpret_cast<APIType*>(this->Data + index),
static_cast<APIType>(newValue),
static_cast<APIType>(oldValue)));
return vtkm::AtomicCompareExchange(reinterpret_cast<APIType*>(this->Data + index),
reinterpret_cast<APIType*>(oldValue),
static_cast<APIType>(newValue));
}
VTKM_DEPRECATED(1.6, "Use CompareExchange. (Note the changed interface.)")
VTKM_EXEC ValueType CompareAndSwap(vtkm::Id index,
const ValueType& newValue,
ValueType oldValue) const
{
this->CompareExchange(index, &oldValue, newValue);
return oldValue;
}
private:

@ -106,21 +106,21 @@ public:
template <typename Atomic>
VTKM_EXEC void Max(Atomic& atom, const vtkm::Id& val, const vtkm::Id& index) const
{
vtkm::Id old = -1;
do
vtkm::Id old = atom.Get(index);
while (old < val)
{
old = atom.CompareAndSwap(index, val, old);
} while (old < val);
atom.CompareExchange(index, &old, val);
}
}
template <typename Atomic>
VTKM_EXEC void Min(Atomic& atom, const vtkm::Id& val, const vtkm::Id& index) const
{
vtkm::Id old = 1000000000;
do
vtkm::Id old = atom.Get(index);
while (old > val)
{
old = atom.CompareAndSwap(index, val, old);
} while (old > val);
atom.CompareExchange(index, &old, val);
}
}
template <typename T, typename AtomicType>

@ -362,7 +362,7 @@ private:
blendedColor[2] = color[2] * intensity + srcColor[2] * alpha;
blendedColor[3] = alpha + intensity;
next.Ints.Color = PackColor(blendedColor);
current.Raw = FrameBuffer.CompareAndSwap(index, next.Raw, current.Raw);
FrameBuffer.CompareExchange(index, &current.Raw, next.Raw);
} while (current.Floats.Depth > next.Floats.Depth);
}

@ -277,7 +277,7 @@ struct AtomicTests
}
}
struct CompareAndSwapFunctor : vtkm::worklet::WorkletMapField
struct CompareExchangeFunctor : vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn ignored, ExecObject);
using ExecutionSignature = void(WorkIndex, _2);
@ -288,8 +288,8 @@ struct AtomicTests
bool success = false;
for (T overlapIndex = 0; overlapIndex < static_cast<T>(OVERLAP); ++overlapIndex)
{
T oldValue = vtkm::AtomicCompareAndSwap(data + arrayIndex, overlapIndex + 1, overlapIndex);
if (oldValue == overlapIndex)
T expectedValue = overlapIndex;
if (vtkm::AtomicCompareExchange(data + arrayIndex, &expectedValue, overlapIndex + 1))
{
success = true;
break;
@ -303,9 +303,9 @@ struct AtomicTests
}
};
VTKM_CONT void TestCompareAndSwap()
VTKM_CONT void TestCompareExchange()
{
std::cout << "AtomicCompareAndSwap" << std::endl;
std::cout << "AtomicCompareExchange" << std::endl;
vtkm::cont::ArrayHandleBasic<T> array;
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandleConstant<T>(0, ARRAY_SIZE), array);
array.Allocate(ARRAY_SIZE);
@ -331,7 +331,7 @@ struct AtomicTests
TestOr();
TestXor();
TestNot();
TestCompareAndSwap();
TestCompareExchange();
}
};

@ -151,9 +151,8 @@ public:
for (vtkm::IdComponent c = 0; c < cells.GetNumberOfComponents(); ++c)
{
const vtkm::Id cellId = cells[c];
const bool alreadyVisited = visitedCells.CompareAndSwapBitAtomic(cellId, true, false);
if (!alreadyVisited)
bool checkNotVisited = false;
if (visitedCells.CompareExchangeBitAtomic(cellId, &checkNotVisited, true))
{ // This thread is first to visit cell
activeCells.SetBitAtomic(cellId, true);
}
@ -216,8 +215,8 @@ public:
const bool alreadyVisited = visitedPoints.GetBit(pointId);
if (!alreadyVisited)
{
const bool alreadyActive = activePoints.CompareAndSwapBitAtomic(pointId, true, false);
if (!alreadyActive)
bool checkNotActive = false;
if (activePoints.CompareExchangeBitAtomic(pointId, &checkNotActive, true))
{ // If we're the first thread to mark point active, set ref point:
refPoints.Set(pointId, refPtId);
}

@ -106,19 +106,16 @@ public:
// We can use this return "new root" as is without calling findRoot() to
// find the "new root". The while loop terminates when both u and v have
// the same root (thus united).
auto root_u = UnionFind::findRoot(parents, u);
auto root_v = UnionFind::findRoot(parents, v);
vtkm::Id root_u = UnionFind::findRoot(parents, u);
vtkm::Id root_v = UnionFind::findRoot(parents, v);
while (root_u != root_v)
{
// FIXME: we might be executing the loop one extra time than necessary.
// Nota Bene: VTKm's CompareAndSwap has a different order of parameters
// than common practice, it is (index, new, expected) rather than
// (index, expected, new).
if (root_u < root_v)
root_v = parents.CompareAndSwap(root_v, root_u, root_v);
parents.CompareExchange(root_v, &root_v, root_u);
else if (root_u > root_v)
root_u = parents.CompareAndSwap(root_u, root_v, root_u);
parents.CompareExchange(root_u, &root_u, root_v);
}
}

@ -264,14 +264,9 @@ public:
//Id writeValue = op(vertexValue, parentValue);
auto cur = minMaxIndexPortal.Get(parent); // Load the current value at idx
vtkm::Id newVal; // will hold the result of the multiplication
vtkm::Id expect; // will hold the expected value before multiplication
do
{
expect = cur; // Used to ensure the value hasn't changed since reading
newVal = this->Op(cur, vertexValue); // the actual multiplication
} while ((cur = minMaxIndexPortal.CompareAndSwap(parent, newVal, expect)) != expect);
// Use a compare-exchange loop to ensure the operation gets applied atomically
while (!minMaxIndexPortal.CompareExchange(parent, &cur, this->Op(cur, vertexValue)))
;
//minMaxIndexPortal.Set(parent, writeValue);
}