Simplify and extend AtomicArray implementation.

- Use AtomicInterface to implement device-specific atomic operations.
- Remove DeviceAdapterAtomicArrayImplementations.
- Extend supported atomic types to include unsigned 32/64-bit ints.
- Add a static_assert to check that AtomicArray type is supported.
- Add documentation for AtomicArrayExecutionObject, including a CAS
  example.
- Add a `T Get(idx)` method to AtomicArrayExecutionObject that does
  an atomic load, and update existing CAS usage to use this instead
  of `Add(idx, 0)`.
This commit is contained in:
Allison Vacanti 2019-08-22 15:52:58 -04:00
parent 9560c4f633
commit 884616788a
11 changed files with 131 additions and 315 deletions

@ -317,7 +317,7 @@ public:
vtkm::Id idx = i % this->ArraySize;
ValueType val = static_cast<ValueType>(i);
// Get the old val with a no-op
ValueType oldVal = this->Portal.Add(idx, static_cast<ValueType>(0));
ValueType oldVal = this->Portal.Get(idx);
ValueType assumed = static_cast<ValueType>(0);
do
{
@ -445,7 +445,7 @@ public:
vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % this->ArraySize;
ValueType val = static_cast<ValueType>(i);
// Get the old val with a no-op
ValueType oldVal = this->Portal.Add(idx, static_cast<ValueType>(0));
ValueType oldVal = this->Portal.Get(idx);
ValueType assumed = static_cast<ValueType>(0);
do
{

@ -11,6 +11,7 @@
#define vtk_m_cont_AtomicArray_h
#include <vtkm/ListTag.h>
#include <vtkm/StaticAssert.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/ExecutionObjectBase.h>
@ -23,7 +24,8 @@ namespace cont
/// \brief A type list containing types that can be used with an AtomicArray.
///
struct AtomicArrayTypeListTag : vtkm::ListTagBase<vtkm::Int32, vtkm::Int64>
struct AtomicArrayTypeListTag
: vtkm::ListTagBase<vtkm::UInt32, vtkm::Int32, vtkm::UInt64, vtkm::Int64>
{
};
@ -37,14 +39,19 @@ struct AtomicArrayTypeListTag : vtkm::ListTagBase<vtkm::Int32, vtkm::Int64>
/// vtkm::cont::ArrayHandle that is used as the underlying storage for the
/// AtomicArray
///
/// Supported Operations: add / compare and swap (CAS)
/// Supported Operations: get / add / compare and swap (CAS). See
/// AtomicArrayExecutionObject for details.
///
/// Supported Types: 32 / 64 bit signed integers
/// Supported Types: 32 / 64 bit signed/unsigned integers.
///
///
template <typename T>
class AtomicArray : public vtkm::cont::ExecutionObjectBase
{
static constexpr bool ValueTypeIsValid = vtkm::ListContains<AtomicArrayTypeListTag, T>::value;
VTKM_STATIC_ASSERT_MSG(ValueTypeIsValid, "AtomicArray used with unsupported ValueType.");
public:
using ValueType = T;

@ -683,15 +683,6 @@ public:
#endif
};
/// \brief Class providing a device-specific support for atomic operations.
///
/// The class provide the actual implementation used by
/// vtkm::cont::DeviceAdapterAtomicArrayImplementation.
///
/// TODO combine this with AtomicInterfaceExecution.
template <typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation;
/// \brief Class providing a device-specific support for atomic operations.
///
/// AtomicInterfaceControl provides atomic operations for the control

@ -13,7 +13,6 @@ set(headers
AtomicInterfaceExecutionCuda.h
CudaAllocator.h
DeviceAdapterAlgorithmCuda.h
DeviceAdapterAtomicArrayImplementationCuda.h
DeviceAdapterRuntimeDetectorCuda.h
DeviceAdapterTagCuda.h
DeviceAdapterTimerImplementationCuda.h

@ -27,7 +27,6 @@
#include <vtkm/cont/cuda/ErrorCuda.h>
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/cuda/internal/AtomicInterfaceExecutionCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTimerImplementationCuda.h>

@ -1,133 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h
#define vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/StorageBasic.h>
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
// Disable warnings we check vtkm for but Thrust does not.
#include <vtkm/exec/cuda/internal/ThrustPatches.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#include <thrust/device_ptr.h>
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm
{
namespace cont
{
/// CUDA contains its own atomic operations
///
template <typename T>
class DeviceAdapterAtomicArrayImplementation<T, vtkm::cont::DeviceAdapterTagCuda>
{
public:
VTKM_CONT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle)
: Portal(handle.PrepareForInPlace(vtkm::cont::DeviceAdapterTagCuda()))
{
}
VTKM_EXEC T Add(vtkm::Id index, const T& value) const
{
// Although this function is marked VTKM_EXEC, this currently expands to
// __host__ __device__, and nvcc 8.0.61 errors when calling the __device__
// function vtkmAtomicAdd. VTKM_SUPPRESS_EXEC_WARNINGS does not fix this.
// We work around this by calling the __device__ function inside of a
// __CUDA_ARCH__ guard, as nvcc is smart enough to recognize that this is a
// safe usage of a __device__ function in a __host__ __device__ context.
#ifdef VTKM_CUDA_DEVICE_PASS
T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index);
return this->vtkmAtomicAdd(lockedValue, value);
#else
// Shut up, compiler
(void)index;
(void)value;
throw vtkm::cont::ErrorExecution("AtomicArray used in control environment, "
"or incorrect array implementation used "
"for device.");
#endif
}
VTKM_EXEC T CompareAndSwap(vtkm::Id index,
const vtkm::Int64& newValue,
const vtkm::Int64& oldValue) const
{
// Although this function is marked VTKM_EXEC, this currently expands to
// __host__ __device__, and nvcc 8.0.61 errors when calling the __device__
// function vtkmAtomicAdd. VTKM_SUPPRESS_EXEC_WARNINGS does not fix this.
// We work around this by calling the __device__ function inside of a
// __CUDA_ARCH__ guard, as nvcc is smart enough to recognize that this is a
// safe usage of a __device__ function in a __host__ __device__ context.
#ifdef VTKM_CUDA_DEVICE_PASS
T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index);
return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
// Shut up, compiler.
(void)index;
(void)newValue;
(void)oldValue;
throw vtkm::cont::ErrorExecution("AtomicArray used in control environment, "
"or incorrect array implementation used "
"for device.");
#endif
}
private:
using PortalType =
typename vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>::template ExecutionTypes<
vtkm::cont::DeviceAdapterTagCuda>::Portal;
PortalType Portal;
VTKM_SUPPRESS_EXEC_WARNINGS
__device__ vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
{
return atomicAdd((unsigned long long*)address, (unsigned long long)value);
}
VTKM_SUPPRESS_EXEC_WARNINGS
__device__ vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
{
return atomicAdd(address, value);
}
VTKM_SUPPRESS_EXEC_WARNINGS
__device__ vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
const vtkm::Int32& newValue,
const vtkm::Int32& oldValue) const
{
return atomicCAS(address, oldValue, newValue);
}
VTKM_SUPPRESS_EXEC_WARNINGS
__device__ vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
const vtkm::Int64& newValue,
const vtkm::Int64& oldValue) const
{
return atomicCAS((unsigned long long int*)address,
(unsigned long long int)oldValue,
(unsigned long long int)newValue);
}
};
}
} // end namespace vtkm::cont
#endif // vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h

@ -22,7 +22,6 @@ set(headers
AtomicInterfaceExecution.h
ConnectivityExplicitInternals.h
DeviceAdapterAlgorithmGeneral.h
DeviceAdapterAtomicArrayImplementation.h
DeviceAdapterListHelpers.h
DynamicTransform.h
FunctorsGeneral.h

@ -19,7 +19,6 @@
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>

@ -1,142 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h
#define vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/StorageBasic.h>
#include <vtkm/internal/Configure.h>
#include <vtkm/internal/Windows.h>
#include <vtkm/Types.h>
namespace vtkm
{
namespace cont
{
/// \brief Class providing a device-specific atomic interface.
///
/// The class provide the actual implementation used by vtkm::exec::AtomicArray.
/// A serial default implementation is provided. But each device will have a different
/// implementation.
///
/// Serial requires no form of atomicity
///
template <typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation
{
using PortalType =
typename vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>::template ExecutionTypes<
DeviceTag>::Portal;
using IteratorsType = vtkm::cont::ArrayPortalToIterators<PortalType>;
IteratorsType Iterators;
public:
VTKM_CONT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle)
: Iterators(IteratorsType(handle.PrepareForInPlace(DeviceTag())))
{
}
T Add(vtkm::Id index, const T& value) const
{
T* lockedValue;
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
using IteratorType = typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType;
typename IteratorType::pointer temp =
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
lockedValue = temp;
return this->vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin() + index);
return this->vtkmAtomicAdd(lockedValue, value);
#endif
}
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
T* lockedValue;
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
using IteratorType = typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType;
typename IteratorType::pointer temp =
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
lockedValue = temp;
return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin() + index);
return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
#if defined(VTKM_MSVC) //MSVC atomics
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
{
return InterlockedExchangeAdd(reinterpret_cast<volatile long*>(address), value);
}
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
{
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(address), value);
}
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
const vtkm::Int32& newValue,
const vtkm::Int32& oldValue) const
{
return InterlockedCompareExchange(
reinterpret_cast<volatile long*>(address), newValue, oldValue);
}
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
const vtkm::Int64& newValue,
const vtkm::Int64& oldValue) const
{
return InterlockedCompareExchange64(
reinterpret_cast<volatile long long*>(address), newValue, oldValue);
}
#else //gcc built-in atomics
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
{
return __sync_fetch_and_add(address, value);
}
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
{
return __sync_fetch_and_add(address, value);
}
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
const vtkm::Int32& newValue,
const vtkm::Int32& oldValue) const
{
return __sync_val_compare_and_swap(address, oldValue, newValue);
}
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
const vtkm::Int64& newValue,
const vtkm::Int64& oldValue) const
{
return __sync_val_compare_and_swap(address, oldValue, newValue);
}
#endif
};
}
} // end namespace vtkm::cont
#endif // vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h

@ -345,8 +345,8 @@ public:
VTKM_EXEC void operator()(vtkm::Id index) const
{
T value = (T)index;
//Get the old value from the array with a no-op
T oldValue = this->AArray.Add(0, T(0));
//Get the old value from the array
T oldValue = this->AArray.Get(0);
//This creates an atomic add using the CAS operatoin
T assumed = T(0);
do
@ -2330,7 +2330,9 @@ private:
vtkm::Int32 atomicCount = 0;
for (vtkm::Int32 i = 0; i < SHORT_ARRAY_SIZE; i++)
{
atomicCount += i;
}
std::cout << "-------------------------------------------" << std::endl;
// To test the atomics, SHORT_ARRAY_SIZE number of threads will all increment
// a single atomic value.

@ -13,6 +13,9 @@
#include <vtkm/ListTag.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
#include <type_traits>
namespace vtkm
{
@ -22,42 +25,134 @@ namespace exec
template <typename T, typename Device>
class AtomicArrayExecutionObject
{
using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<Device>;
// Checks if PortalType has a GetIteratorBegin() method that returns a
// pointer.
template <typename PortalType,
typename PointerType = decltype(std::declval<PortalType>().GetIteratorBegin())>
struct HasPointerAccess : public std::is_pointer<PointerType>
{
};
public:
using ValueType = T;
VTKM_CONT
AtomicArrayExecutionObject()
: AtomicImplementation((vtkm::cont::ArrayHandle<T>()))
{
}
AtomicArrayExecutionObject() = default;
template <typename StorageType>
VTKM_CONT AtomicArrayExecutionObject(vtkm::cont::ArrayHandle<T, StorageType> handle)
: AtomicImplementation(handle)
VTKM_CONT AtomicArrayExecutionObject(vtkm::cont::ArrayHandle<T> handle)
: Data{ handle.PrepareForInPlace(Device{}).GetIteratorBegin() }
, NumberOfValues{ handle.GetNumberOfValues() }
{
using PortalType = decltype(handle.PrepareForInPlace(Device{}));
VTKM_STATIC_ASSERT_MSG(HasPointerAccess<PortalType>::value,
"Source portal must return a pointer from "
"GetIteratorBegin().");
}
VTKM_SUPPRESS_EXEC_WARNINGS
VTKM_EXEC
T Add(vtkm::Id index, const T& value) const
{
return this->AtomicImplementation.Add(index, value);
}
vtkm::Id GetNumberOfValues() const { return this->NumberOfValues; }
//
// Compare and Swap is an atomic exchange operation. If the value at
// the index is equal to oldValue, then newValue is written to the index.
// The operation was successful if return value is equal to oldValue
//
/// \brief Perform an atomic load of the indexed element with acquire memory
/// ordering.
/// \param index The index of the element to load.
/// \return The value of the atomic array at \a index.
///
VTKM_SUPPRESS_EXEC_WARNINGS
VTKM_EXEC
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
ValueType Get(vtkm::Id index) const
{
return this->AtomicImplementation.CompareAndSwap(index, newValue, oldValue);
// We only support 32/64 bit signed/unsigned ints, and AtomicInterface
// currently only provides API for unsigned types.
// We'll cast the signed types to unsigned to work around this.
using APIType = typename std::make_unsigned<ValueType>::type;
return static_cast<T>(
AtomicInterface::Load(reinterpret_cast<const APIType*>(this->Data + index)));
}
/// \brief Peform an atomic addition with sequentially consistent memory
/// ordering.
/// \param index The index of the array element that will be added to.
/// \param value The addend of the atomic add operation.
/// \return The original value of the element at \a index (before addition).
/// \warning Overflow behavior from this operation is undefined.
///
VTKM_SUPPRESS_EXEC_WARNINGS
VTKM_EXEC
ValueType Add(vtkm::Id index, const ValueType& value) const
{
// We only support 32/64 bit signed/unsigned ints, and AtomicInterface
// currently only provides API for unsigned types.
// We'll cast the signed types to unsigned to work around this.
// This is safe, since the only difference between signed/unsigned types
// is how overflow works, and signed overflow is already undefined. We also
// document that overflow is undefined for this operation.
using APIType = typename std::make_unsigned<ValueType>::type;
return static_cast<T>(AtomicInterface::Add(reinterpret_cast<APIType*>(this->Data + index),
static_cast<APIType>(value)));
}
/// \brief Perform an atomic CAS operation with sequentially consistent
/// memory ordering.
/// \param index The index of the array element that will be atomically
/// modified.
/// \param newValue The value to replace the indexed element with.
/// \param oldValue The expected value of the indexed element.
/// \return If the operation is successful, \a oldValue is returned. Otherwise
/// the current value of the indexed element is returned, and the element is
/// not modified.
///
/// This operation is typically used in a loop. For example usage,
/// an atomic multiplication may be implemented using CAS as follows:
///
/// ```
/// AtomicArrayExecutionObject<vtkm::Int32, ...> arr = ...;
///
/// // CAS multiplication:
/// vtkm::Int32 cur = arr->Get(idx); // Load the current value at idx
/// vtkm::Int32 newVal; // will hold the result of the multiplication
/// vtkm::Int32 expect; // will hold the expected value before multiplication
/// do {
/// expect = cur; // Used to ensure the value hasn't changed since reading
/// newVal = cur * multFactor; // the actual multiplication
/// }
/// while ((cur = arr->CompareAndSwap(idx, newVal, expect)) == expect);
/// ```
///
/// The while condition here updates \a cur with the pre-CAS value of the
/// operation (the return from CompareAndSwap), and compares this to the
/// expected value. If the values match, the operation was successful and the
/// loop exits. If the values do not match, the value at \a idx was changed
/// by another thread since the initial Get, and the CAS operation failed --
/// the target element was not modified by the CAS call. If this happens, the
/// loop body re-executes using the new value of \a cur and tries again until
/// it succeeds.
///
VTKM_SUPPRESS_EXEC_WARNINGS
VTKM_EXEC
ValueType CompareAndSwap(vtkm::Id index,
const ValueType& newValue,
const ValueType& oldValue) const
{
// We only support 32/64 bit signed/unsigned ints, and AtomicInterface
// currently only provides API for unsigned types.
// We'll cast the signed types to unsigned to work around this.
// This is safe, since the only difference between signed/unsigned types
// is how overflow works, and signed overflow is already undefined.
using APIType = typename std::make_unsigned<ValueType>::type;
return static_cast<T>(
AtomicInterface::CompareAndSwap(reinterpret_cast<APIType*>(this->Data + index),
static_cast<APIType>(newValue),
static_cast<APIType>(oldValue)));
}
private:
vtkm::cont::DeviceAdapterAtomicArrayImplementation<T, Device> AtomicImplementation;
ValueType* Data{ nullptr };
vtkm::Id NumberOfValues{ 0 };
};
}
} // namespace vtkm::exec