diff --git a/vtkm/cont/DeviceAdapterAlgorithm.h b/vtkm/cont/DeviceAdapterAlgorithm.h index b8f6e1d61..92528995d 100644 --- a/vtkm/cont/DeviceAdapterAlgorithm.h +++ b/vtkm/cont/DeviceAdapterAlgorithm.h @@ -523,6 +523,14 @@ public: } }; +/// \brief Class providing a device-specific support for atomic operations. +/// +/// The class provide the actual implementation used by +/// vtkm::cont::DeviceAdapterAtomicArrayImplementation. +/// +template +class DeviceAdapterAtomicArrayImplementation; + } } // namespace vtkm::cont diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h index ca66b27d9..22a65e65c 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h +++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h @@ -185,6 +185,65 @@ private: vtkm::Int32 HighestArchSupported; }; +/// CUDA contains its own atomic operations +/// +template +class DeviceAdapterAtomicArrayImplementation +{ +public: + VTKM_CONT_EXPORT + DeviceAdapterAtomicArrayImplementation( + vtkm::cont::ArrayHandle handle): + Portal( handle.PrepareForInPlace( vtkm::cont::DeviceAdapterTagCuda()) ) + { + } + + inline __device__ + T Add(vtkm::Id index, const T& value) const + { + T *lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index); + return vtkmAtomicAdd(lockedValue, value); + } + + inline __device__ + T CompareAndSwap(vtkm::Id index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + { + T *lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index); + return vtkmCompareAndSwap(lockedValue, newValue, oldValue); + } + +private: + typedef typename vtkm::cont::ArrayHandle + ::template ExecutionTypes::Portal PortalType; + PortalType Portal; + + inline __device__ + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const + { + return atomicAdd((unsigned long long *)address,(unsigned long long) value); + } + + inline __device__ + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const + { + return atomicAdd(address,value); + } + + inline __device__ + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const + { + return atomicCAS(address,oldValue,newValue); + } + + inline __device__ + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + { + return atomicCAS((unsigned long long int*) address, + (unsigned long long int ) oldValue, + (unsigned long long int ) newValue); + } +}; + } } // namespace vtkm::cont diff --git a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h index 19bb8b940..40db13fe5 100644 --- a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h +++ b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h @@ -695,4 +695,78 @@ template } } // namespace vtkm::cont::internal +namespace vtkm { +namespace cont { +/// \brief Class providing a device-specific atomic interface. +/// +/// The class provide the actual implementation used by vtkm::exec::AtomicArray. +/// A serial default implementation is provided. But each device will have a different +/// implementation. +/// +/// Serial requires no form of atomicity +/// +template +class DeviceAdapterAtomicArrayImplementation +{ +public: + VTKM_CONT_EXPORT + DeviceAdapterAtomicArrayImplementation(vtkm::cont::ArrayHandle handle): + Portal( handle.PrepareForInPlace(DeviceTag()) ) + { + } + + VTKM_EXEC_EXPORT + T Add(vtkm::Id index, const T& value) const + { + return vtkmAtomicAdd(index, value); + } + + VTKM_EXEC_EXPORT + T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const + { + return vtkmCompareAndSwap(index, newValue, oldValue); + } + +private: + typedef typename vtkm::cont::ArrayHandle + ::template ExecutionTypes::Portal PortalType; + PortalType Portal; + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int32 &value) const + { + const vtkm::Int32 old = this->Portal.Get(index); + this->Portal.Set(index, old + value); + return old; + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int64 &value) const + { + const vtkm::Int64 old = this->Portal.Get(index); + this->Portal.Set(index, old + value); + return old; + } + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const + { + const vtkm::Int32 old = this->Portal.Get(index); + if(old == oldValue) this->Portal.Set(index, newValue); + return old; + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + { + const vtkm::Int64 old = this->Portal.Get(index); + if(old == oldValue) this->Portal.Set(index, newValue); + return old; + } + +}; + +} +} // namespace vtkm::cont + #endif //vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h diff --git a/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h b/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h index ea833ad0f..ead428c09 100644 --- a/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h +++ b/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h @@ -67,6 +67,9 @@ VTKM_THIRDPARTY_PRE_INCLUDE #undef NOMINMAX #endif +#if defined(VTKM_MSVC) +#include +#endif VTKM_THIRDPARTY_POST_INCLUDE namespace vtkm { @@ -297,6 +300,110 @@ private: ::tbb::tick_count StartTime; }; +template +class DeviceAdapterAtomicArrayImplementation +{ +public: + VTKM_CONT_EXPORT + DeviceAdapterAtomicArrayImplementation( + vtkm::cont::ArrayHandle handle): + Iterators( IteratorsType( handle.PrepareForInPlace( + vtkm::cont::DeviceAdapterTagTBB()) + ) ) + { + } + + VTKM_EXEC_EXPORT + T Add(vtkm::Id index, const T& value) const + { + T* lockedValue; +#if defined(VTKM_MSVC) + typedef typename vtkm::cont::ArrayPortalToIterators::IteratorType IteratorType; + typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index)); + lockedValue = temp; + return vtkmAtomicAdd(lockedValue, value); +#else + lockedValue = (Iterators.GetBegin()+index); + return vtkmAtomicAdd(lockedValue, value); +#endif + } + + VTKM_EXEC_EXPORT + T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const + { + T* lockedValue; +#if defined(VTKM_MSVC) + typedef typename vtkm::cont::ArrayPortalToIterators::IteratorType IteratorType; + typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index)); + lockedValue = temp; + return vtkmCompareAndSwap(lockedValue, newValue, oldValue); +#else + lockedValue = (Iterators.GetBegin()+index); + return vtkmCompareAndSwap(lockedValue, newValue, oldValue); +#endif + } + +private: + typedef typename vtkm::cont::ArrayHandle + ::template ExecutionTypes::Portal PortalType; + typedef vtkm::cont::ArrayPortalToIterators IteratorsType; + IteratorsType Iterators; + +#if defined(VTKM_MSVC) //MSVC atomics + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const + { + return InterlockedExchangeAdd(reinterpret_cast(address),value); + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const + { + return InterlockedExchangeAdd64(reinterpret_cast(address),value); + } + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const + { + return InterlockedCompareExchange(reinterpret_cast(address),newValue,oldValue); + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + { + return InterlockedCompareExchange64(reinterpret_cast(address),newValue, oldValue); + } + +#else //gcc built-in atomics + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const + { + return __sync_fetch_and_add(address,value); + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const + { + return __sync_fetch_and_add(address,value); + } + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const + { + return __sync_val_compare_and_swap(address,oldValue, newValue); + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + { + return __sync_val_compare_and_swap(address,oldValue,newValue); + } + +#endif + +}; + } } // namespace vtkm::cont diff --git a/vtkm/cont/testing/TestingDeviceAdapter.h b/vtkm/cont/testing/TestingDeviceAdapter.h index 3ca160e42..60dbcaf90 100644 --- a/vtkm/cont/testing/TestingDeviceAdapter.h +++ b/vtkm/cont/testing/TestingDeviceAdapter.h @@ -39,6 +39,8 @@ #include +#include + #include #include #include @@ -276,6 +278,56 @@ public: } }; + template + struct AtomicKernel + { + VTKM_CONT_EXPORT + AtomicKernel(const vtkm::exec::AtomicArray &array) + : AArray(array) + { } + + VTKM_EXEC_EXPORT void operator()(vtkm::Id index) const + { + T value = (T) index; + this->AArray.Add(0, value); + } + + VTKM_CONT_EXPORT void SetErrorMessageBuffer( + const vtkm::exec::internal::ErrorMessageBuffer &) { } + + vtkm::exec::AtomicArray AArray; + }; + + template + struct AtomicCASKernel + { + VTKM_CONT_EXPORT + AtomicCASKernel(const vtkm::exec::AtomicArray &array) + : AArray(array) + { } + + VTKM_EXEC_EXPORT void operator()(vtkm::Id index) const + { + T value = (T) index; + //Get the old value from the array with a no-op + T oldValue = this->AArray.Add(0,T(0)); + //This creates an atomic add using the CAS operatoin + T assumed = T(0); + do + { + assumed = oldValue; + oldValue = this->AArray.CompareAndSwap(0, (assumed + value) , assumed); + + } while (assumed != oldValue); + + } + + VTKM_CONT_EXPORT void SetErrorMessageBuffer( + const vtkm::exec::internal::ErrorMessageBuffer &) { } + + vtkm::exec::AtomicArray AArray; + }; + private: @@ -1564,6 +1616,68 @@ private: } + static VTKM_CONT_EXPORT void TestAtomicArray() + { + vtkm::Int32 atomicCount = 0; + for(vtkm::Int32 i = 0; i < ARRAY_SIZE; i++) atomicCount += i; + std::cout << "-------------------------------------------" << std::endl; + // To test the atomics, ARRAY_SIZE number of threads will all increment + // a single atomic value. + std::cout << "Testing Atomic Add with vtkm::Int32" << std::endl; + { + std::vector singleElement; + singleElement.push_back(0); + vtkm::cont::ArrayHandle atomicElement = vtkm::cont::make_ArrayHandle(singleElement); + + vtkm::exec::AtomicArray atomic(atomicElement); + Algorithm::Schedule(AtomicKernel(atomic), ARRAY_SIZE); + vtkm::Int32 expected = vtkm::Int32(atomicCount); + vtkm::Int32 actual= atomicElement.GetPortalControl().Get(0); + VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic add Int32"); + } + + std::cout << "Testing Atomic Add with vtkm::Int64" << std::endl; + { + std::vector singleElement; + singleElement.push_back(0); + vtkm::cont::ArrayHandle atomicElement = vtkm::cont::make_ArrayHandle(singleElement); + + vtkm::exec::AtomicArray atomic(atomicElement); + Algorithm::Schedule(AtomicKernel(atomic), ARRAY_SIZE); + vtkm::Int64 expected = vtkm::Int64(atomicCount); + vtkm::Int64 actual= atomicElement.GetPortalControl().Get(0); + VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic add Int64"); + } + + std::cout << "Testing Atomic CAS with vtkm::Int32" << std::endl; + { + std::vector singleElement; + singleElement.push_back(0); + vtkm::cont::ArrayHandle atomicElement = vtkm::cont::make_ArrayHandle(singleElement); + + vtkm::exec::AtomicArray atomic(atomicElement); + Algorithm::Schedule(AtomicCASKernel(atomic), ARRAY_SIZE); + vtkm::Int32 expected = vtkm::Int32(atomicCount); + vtkm::Int32 actual= atomicElement.GetPortalControl().Get(0); + VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int32"); + } + + std::cout << "Testing Atomic CAS with vtkm::Int64" << std::endl; + { + std::vector singleElement; + singleElement.push_back(0); + vtkm::cont::ArrayHandle atomicElement = vtkm::cont::make_ArrayHandle(singleElement); + + vtkm::exec::AtomicArray atomic(atomicElement); + Algorithm::Schedule(AtomicCASKernel(atomic), ARRAY_SIZE); + vtkm::Int64 expected = vtkm::Int64(atomicCount); + vtkm::Int64 actual= atomicElement.GetPortalControl().Get(0); + VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int64"); + } + + + } + struct TestAll { VTKM_CONT_EXPORT void operator()() const @@ -1605,6 +1719,8 @@ private: TestStreamCompact(); TestCopyArraysInDiffTypes(); + + TestAtomicArray(); } }; diff --git a/vtkm/exec/AtomicArray.h b/vtkm/exec/AtomicArray.h new file mode 100644 index 000000000..51b9de794 --- /dev/null +++ b/vtkm/exec/AtomicArray.h @@ -0,0 +1,79 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2015 Sandia Corporation. +// Copyright 2015 UT-Battelle, LLC. +// Copyright 2015 Los Alamos National Security. +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. +//============================================================================ +#ifndef vtk_m_exec_AtomicArray_h +#define vtk_m_exec_AtomicArray_h + +#include +#include +#include + +namespace vtkm { +namespace exec { + +/// A class that can be used to atomically operate on an array of values +/// safely across multiple instances of the same worklet. This is useful when +/// you have an algorithm that needs to accumulate values in parallel, but writing +/// out a value per worklet might be memory prohibitive. +/// +/// To construct an AtomicArray you will need to pass in an vtkm::cont::ArrayHandle +/// that is used as the underlying storage for the AtomicArray +/// +/// Supported Operations: add / compare and swap (CAS) +/// +/// Supported Types: 32 / 64 bit signed integers +/// +/// +template +class AtomicArray : public vtkm::exec::ExecutionObjectBase +{ +public: + template + VTKM_CONT_EXPORT + AtomicArray(vtkm::cont::ArrayHandle handle): + AtomicImplementation( handle ) + { + } + + VTKM_EXEC_EXPORT + T Add(vtkm::Id index, const T& value) const + { + return this->AtomicImplementation.Add(index,value); + } + + // + // Compare and Swap is an atomic exchange operation. If the value at + // the index is equal to oldValue, then newValue is written to the index. + // The operation was successful if return value is eqaul to oldValue + // + VTKM_EXEC_EXPORT + T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const + { + return this->AtomicImplementation.CompareAndSwap(index,newValue, oldValue); + } + +private: + vtkm::cont::DeviceAdapterAtomicArrayImplementation + AtomicImplementation; +}; + +} +} // namespace vtkm::exec + +#endif //vtk_m_exec_AtomicArray_h \ No newline at end of file