diff --git a/vtkm/cont/cuda/internal/CMakeLists.txt b/vtkm/cont/cuda/internal/CMakeLists.txt index 5488c0231..198597846 100644 --- a/vtkm/cont/cuda/internal/CMakeLists.txt +++ b/vtkm/cont/cuda/internal/CMakeLists.txt @@ -23,6 +23,7 @@ set(headers CudaAllocator.h DeviceAdapterAlgorithmCuda.h DeviceAdapterAlgorithmThrust.h + DeviceAdapterAtomicArrayImplementationCuda.h DeviceAdapterRuntimeDetectorCuda.h DeviceAdapterTagCuda.h DeviceAdapterTimerImplementationCuda.h diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h index e505fca97..e65cff4e3 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h +++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h @@ -56,66 +56,6 @@ struct DeviceAdapterAlgorithm } }; -/// CUDA contains its own atomic operations -/// -template -class DeviceAdapterAtomicArrayImplementation -{ -public: - VTKM_CONT - DeviceAdapterAtomicArrayImplementation( - vtkm::cont::ArrayHandle handle) - : Portal(handle.PrepareForInPlace(vtkm::cont::DeviceAdapterTagCuda())) - { - } - - inline __device__ T Add(vtkm::Id index, const T& value) const - { - T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index); - return vtkmAtomicAdd(lockedValue, value); - } - - inline __device__ T CompareAndSwap(vtkm::Id index, - const vtkm::Int64& newValue, - const vtkm::Int64& oldValue) const - { - T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index); - return vtkmCompareAndSwap(lockedValue, newValue, oldValue); - } - -private: - using PortalType = - typename vtkm::cont::ArrayHandle::template ExecutionTypes< - vtkm::cont::DeviceAdapterTagCuda>::Portal; - PortalType Portal; - - inline __device__ vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const - { - return atomicAdd((unsigned long long*)address, (unsigned long long)value); - } - - inline __device__ vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const - { - return atomicAdd(address, value); - } - - inline __device__ vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address, - const vtkm::Int32& newValue, - const vtkm::Int32& oldValue) const - { - return atomicCAS(address, oldValue, newValue); - } - - inline __device__ vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address, - const vtkm::Int64& newValue, - const vtkm::Int64& oldValue) const - { - return atomicCAS((unsigned long long int*)address, - (unsigned long long int)oldValue, - (unsigned long long int)newValue); - } -}; - template <> class DeviceTaskTypes { diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h index d80e428f3..c8057c2ce 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h +++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h @@ -32,6 +32,7 @@ #include +#include #include #include #include diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h new file mode 100644 index 000000000..289f21b10 --- /dev/null +++ b/vtkm/cont/cuda/internal/DeviceAdapterAtomicArrayImplementationCuda.h @@ -0,0 +1,104 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +// Copyright 2018 UT-Battelle, LLC. +// Copyright 2018 Los Alamos National Security. +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. +//============================================================================ + +#ifndef vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h +#define vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h + +#include + +#include +#include +#include + +#include + +// Disable warnings we check vtkm for but Thrust does not. +VTKM_THIRDPARTY_PRE_INCLUDE +#include +VTKM_THIRDPARTY_POST_INCLUDE + +namespace vtkm +{ +namespace cont +{ + +/// CUDA contains its own atomic operations +/// +template +class DeviceAdapterAtomicArrayImplementation +{ +public: + VTKM_CONT + DeviceAdapterAtomicArrayImplementation( + vtkm::cont::ArrayHandle handle) + : Portal(handle.PrepareForInPlace(vtkm::cont::DeviceAdapterTagCuda())) + { + } + + VTKM_EXEC T Add(vtkm::Id index, const T& value) const + { + T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index); + return this->vtkmAtomicAdd(lockedValue, value); + } + + VTKM_EXEC T CompareAndSwap(vtkm::Id index, + const vtkm::Int64& newValue, + const vtkm::Int64& oldValue) const + { + T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index); + return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue); + } + +private: + using PortalType = + typename vtkm::cont::ArrayHandle::template ExecutionTypes< + vtkm::cont::DeviceAdapterTagCuda>::Portal; + PortalType Portal; + + __device__ vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const + { + return atomicAdd((unsigned long long*)address, (unsigned long long)value); + } + + __device__ vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const + { + return atomicAdd(address, value); + } + + __device__ vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address, + const vtkm::Int32& newValue, + const vtkm::Int32& oldValue) const + { + return atomicCAS(address, oldValue, newValue); + } + + __device__ vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address, + const vtkm::Int64& newValue, + const vtkm::Int64& oldValue) const + { + return atomicCAS((unsigned long long int*)address, + (unsigned long long int)oldValue, + (unsigned long long int)newValue); + } +}; +} +} // end namespace vtkm::cont + +#endif // vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h diff --git a/vtkm/cont/internal/CMakeLists.txt b/vtkm/cont/internal/CMakeLists.txt index 2faf93a7e..190a5456b 100644 --- a/vtkm/cont/internal/CMakeLists.txt +++ b/vtkm/cont/internal/CMakeLists.txt @@ -30,6 +30,7 @@ set(headers ArrayTransfer.h ConnectivityExplicitInternals.h DeviceAdapterAlgorithmGeneral.h + DeviceAdapterAtomicArrayImplementation.h DeviceAdapterDefaultSelection.h DeviceAdapterError.h DeviceAdapterListHelpers.h diff --git a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h index 334a75109..2e477a5ab 100644 --- a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h +++ b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -874,125 +875,8 @@ public: input, values_output, values_output); } }; -} -} -} // namespace vtkm::cont::internal -namespace vtkm -{ -namespace cont -{ -/// \brief Class providing a device-specific atomic interface. -/// -/// The class provide the actual implementation used by vtkm::exec::AtomicArray. -/// A serial default implementation is provided. But each device will have a different -/// implementation. -/// -/// Serial requires no form of atomicity -/// -template -class DeviceAdapterAtomicArrayImplementation -{ -public: - VTKM_CONT - DeviceAdapterAtomicArrayImplementation( - vtkm::cont::ArrayHandle handle) - : Iterators(IteratorsType(handle.PrepareForInPlace(DeviceTag()))) - { - } - - T Add(vtkm::Id index, const T& value) const - { - T* lockedValue; -#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0 - using IteratorType = typename vtkm::cont::ArrayPortalToIterators::IteratorType; - typename IteratorType::pointer temp = - &(*(Iterators.GetBegin() + static_cast(index))); - lockedValue = temp; - return vtkmAtomicAdd(lockedValue, value); -#else - lockedValue = (Iterators.GetBegin() + index); - return vtkmAtomicAdd(lockedValue, value); -#endif - } - - T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const - { - T* lockedValue; -#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0 - using IteratorType = typename vtkm::cont::ArrayPortalToIterators::IteratorType; - typename IteratorType::pointer temp = - &(*(Iterators.GetBegin() + static_cast(index))); - lockedValue = temp; - return vtkmCompareAndSwap(lockedValue, newValue, oldValue); -#else - lockedValue = (Iterators.GetBegin() + index); - return vtkmCompareAndSwap(lockedValue, newValue, oldValue); -#endif - } - -private: - using PortalType = - typename vtkm::cont::ArrayHandle::template ExecutionTypes< - DeviceTag>::Portal; - using IteratorsType = vtkm::cont::ArrayPortalToIterators; - IteratorsType Iterators; - -#if defined(VTKM_MSVC) //MSVC atomics - vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const - { - return InterlockedExchangeAdd(reinterpret_cast(address), value); - } - - vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const - { - return InterlockedExchangeAdd64(reinterpret_cast(address), value); - } - - vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address, - const vtkm::Int32& newValue, - const vtkm::Int32& oldValue) const - { - return InterlockedCompareExchange( - reinterpret_cast(address), newValue, oldValue); - } - - vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address, - const vtkm::Int64& newValue, - const vtkm::Int64& oldValue) const - { - return InterlockedCompareExchange64( - reinterpret_cast(address), newValue, oldValue); - } - -#else //gcc built-in atomics - - vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const - { - return __sync_fetch_and_add(address, value); - } - - vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const - { - return __sync_fetch_and_add(address, value); - } - - vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address, - const vtkm::Int32& newValue, - const vtkm::Int32& oldValue) const - { - return __sync_val_compare_and_swap(address, oldValue, newValue); - } - - vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address, - const vtkm::Int64& newValue, - const vtkm::Int64& oldValue) const - { - return __sync_val_compare_and_swap(address, oldValue, newValue); - } - -#endif -}; +} // namespace internal /// \brief Class providing a device-specific support for selecting the optimal /// Task type for a given worklet. diff --git a/vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h b/vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h new file mode 100644 index 000000000..78fdd42ef --- /dev/null +++ b/vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h @@ -0,0 +1,152 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +// Copyright 2018 UT-Battelle, LLC. +// Copyright 2018 Los Alamos National Security. +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. +//============================================================================ + +#ifndef vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h +#define vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h + +#include +#include +#include + +#include +#include + +#include + +namespace vtkm +{ +namespace cont +{ + +/// \brief Class providing a device-specific atomic interface. +/// +/// The class provide the actual implementation used by vtkm::exec::AtomicArray. +/// A serial default implementation is provided. But each device will have a different +/// implementation. +/// +/// Serial requires no form of atomicity +/// +template +class DeviceAdapterAtomicArrayImplementation +{ + using PortalType = + typename vtkm::cont::ArrayHandle::template ExecutionTypes< + DeviceTag>::Portal; + using IteratorsType = vtkm::cont::ArrayPortalToIterators; + IteratorsType Iterators; + +public: + VTKM_CONT + DeviceAdapterAtomicArrayImplementation( + vtkm::cont::ArrayHandle handle) + : Iterators(IteratorsType(handle.PrepareForInPlace(DeviceTag()))) + { + } + + T Add(vtkm::Id index, const T& value) const + { + T* lockedValue; +#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0 + using IteratorType = typename vtkm::cont::ArrayPortalToIterators::IteratorType; + typename IteratorType::pointer temp = + &(*(Iterators.GetBegin() + static_cast(index))); + lockedValue = temp; + return this->vtkmAtomicAdd(lockedValue, value); +#else + lockedValue = (Iterators.GetBegin() + index); + return this->vtkmAtomicAdd(lockedValue, value); +#endif + } + + T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const + { + T* lockedValue; +#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0 + using IteratorType = typename vtkm::cont::ArrayPortalToIterators::IteratorType; + typename IteratorType::pointer temp = + &(*(Iterators.GetBegin() + static_cast(index))); + lockedValue = temp; + return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue); +#else + lockedValue = (Iterators.GetBegin() + index); + return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue); +#endif + } + +private: +#if defined(VTKM_MSVC) //MSVC atomics + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const + { + return InterlockedExchangeAdd(reinterpret_cast(address), value); + } + + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const + { + return InterlockedExchangeAdd64(reinterpret_cast(address), value); + } + + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address, + const vtkm::Int32& newValue, + const vtkm::Int32& oldValue) const + { + return InterlockedCompareExchange( + reinterpret_cast(address), newValue, oldValue); + } + + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address, + const vtkm::Int64& newValue, + const vtkm::Int64& oldValue) const + { + return InterlockedCompareExchange64( + reinterpret_cast(address), newValue, oldValue); + } + +#else //gcc built-in atomics + + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const + { + return __sync_fetch_and_add(address, value); + } + + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const + { + return __sync_fetch_and_add(address, value); + } + + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address, + const vtkm::Int32& newValue, + const vtkm::Int32& oldValue) const + { + return __sync_val_compare_and_swap(address, oldValue, newValue); + } + + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address, + const vtkm::Int64& newValue, + const vtkm::Int64& oldValue) const + { + return __sync_val_compare_and_swap(address, oldValue, newValue); + } + +#endif +}; +} +} // end namespace vtkm::cont + +#endif // vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h