From ee4e490f1d38a65e36f69e6e71b4c21b313889d4 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 14 Mar 2016 09:51:17 -0400 Subject: [PATCH] Moved SMP atomic operations from TBB to General. Even when running with the serial backend, the compiler might enable SIMD vectorization when optimizations are turned on. When this occurs, we need to use properly atomic Add's and CAE's. --- .../internal/DeviceAdapterAlgorithmGeneral.h | 96 +++++++++++---- .../tbb/internal/DeviceAdapterAlgorithmTBB.h | 113 +----------------- vtkm/cont/testing/TestingDeviceAdapter.h | 12 +- 3 files changed, 82 insertions(+), 139 deletions(-) diff --git a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h index 40db13fe5..e623d65d1 100644 --- a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h +++ b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h @@ -28,6 +28,16 @@ #include +VTKM_THIRDPARTY_PRE_INCLUDE +#if defined(VTKM_MSVC) +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include +#undef WIN32_LEAN_AND_MEAN +#undef NOMINMAX +#endif +VTKM_THIRDPARTY_POST_INCLUDE + namespace vtkm { namespace cont { namespace internal { @@ -710,60 +720,100 @@ class DeviceAdapterAtomicArrayImplementation { public: VTKM_CONT_EXPORT - DeviceAdapterAtomicArrayImplementation(vtkm::cont::ArrayHandle handle): - Portal( handle.PrepareForInPlace(DeviceTag()) ) + DeviceAdapterAtomicArrayImplementation( + vtkm::cont::ArrayHandle handle): + Iterators( IteratorsType( handle.PrepareForInPlace(DeviceTag()) ) ) { } VTKM_EXEC_EXPORT T Add(vtkm::Id index, const T& value) const { - return vtkmAtomicAdd(index, value); + T* lockedValue; +#if defined(VTKM_MSVC) + typedef typename vtkm::cont::ArrayPortalToIterators::IteratorType IteratorType; + typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index)); + lockedValue = temp; + return vtkmAtomicAdd(lockedValue, value); +#else + lockedValue = (Iterators.GetBegin()+index); + return vtkmAtomicAdd(lockedValue, value); +#endif } VTKM_EXEC_EXPORT T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const { - return vtkmCompareAndSwap(index, newValue, oldValue); + T* lockedValue; +#if defined(VTKM_MSVC) + typedef typename vtkm::cont::ArrayPortalToIterators::IteratorType IteratorType; + typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index)); + lockedValue = temp; + return vtkmCompareAndSwap(lockedValue, newValue, oldValue); +#else + lockedValue = (Iterators.GetBegin()+index); + return vtkmCompareAndSwap(lockedValue, newValue, oldValue); +#endif } private: - typedef typename vtkm::cont::ArrayHandle - ::template ExecutionTypes::Portal PortalType; - PortalType Portal; + typedef typename vtkm::cont::ArrayHandle + ::template ExecutionTypes::Portal PortalType; + typedef vtkm::cont::ArrayPortalToIterators IteratorsType; + IteratorsType Iterators; +#if defined(VTKM_MSVC) //MSVC atomics VTKM_EXEC_EXPORT - vtkm::Int32 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int32 &value) const + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const { - const vtkm::Int32 old = this->Portal.Get(index); - this->Portal.Set(index, old + value); - return old; + return InterlockedExchangeAdd(reinterpret_cast(address),value); } VTKM_EXEC_EXPORT - vtkm::Int64 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int64 &value) const + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const { - const vtkm::Int64 old = this->Portal.Get(index); - this->Portal.Set(index, old + value); - return old; + return InterlockedExchangeAdd64(reinterpret_cast(address),value); } VTKM_EXEC_EXPORT - vtkm::Int32 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const { - const vtkm::Int32 old = this->Portal.Get(index); - if(old == oldValue) this->Portal.Set(index, newValue); - return old; + return InterlockedCompareExchange(reinterpret_cast(address),newValue,oldValue); } VTKM_EXEC_EXPORT - vtkm::Int64 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const { - const vtkm::Int64 old = this->Portal.Get(index); - if(old == oldValue) this->Portal.Set(index, newValue); - return old; + return InterlockedCompareExchange64(reinterpret_cast(address),newValue, oldValue); } +#else //gcc built-in atomics + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const + { + return __sync_fetch_and_add(address,value); + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const + { + return __sync_fetch_and_add(address,value); + } + + VTKM_EXEC_EXPORT + vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const + { + return __sync_val_compare_and_swap(address,oldValue, newValue); + } + + VTKM_EXEC_EXPORT + vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const + { + return __sync_val_compare_and_swap(address,oldValue,newValue); + } + +#endif }; } diff --git a/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h b/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h index ead428c09..473f3fb19 100644 --- a/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h +++ b/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h @@ -35,8 +35,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE -// gcc || clang -#if defined(_WIN32) +#if defined(VTKM_MSVC) // TBB includes windows.h, which clobbers min and max functions so we // define NOMINMAX to fix that problem. We also include WIN32_LEAN_AND_MEAN // to reduce the number of macros and objects windows.h imports as those also @@ -62,14 +61,12 @@ VTKM_THIRDPARTY_PRE_INCLUDE #include #include -#if defined(_WIN32) +#if defined(VTKM_MSVC) +#include #undef WIN32_LEAN_AND_MEAN #undef NOMINMAX #endif -#if defined(VTKM_MSVC) -#include -#endif VTKM_THIRDPARTY_POST_INCLUDE namespace vtkm { @@ -300,110 +297,6 @@ private: ::tbb::tick_count StartTime; }; -template -class DeviceAdapterAtomicArrayImplementation -{ -public: - VTKM_CONT_EXPORT - DeviceAdapterAtomicArrayImplementation( - vtkm::cont::ArrayHandle handle): - Iterators( IteratorsType( handle.PrepareForInPlace( - vtkm::cont::DeviceAdapterTagTBB()) - ) ) - { - } - - VTKM_EXEC_EXPORT - T Add(vtkm::Id index, const T& value) const - { - T* lockedValue; -#if defined(VTKM_MSVC) - typedef typename vtkm::cont::ArrayPortalToIterators::IteratorType IteratorType; - typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index)); - lockedValue = temp; - return vtkmAtomicAdd(lockedValue, value); -#else - lockedValue = (Iterators.GetBegin()+index); - return vtkmAtomicAdd(lockedValue, value); -#endif - } - - VTKM_EXEC_EXPORT - T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const - { - T* lockedValue; -#if defined(VTKM_MSVC) - typedef typename vtkm::cont::ArrayPortalToIterators::IteratorType IteratorType; - typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index)); - lockedValue = temp; - return vtkmCompareAndSwap(lockedValue, newValue, oldValue); -#else - lockedValue = (Iterators.GetBegin()+index); - return vtkmCompareAndSwap(lockedValue, newValue, oldValue); -#endif - } - -private: - typedef typename vtkm::cont::ArrayHandle - ::template ExecutionTypes::Portal PortalType; - typedef vtkm::cont::ArrayPortalToIterators IteratorsType; - IteratorsType Iterators; - -#if defined(VTKM_MSVC) //MSVC atomics - VTKM_EXEC_EXPORT - vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const - { - return InterlockedExchangeAdd(reinterpret_cast(address),value); - } - - VTKM_EXEC_EXPORT - vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const - { - return InterlockedExchangeAdd64(reinterpret_cast(address),value); - } - - VTKM_EXEC_EXPORT - vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const - { - return InterlockedCompareExchange(reinterpret_cast(address),newValue,oldValue); - } - - VTKM_EXEC_EXPORT - vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const - { - return InterlockedCompareExchange64(reinterpret_cast(address),newValue, oldValue); - } - -#else //gcc built-in atomics - - VTKM_EXEC_EXPORT - vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const - { - return __sync_fetch_and_add(address,value); - } - - VTKM_EXEC_EXPORT - vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const - { - return __sync_fetch_and_add(address,value); - } - - VTKM_EXEC_EXPORT - vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const - { - return __sync_val_compare_and_swap(address,oldValue, newValue); - } - - VTKM_EXEC_EXPORT - vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const - { - return __sync_val_compare_and_swap(address,oldValue,newValue); - } - -#endif - -}; - } } // namespace vtkm::cont diff --git a/vtkm/cont/testing/TestingDeviceAdapter.h b/vtkm/cont/testing/TestingDeviceAdapter.h index 60dbcaf90..264b716c3 100644 --- a/vtkm/cont/testing/TestingDeviceAdapter.h +++ b/vtkm/cont/testing/TestingDeviceAdapter.h @@ -279,7 +279,7 @@ public: }; template - struct AtomicKernel + struct AtomicKernel { VTKM_CONT_EXPORT AtomicKernel(const vtkm::exec::AtomicArray &array) @@ -299,7 +299,7 @@ public: }; template - struct AtomicCASKernel + struct AtomicCASKernel { VTKM_CONT_EXPORT AtomicCASKernel(const vtkm::exec::AtomicArray &array) @@ -314,12 +314,12 @@ public: //This creates an atomic add using the CAS operatoin T assumed = T(0); do - { + { assumed = oldValue; oldValue = this->AArray.CompareAndSwap(0, (assumed + value) , assumed); } while (assumed != oldValue); - + } VTKM_CONT_EXPORT void SetErrorMessageBuffer( @@ -1621,7 +1621,7 @@ private: vtkm::Int32 atomicCount = 0; for(vtkm::Int32 i = 0; i < ARRAY_SIZE; i++) atomicCount += i; std::cout << "-------------------------------------------" << std::endl; - // To test the atomics, ARRAY_SIZE number of threads will all increment + // To test the atomics, ARRAY_SIZE number of threads will all increment // a single atomic value. std::cout << "Testing Atomic Add with vtkm::Int32" << std::endl; { @@ -1675,7 +1675,7 @@ private: VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int64"); } - + } struct TestAll