Moved SMP atomic operations from TBB to General.

Even when running with the serial backend, the compiler might enable SIMD
vectorization when optimizations are turned on. When this occurs, we need
properly atomic Add and CompareAndSwap (CAS) operations.
This commit is contained in:
Robert Maynard 2016-03-14 09:51:17 -04:00
parent 5b6676d21f
commit ee4e490f1d
3 changed files with 82 additions and 139 deletions
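For illustration only (not part of the commit): a minimal standalone sketch of the gcc/clang builtin that the non-MSVC code paths below rely on. __sync_fetch_and_add performs the whole read-modify-write atomically and returns the value held before the addition; the sketch provokes the races with threads rather than auto-vectorization for brevity, and the thread/iteration counts are arbitrary.

```cpp
#include <iostream>
#include <thread>
#include <vector>

int main()
{
  long counter = 0;
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; ++t)
  {
    threads.emplace_back([&counter]() {
      for (int i = 0; i < 100000; ++i)
      {
        // Atomic fetch-and-add; with a plain 'counter += 1' concurrent
        // read-modify-write updates could be lost.
        __sync_fetch_and_add(&counter, 1L);
      }
    });
  }
  for (std::thread &t : threads)
  {
    t.join();
  }
  std::cout << counter << std::endl; // expected: 400000
  return 0;
}
```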

@@ -28,6 +28,16 @@
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#if defined(VTKM_MSVC)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
namespace cont {
namespace internal {
@@ -710,60 +720,100 @@ class DeviceAdapterAtomicArrayImplementation
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(vtkm::cont::ArrayHandle<T> handle):
Portal( handle.PrepareForInPlace(DeviceTag()) )
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Iterators( IteratorsType( handle.PrepareForInPlace(DeviceTag()) ) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
return vtkmAtomicAdd(index, value);
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmAtomicAdd(lockedValue, value);
#endif
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
return vtkmCompareAndSwap(index, newValue, oldValue);
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
typedef typename vtkm::cont::ArrayHandle<T>
::template ExecutionTypes<DeviceTag>::Portal PortalType;
PortalType Portal;
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<DeviceTag>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int32 &value) const
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
const vtkm::Int32 old = this->Portal.Get(index);
this->Portal.Set(index, old + value);
return old;
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int64 &value) const
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
const vtkm::Int64 old = this->Portal.Get(index);
this->Portal.Set(index, old + value);
return old;
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
const vtkm::Int32 old = this->Portal.Get(index);
if(old == oldValue) this->Portal.Set(index, newValue);
return old;
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
const vtkm::Int64 old = this->Portal.Get(index);
if(old == oldValue) this->Portal.Set(index, newValue);
return old;
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue, newValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue,newValue);
}
#endif
};
}
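As an aside (illustration only, not VTK-m code): the vtkmAtomicAdd/vtkmCompareAndSwap helpers above pick between the MSVC Interlocked intrinsics and the gcc/clang __sync builtins at compile time. A minimal standalone sketch of that dispatch for a 32-bit fetch-and-add, with the wrapper name FetchAndAdd32 invented for the example:

```cpp
#if defined(_MSC_VER)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>
#endif
#include <cstdint>

// Hypothetical wrapper mirroring the vtkmAtomicAdd overloads: atomically
// adds 'value' to *address and returns the previous value.
inline std::int32_t FetchAndAdd32(std::int32_t *address, std::int32_t value)
{
#if defined(_MSC_VER)
  // The Interlocked family is declared in terms of volatile long, which is
  // 32 bits on Windows, hence the reinterpret_cast.
  return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address), value);
#else
  // gcc/clang legacy builtin; returns the value prior to the addition.
  return __sync_fetch_and_add(address, value);
#endif
}
```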

@@ -35,8 +35,7 @@
VTKM_THIRDPARTY_PRE_INCLUDE
// gcc || clang
#if defined(_WIN32)
#if defined(VTKM_MSVC)
// TBB includes windows.h, which clobbers min and max functions so we
// define NOMINMAX to fix that problem. We also include WIN32_LEAN_AND_MEAN
// to reduce the number of macros and objects windows.h imports as those also
@@ -62,14 +61,12 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <tbb/partitioner.h>
#include <tbb/tick_count.h>
#if defined(_WIN32)
#if defined(VTKM_MSVC)
#include <Windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
#if defined(VTKM_MSVC)
#include <Windows.h>
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
@@ -300,110 +297,6 @@ private:
::tbb::tick_count StartTime;
};
template<typename T>
class DeviceAdapterAtomicArrayImplementation<T,vtkm::cont::DeviceAdapterTagTBB>
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Iterators( IteratorsType( handle.PrepareForInPlace(
vtkm::cont::DeviceAdapterTagTBB())
) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmAtomicAdd(lockedValue, value);
#endif
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<DeviceAdapterTagTBB>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue, newValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue,newValue);
}
#endif
};
}
} // namespace vtkm::cont
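Illustration only: the AtomicCASKernel in the test hunk below builds an atomic add out of CompareAndSwap by retrying until the value the swap observed matches the value the sum was based on. A standalone sketch of that retry loop using the gcc/clang builtin from the adapters above (the function name is invented for the example):

```cpp
#include <cstdint>

inline std::int32_t AtomicAddViaCAS(std::int32_t *address, std::int32_t value)
{
  std::int32_t oldValue = *address;
  std::int32_t assumed;
  do
  {
    assumed = oldValue;
    // The builtin returns the value found at *address; the swap only took
    // effect if that value equals 'assumed'.
    oldValue = __sync_val_compare_and_swap(address, assumed, assumed + value);
  } while (assumed != oldValue);
  return oldValue;
}
```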

@@ -279,7 +279,7 @@ public:
};
template<typename T>
struct AtomicKernel
struct AtomicKernel
{
VTKM_CONT_EXPORT
AtomicKernel(const vtkm::exec::AtomicArray<T,DeviceAdapterTag> &array)
@@ -299,7 +299,7 @@ public:
};
template<typename T>
struct AtomicCASKernel
struct AtomicCASKernel
{
VTKM_CONT_EXPORT
AtomicCASKernel(const vtkm::exec::AtomicArray<T,DeviceAdapterTag> &array)
@@ -314,12 +314,12 @@ public:
// This creates an atomic add using the CAS operation
T assumed = T(0);
do
{
{
assumed = oldValue;
oldValue = this->AArray.CompareAndSwap(0, (assumed + value) , assumed);
} while (assumed != oldValue);
}
VTKM_CONT_EXPORT void SetErrorMessageBuffer(
@@ -1621,7 +1621,7 @@ private:
vtkm::Int32 atomicCount = 0;
for(vtkm::Int32 i = 0; i < ARRAY_SIZE; i++) atomicCount += i;
std::cout << "-------------------------------------------" << std::endl;
// To test the atomics, ARRAY_SIZE number of threads will all increment
// To test the atomics, ARRAY_SIZE number of threads will all increment
// a single atomic value.
std::cout << "Testing Atomic Add with vtkm::Int32" << std::endl;
{
@@ -1675,7 +1675,7 @@ private:
VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int64");
}
}
struct TestAll