Moved SMP atomic operations from TBB to General.

Even when running with the serial backend, the compiler might enable SIMD
vectorization when optimizations are turned on. When this occurs, we need
properly atomic Add and CompareAndSwap (CAS) operations.
This commit is contained in:
Robert Maynard 2016-03-14 09:51:17 -04:00
parent 5b6676d21f
commit ee4e490f1d
3 changed files with 82 additions and 139 deletions
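For illustration only (not part of the commit): a minimal standalone sketch of the gcc/clang builtin that the non-MSVC code paths below rely on. __sync_fetch_and_add performs the whole read-modify-write atomically and returns the value held before the addition; the sketch provokes the races with threads rather than auto-vectorization for brevity, and the thread/iteration counts are arbitrary.

```cpp
#include <iostream>
#include <thread>
#include <vector>

int main()
{
  long counter = 0;
  std::vector<std::thread> threads;
  for (int t = 0; t < 4; ++t)
  {
    threads.emplace_back([&counter]() {
      for (int i = 0; i < 100000; ++i)
      {
        // Atomic fetch-and-add; with a plain 'counter += 1' concurrent
        // read-modify-write updates could be lost.
        __sync_fetch_and_add(&counter, 1L);
      }
    });
  }
  for (std::thread &t : threads)
  {
    t.join();
  }
  std::cout << counter << std::endl; // expected: 400000
  return 0;
}
```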

@@ -28,6 +28,16 @@
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
VTKM_THIRDPARTY_PRE_INCLUDE
#if defined(VTKM_MSVC)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
namespace cont {
namespace internal {
@@ -710,60 +720,100 @@ class DeviceAdapterAtomicArrayImplementation
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(vtkm::cont::ArrayHandle<T> handle):
Portal( handle.PrepareForInPlace(DeviceTag()) )
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Iterators( IteratorsType( handle.PrepareForInPlace(DeviceTag()) ) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
return vtkmAtomicAdd(index, value);
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmAtomicAdd(lockedValue, value);
#endif
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
return vtkmCompareAndSwap(index, newValue, oldValue);
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
typedef typename vtkm::cont::ArrayHandle<T>
::template ExecutionTypes<DeviceTag>::Portal PortalType;
PortalType Portal;
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<DeviceTag>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int32 &value) const
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
const vtkm::Int32 old = this->Portal.Get(index);
this->Portal.Set(index, old + value);
return old;
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int64 &value) const
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
const vtkm::Int64 old = this->Portal.Get(index);
this->Portal.Set(index, old + value);
return old;
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
const vtkm::Int32 old = this->Portal.Get(index);
if(old == oldValue) this->Portal.Set(index, newValue);
return old;
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
const vtkm::Int64 old = this->Portal.Get(index);
if(old == oldValue) this->Portal.Set(index, newValue);
return old;
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue, newValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue,newValue);
}
#endif
};
}
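As an aside (illustration only, not VTK-m code): the vtkmAtomicAdd/vtkmCompareAndSwap helpers above pick between the MSVC Interlocked intrinsics and the gcc/clang __sync builtins at compile time. A minimal standalone sketch of that dispatch for a 32-bit fetch-and-add, with the wrapper name FetchAndAdd32 invented for the example:

```cpp
#if defined(_MSC_VER)
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <Windows.h>
#endif
#include <cstdint>

// Hypothetical wrapper mirroring the vtkmAtomicAdd overloads: atomically
// adds 'value' to *address and returns the previous value.
inline std::int32_t FetchAndAdd32(std::int32_t *address, std::int32_t value)
{
#if defined(_MSC_VER)
  // The Interlocked family is declared in terms of volatile long, which is
  // 32 bits on Windows, hence the reinterpret_cast.
  return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address), value);
#else
  // gcc/clang legacy builtin; returns the value prior to the addition.
  return __sync_fetch_and_add(address, value);
#endif
}
```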

@@ -35,8 +35,7 @@
VTKM_THIRDPARTY_PRE_INCLUDE
// gcc || clang
#if defined(_WIN32)
#if defined(VTKM_MSVC)
// TBB includes windows.h, which clobbers min and max functions so we
// define NOMINMAX to fix that problem. We also include WIN32_LEAN_AND_MEAN
// to reduce the number of macros and objects windows.h imports as those also
@@ -62,14 +61,12 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include <tbb/partitioner.h>
#include <tbb/tick_count.h>
#if defined(_WIN32)
#if defined(VTKM_MSVC)
#include <Windows.h>
#undef WIN32_LEAN_AND_MEAN
#undef NOMINMAX
#endif
#if defined(VTKM_MSVC)
#include <Windows.h>
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
@@ -300,110 +297,6 @@ private:
::tbb::tick_count StartTime;
};
template<typename T>
class DeviceAdapterAtomicArrayImplementation<T,vtkm::cont::DeviceAdapterTagTBB>
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Iterators( IteratorsType( handle.PrepareForInPlace(
vtkm::cont::DeviceAdapterTagTBB())
) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmAtomicAdd(lockedValue, value);
#endif
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<DeviceAdapterTagTBB>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue, newValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue,newValue);
}
#endif
};
}
} // namespace vtkm::cont
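Illustration only: the AtomicCASKernel in the test hunk below builds an atomic add out of CompareAndSwap by retrying until the value the swap observed matches the value the sum was based on. A standalone sketch of that retry loop using the gcc/clang builtin from the adapters above (the function name is invented for the example):

```cpp
#include <cstdint>

inline std::int32_t AtomicAddViaCAS(std::int32_t *address, std::int32_t value)
{
  std::int32_t oldValue = *address;
  std::int32_t assumed;
  do
  {
    assumed = oldValue;
    // The builtin returns the value found at *address; the swap only took
    // effect if that value equals 'assumed'.
    oldValue = __sync_val_compare_and_swap(address, assumed, assumed + value);
  } while (assumed != oldValue);
  return oldValue;
}
```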

@@ -279,7 +279,7 @@ public:
};
template<typename T>
struct AtomicKernel
struct AtomicKernel
{
VTKM_CONT_EXPORT
AtomicKernel(const vtkm::exec::AtomicArray<T,DeviceAdapterTag> &array)
@@ -299,7 +299,7 @@ public:
};
template<typename T>
struct AtomicCASKernel
struct AtomicCASKernel
{
VTKM_CONT_EXPORT
AtomicCASKernel(const vtkm::exec::AtomicArray<T,DeviceAdapterTag> &array)
@@ -314,12 +314,12 @@ public:
// This creates an atomic add using the CAS operation
T assumed = T(0);
do
{
{
assumed = oldValue;
oldValue = this->AArray.CompareAndSwap(0, (assumed + value) , assumed);
} while (assumed != oldValue);
}
VTKM_CONT_EXPORT void SetErrorMessageBuffer(
@@ -1621,7 +1621,7 @@ private:
vtkm::Int32 atomicCount = 0;
for(vtkm::Int32 i = 0; i < ARRAY_SIZE; i++) atomicCount += i;
std::cout << "-------------------------------------------" << std::endl;
// To test the atomics, ARRAY_SIZE number of threads will all increment
// To test the atomics, ARRAY_SIZE number of threads will all increment
// a single atomic value.
std::cout << "Testing Atomic Add with vtkm::Int32" << std::endl;
{
@@ -1675,7 +1675,7 @@ private:
VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int64");
}
}
struct TestAll