Merge topic 'feature/atomics'

5ddade7a Adding some basic documentation on atomics.
e5c4aa3f Fixing cuda index error
00a7f6c1 Correcting function call
249cce35 Adding type restrictions to serial atomics
43131ee0 Adding comments about CAS
40b6db7e Inserted missing ,
3b46706e Adding compare and swap and removing unsigned atomics
12632de3 Casting Uint64 to long long
...

Acked-by: Kitware Robot <kwrobot@kitware.com>
Merge-request: !336
This commit is contained in:
Robert Maynard 2016-03-11 13:49:38 -05:00 committed by Kitware Robot
commit 5b6676d21f
6 changed files with 443 additions and 0 deletions

@ -523,6 +523,14 @@ public:
}
};
/// \brief Class providing a device-specific support for atomic operations.
///
/// The class provide the actual implementation used by
/// vtkm::cont::DeviceAdapterAtomicArrayImplementation.
///
template<typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation;
}
} // namespace vtkm::cont

@ -185,6 +185,65 @@ private:
vtkm::Int32 HighestArchSupported;
};
/// CUDA contains its own atomic operations
///
template<typename T>
class DeviceAdapterAtomicArrayImplementation<T,vtkm::cont::DeviceAdapterTagCuda>
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Portal( handle.PrepareForInPlace( vtkm::cont::DeviceAdapterTagCuda()) )
{
}
inline __device__
T Add(vtkm::Id index, const T& value) const
{
T *lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index);
return vtkmAtomicAdd(lockedValue, value);
}
inline __device__
T CompareAndSwap(vtkm::Id index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
T *lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
}
private:
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<vtkm::cont::DeviceAdapterTagCuda>::Portal PortalType;
PortalType Portal;
inline __device__
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return atomicAdd((unsigned long long *)address,(unsigned long long) value);
}
inline __device__
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return atomicAdd(address,value);
}
inline __device__
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return atomicCAS(address,oldValue,newValue);
}
inline __device__
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return atomicCAS((unsigned long long int*) address,
(unsigned long long int ) oldValue,
(unsigned long long int ) newValue);
}
};
}
} // namespace vtkm::cont

@ -695,4 +695,78 @@ template<typename T, typename U, class CIn, class CStencil, class COut>
}
} // namespace vtkm::cont::internal
namespace vtkm {
namespace cont {
/// \brief Class providing a device-specific atomic interface.
///
/// The class provide the actual implementation used by vtkm::exec::AtomicArray.
/// A serial default implementation is provided. But each device will have a different
/// implementation.
///
/// Serial requires no form of atomicity
///
template<typename T, typename DeviceTag>
class DeviceAdapterAtomicArrayImplementation
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(vtkm::cont::ArrayHandle<T> handle):
Portal( handle.PrepareForInPlace(DeviceTag()) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
return vtkmAtomicAdd(index, value);
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
return vtkmCompareAndSwap(index, newValue, oldValue);
}
private:
typedef typename vtkm::cont::ArrayHandle<T>
::template ExecutionTypes<DeviceTag>::Portal PortalType;
PortalType Portal;
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int32 &value) const
{
const vtkm::Int32 old = this->Portal.Get(index);
this->Portal.Set(index, old + value);
return old;
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(const vtkm::Id &index, const vtkm::Int64 &value) const
{
const vtkm::Int64 old = this->Portal.Get(index);
this->Portal.Set(index, old + value);
return old;
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
const vtkm::Int32 old = this->Portal.Get(index);
if(old == oldValue) this->Portal.Set(index, newValue);
return old;
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(const vtkm::Id &index, const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
const vtkm::Int64 old = this->Portal.Get(index);
if(old == oldValue) this->Portal.Set(index, newValue);
return old;
}
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_internal_DeviceAdapterAlgorithmGeneral_h

@ -67,6 +67,9 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#undef NOMINMAX
#endif
#if defined(VTKM_MSVC)
#include <Windows.h>
#endif
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
@ -297,6 +300,110 @@ private:
::tbb::tick_count StartTime;
};
template<typename T>
class DeviceAdapterAtomicArrayImplementation<T,vtkm::cont::DeviceAdapterTagTBB>
{
public:
VTKM_CONT_EXPORT
DeviceAdapterAtomicArrayImplementation(
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle):
Iterators( IteratorsType( handle.PrepareForInPlace(
vtkm::cont::DeviceAdapterTagTBB())
) )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmAtomicAdd(lockedValue, value);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmAtomicAdd(lockedValue, value);
#endif
}
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
T* lockedValue;
#if defined(VTKM_MSVC)
typedef typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType IteratorType;
typename IteratorType::pointer temp = &(*(Iterators.GetBegin()+index));
lockedValue = temp;
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#else
lockedValue = (Iterators.GetBegin()+index);
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
#endif
}
private:
typedef typename vtkm::cont::ArrayHandle<T,vtkm::cont::StorageTagBasic>
::template ExecutionTypes<DeviceAdapterTagTBB>::Portal PortalType;
typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;
IteratorsType Iterators;
#if defined(VTKM_MSVC) //MSVC atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return InterlockedExchangeAdd(reinterpret_cast<volatile long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long *>(address),value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return InterlockedCompareExchange(reinterpret_cast<volatile long *>(address),newValue,oldValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return InterlockedCompareExchange64(reinterpret_cast<volatile long long *>(address),newValue, oldValue);
}
#else //gcc built-in atomics
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32 *address, const vtkm::Int32 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64 *address, const vtkm::Int64 &value) const
{
return __sync_fetch_and_add(address,value);
}
VTKM_EXEC_EXPORT
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32 *address, const vtkm::Int32 &newValue, const vtkm::Int32 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue, newValue);
}
VTKM_EXEC_EXPORT
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64 *address,const vtkm::Int64 &newValue, const vtkm::Int64 &oldValue) const
{
return __sync_val_compare_and_swap(address,oldValue,newValue);
}
#endif
};
}
} // namespace vtkm::cont

@ -39,6 +39,8 @@
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/exec/AtomicArray.h>
#include <algorithm>
#include <cmath>
#include <utility>
@ -276,6 +278,56 @@ public:
}
};
template<typename T>
struct AtomicKernel
{
VTKM_CONT_EXPORT
AtomicKernel(const vtkm::exec::AtomicArray<T,DeviceAdapterTag> &array)
: AArray(array)
{ }
VTKM_EXEC_EXPORT void operator()(vtkm::Id index) const
{
T value = (T) index;
this->AArray.Add(0, value);
}
VTKM_CONT_EXPORT void SetErrorMessageBuffer(
const vtkm::exec::internal::ErrorMessageBuffer &) { }
vtkm::exec::AtomicArray<T,DeviceAdapterTag> AArray;
};
template<typename T>
struct AtomicCASKernel
{
VTKM_CONT_EXPORT
AtomicCASKernel(const vtkm::exec::AtomicArray<T,DeviceAdapterTag> &array)
: AArray(array)
{ }
VTKM_EXEC_EXPORT void operator()(vtkm::Id index) const
{
T value = (T) index;
//Get the old value from the array with a no-op
T oldValue = this->AArray.Add(0,T(0));
//This creates an atomic add using the CAS operatoin
T assumed = T(0);
do
{
assumed = oldValue;
oldValue = this->AArray.CompareAndSwap(0, (assumed + value) , assumed);
} while (assumed != oldValue);
}
VTKM_CONT_EXPORT void SetErrorMessageBuffer(
const vtkm::exec::internal::ErrorMessageBuffer &) { }
vtkm::exec::AtomicArray<T,DeviceAdapterTag> AArray;
};
private:
@ -1564,6 +1616,68 @@ private:
}
static VTKM_CONT_EXPORT void TestAtomicArray()
{
vtkm::Int32 atomicCount = 0;
for(vtkm::Int32 i = 0; i < ARRAY_SIZE; i++) atomicCount += i;
std::cout << "-------------------------------------------" << std::endl;
// To test the atomics, ARRAY_SIZE number of threads will all increment
// a single atomic value.
std::cout << "Testing Atomic Add with vtkm::Int32" << std::endl;
{
std::vector<vtkm::Int32> singleElement;
singleElement.push_back(0);
vtkm::cont::ArrayHandle<vtkm::Int32> atomicElement = vtkm::cont::make_ArrayHandle(singleElement);
vtkm::exec::AtomicArray<vtkm::Int32, DeviceAdapterTag> atomic(atomicElement);
Algorithm::Schedule(AtomicKernel<vtkm::Int32>(atomic), ARRAY_SIZE);
vtkm::Int32 expected = vtkm::Int32(atomicCount);
vtkm::Int32 actual= atomicElement.GetPortalControl().Get(0);
VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic add Int32");
}
std::cout << "Testing Atomic Add with vtkm::Int64" << std::endl;
{
std::vector<vtkm::Int64> singleElement;
singleElement.push_back(0);
vtkm::cont::ArrayHandle<vtkm::Int64> atomicElement = vtkm::cont::make_ArrayHandle(singleElement);
vtkm::exec::AtomicArray<vtkm::Int64, DeviceAdapterTag> atomic(atomicElement);
Algorithm::Schedule(AtomicKernel<vtkm::Int64>(atomic), ARRAY_SIZE);
vtkm::Int64 expected = vtkm::Int64(atomicCount);
vtkm::Int64 actual= atomicElement.GetPortalControl().Get(0);
VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic add Int64");
}
std::cout << "Testing Atomic CAS with vtkm::Int32" << std::endl;
{
std::vector<vtkm::Int32> singleElement;
singleElement.push_back(0);
vtkm::cont::ArrayHandle<vtkm::Int32> atomicElement = vtkm::cont::make_ArrayHandle(singleElement);
vtkm::exec::AtomicArray<vtkm::Int32, DeviceAdapterTag> atomic(atomicElement);
Algorithm::Schedule(AtomicCASKernel<vtkm::Int32>(atomic), ARRAY_SIZE);
vtkm::Int32 expected = vtkm::Int32(atomicCount);
vtkm::Int32 actual= atomicElement.GetPortalControl().Get(0);
VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int32");
}
std::cout << "Testing Atomic CAS with vtkm::Int64" << std::endl;
{
std::vector<vtkm::Int64> singleElement;
singleElement.push_back(0);
vtkm::cont::ArrayHandle<vtkm::Int64> atomicElement = vtkm::cont::make_ArrayHandle(singleElement);
vtkm::exec::AtomicArray<vtkm::Int64, DeviceAdapterTag> atomic(atomicElement);
Algorithm::Schedule(AtomicCASKernel<vtkm::Int64>(atomic), ARRAY_SIZE);
vtkm::Int64 expected = vtkm::Int64(atomicCount);
vtkm::Int64 actual= atomicElement.GetPortalControl().Get(0);
VTKM_TEST_ASSERT(expected == actual, "Did not get expected value: Atomic CAS Int64");
}
}
struct TestAll
{
VTKM_CONT_EXPORT void operator()() const
@ -1605,6 +1719,8 @@ private:
TestStreamCompact();
TestCopyArraysInDiffTypes();
TestAtomicArray();
}
};

79
vtkm/exec/AtomicArray.h Normal file

@ -0,0 +1,79 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2015 Sandia Corporation.
// Copyright 2015 UT-Battelle, LLC.
// Copyright 2015 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_exec_AtomicArray_h
#define vtk_m_exec_AtomicArray_h
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/exec/ExecutionObjectBase.h>
namespace vtkm {
namespace exec {
/// A class that can be used to atomically operate on an array of values
/// safely across multiple instances of the same worklet. This is useful when
/// you have an algorithm that needs to accumulate values in parallel, but writing
/// out a value per worklet might be memory prohibitive.
///
/// To construct an AtomicArray you will need to pass in an vtkm::cont::ArrayHandle
/// that is used as the underlying storage for the AtomicArray
///
/// Supported Operations: add / compare and swap (CAS)
///
/// Supported Types: 32 / 64 bit signed integers
///
///
template<typename T, typename DeviceAdapterTag>
class AtomicArray : public vtkm::exec::ExecutionObjectBase
{
public:
template<typename StorageType>
VTKM_CONT_EXPORT
AtomicArray(vtkm::cont::ArrayHandle<T, StorageType> handle):
AtomicImplementation( handle )
{
}
VTKM_EXEC_EXPORT
T Add(vtkm::Id index, const T& value) const
{
return this->AtomicImplementation.Add(index,value);
}
//
// Compare and Swap is an atomic exchange operation. If the value at
// the index is equal to oldValue, then newValue is written to the index.
// The operation was successful if return value is eqaul to oldValue
//
VTKM_EXEC_EXPORT
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
{
return this->AtomicImplementation.CompareAndSwap(index,newValue, oldValue);
}
private:
vtkm::cont::DeviceAdapterAtomicArrayImplementation<T,DeviceAdapterTag>
AtomicImplementation;
};
}
} // namespace vtkm::exec
#endif //vtk_m_exec_AtomicArray_h