diff --git a/vtkm/Atomic.h b/vtkm/Atomic.h
index 24b6dc707..04cbe8b06 100644
--- a/vtkm/Atomic.h
+++ b/vtkm/Atomic.h
@@ -225,6 +225,44 @@ VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
     return false;
   }
 }
 
+#if __CUDA_ARCH__ < 200
+VTKM_EXEC_CONT inline vtkm::Float32 vtkmAtomicAddImpl(vtkm::Float32* address,
+                                                      vtkm::Float32 value,
+                                                      vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  vtkm::UInt32 assumed;
+  vtkm::UInt32 old = __float_as_int(*address);
+  do
+  {
+    assumed = old;
+    old = atomicCAS(reinterpret_cast<vtkm::UInt32*>(address),
+                    assumed,
+                    __float_as_int(__int_as_float(assumed) + value));
+  } while (assumed != old);
+  AtomicLoadFence(order);
+  return __int_as_float(old);
+}
+#endif
+#if __CUDA_ARCH__ < 600
+VTKM_EXEC_CONT inline vtkm::Float64 vtkmAtomicAddImpl(vtkm::Float64* address,
+                                                      vtkm::Float64 value,
+                                                      vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  vtkm::UInt64 assumed;
+  vtkm::UInt64 old = __double_as_longlong(*address);
+  do
+  {
+    assumed = old;
+    old = atomicCAS(reinterpret_cast<vtkm::UInt64*>(address),
+                    assumed,
+                    __double_as_longlong(__longlong_as_double(assumed) + value));
+  } while (assumed != old);
+  AtomicLoadFence(order);
+  return __longlong_as_double(old);
+}
+#endif
 }
 } // namespace vtkm::detail
@@ -532,6 +570,39 @@ VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
 VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
 
 #undef VTKM_ATOMIC_OPS_FOR_TYPE
+
+VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* address,
+                                                  vtkm::Float32 value,
+                                                  vtkm::MemoryOrder vtkmNotUsed(order))
+{
+  LONG assumed;
+  LONG old = BitCast<LONG>(*address);
+  do
+  {
+    assumed = old;
+    old = _InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(address),
+                                      BitCast<LONG>(BitCast<vtkm::Float32>(assumed) + value),
+                                      assumed);
+  } while (assumed != old);
+  return BitCast<vtkm::Float32>(old);
+}
+
+VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* address,
+                                                  vtkm::Float64 value,
+                                                  vtkm::MemoryOrder vtkmNotUsed(order))
+{
+  LONG64 assumed;
+  LONG64 old = BitCast<LONG64>(*address);
+  do
+  {
+    assumed = old;
+    old = _InterlockedCompareExchange64(reinterpret_cast<volatile LONG64*>(address),
+                                        BitCast<LONG64>(BitCast<vtkm::Float64>(assumed) + value),
+                                        assumed);
+  } while (assumed != old);
+  return BitCast<vtkm::Float64>(old);
+}
+
 }
 } // namespace vtkm::detail
@@ -585,6 +656,52 @@ VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
 {
   return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
 }
 
+#include <vtkmstd/bit_cast.h>
+
+// TODO: Use enable_if to write one version for both Float32 and Float64.
+VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr,
+                                                  vtkm::Float32 arg,
+                                                  vtkm::MemoryOrder order)
+{
+  vtkm::UInt32 expected = vtkmstd::bit_cast<vtkm::UInt32>(*addr);
+  vtkm::UInt32 desired;
+
+  do
+  {
+    desired = vtkmstd::bit_cast<vtkm::UInt32>(vtkmstd::bit_cast<vtkm::Float32>(expected) + arg);
+  } while (
+    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),
+                                 &expected, // reloads expected with *addr prior to the operation
+                                 desired,
+                                 false,
+                                 GccAtomicMemOrder(order),
+                                 GccAtomicMemOrder(order)));
+  // return the "old" value that was in the memory.
+  return vtkmstd::bit_cast<vtkm::Float32>(expected);
+}
+
+// TODO: Use enable_if to write one version for both Float32 and Float64.
+VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr,
+                                                  vtkm::Float64 arg,
+                                                  vtkm::MemoryOrder order)
+{
+  vtkm::UInt64 expected = vtkmstd::bit_cast<vtkm::UInt64>(*addr);
+  vtkm::UInt64 desired;
+
+  do
+  {
+    desired = vtkmstd::bit_cast<vtkm::UInt64>(vtkmstd::bit_cast<vtkm::Float64>(expected) + arg);
+  } while (
+    !__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),
+                                 &expected, // reloads expected with *addr prior to the operation
+                                 desired,
+                                 false,
+                                 GccAtomicMemOrder(order),
+                                 GccAtomicMemOrder(order)));
+  // return the "old" value that was in the memory.
+  return vtkmstd::bit_cast<vtkm::Float64>(expected);
+}
+
 template <typename T>
 VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
 {
@@ -706,7 +823,7 @@ VTKM_EXEC_CONT inline T AtomicAdd(
 {
   return detail::AtomicAddImpl(pointer, operand, order);
 }
-template <typename T>
+template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
 VTKM_EXEC_CONT inline T AtomicAdd(
   T* pointer,
   detail::OppositeSign<T> operand,
@@ -846,7 +963,7 @@ VTKM_EXEC_CONT inline T AtomicNot(
 /// pointing to an object on the stack).
 ///
 template <typename T>
-VTKM_EXEC_CONT inline T AtomicCompareExchange(
+VTKM_EXEC_CONT inline bool AtomicCompareExchange(
   T* shared,
   T* expected,
   T desired,
diff --git a/vtkm/cont/AtomicArray.h b/vtkm/cont/AtomicArray.h
index eb3fc6aaf..35569b0a4 100644
--- a/vtkm/cont/AtomicArray.h
+++ b/vtkm/cont/AtomicArray.h
@@ -26,7 +26,8 @@ namespace cont
 /// \brief A type list containing types that can be used with an AtomicArray.
 ///
 /// @cond NONE
-using AtomicArrayTypeList = vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::UInt64, vtkm::Int64>;
+using AtomicArrayTypeList =
+  vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::UInt64, vtkm::Int64, vtkm::Float32, vtkm::Float64>;
 
 struct VTKM_DEPRECATED(1.6,
                        "AtomicArrayTypeListTag replaced by AtomicArrayTypeList. Note that the "
diff --git a/vtkm/cont/arg/testing/UnitTestTypeCheckArray.cxx b/vtkm/cont/arg/testing/UnitTestTypeCheckArray.cxx
index cca2bbf7a..ade6eccf3 100644
--- a/vtkm/cont/arg/testing/UnitTestTypeCheckArray.cxx
+++ b/vtkm/cont/arg/testing/UnitTestTypeCheckArray.cxx
@@ -89,7 +89,7 @@ void TestCheckAtomicArray()
                    "Check for 32-bit int failed.");
   VTKM_TEST_ASSERT((TypeCheck<TypeCheckTagAtomicArray, Int64Array>::value),
                    "Check for 64-bit int failed.");
-  VTKM_TEST_ASSERT(!(TypeCheck<TypeCheckTagAtomicArray, FloatArray>::value),
+  VTKM_TEST_ASSERT((TypeCheck<TypeCheckTagAtomicArray, FloatArray>::value),
                    "Check for float failed.");
 }
 
diff --git a/vtkm/exec/AtomicArrayExecutionObject.h b/vtkm/exec/AtomicArrayExecutionObject.h
index 92bfa4910..03687f170 100644
--- a/vtkm/exec/AtomicArrayExecutionObject.h
+++ b/vtkm/exec/AtomicArrayExecutionObject.h
@@ -49,6 +49,32 @@ struct MakeUnsigned<vtkm::Int64>
 {
   using type = vtkm::UInt64;
 };
+template <>
+struct MakeUnsigned<vtkm::Float32>
+{
+  using type = vtkm::UInt32;
+};
+template <>
+struct MakeUnsigned<vtkm::Float64>
+{
+  using type = vtkm::UInt64;
+};
+
+template <typename T>
+struct ArithType
+{
+  using type = typename MakeUnsigned<T>::type;
+};
+template <>
+struct ArithType<vtkm::Float32>
+{
+  using type = vtkm::Float32;
+};
+template <>
+struct ArithType<vtkm::Float64>
+{
+  using type = vtkm::Float64;
+};
 }
 
 template <typename T>
@@ -168,7 +194,7 @@ public:
     // This is safe, since the only difference between signed/unsigned types
     // is how overflow works, and signed overflow is already undefined. We also
     // document that overflow is undefined for this operation.
-    using APIType = typename detail::MakeUnsigned<T>::type;
+    using APIType = typename detail::ArithType<T>::type;
     return static_cast<T>(
       vtkm::AtomicAdd(reinterpret_cast<APIType*>(this->Data + index), static_cast<APIType>(value)));
diff --git a/vtkmstd/CMakeLists.txt b/vtkmstd/CMakeLists.txt
index 47d40cad2..afa6b894f 100644
--- a/vtkmstd/CMakeLists.txt
+++ b/vtkmstd/CMakeLists.txt
@@ -10,6 +10,7 @@
 set(headers
   aligned_union.h
+  bit_cast.h
   integer_sequence.h
   is_trivial.h
   void_t.h
diff --git a/vtkmstd/bit_cast.h b/vtkmstd/bit_cast.h
new file mode 100644
index 000000000..0535b58e9
--- /dev/null
+++ b/vtkmstd/bit_cast.h
@@ -0,0 +1,35 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+#ifndef vtk_m_std_bit_cast_h
+#define vtk_m_std_bit_cast_h
+
+#include <cstring>
+#include <type_traits>
+
+namespace vtkmstd
+{
+// Copy/Paste from cppreference.com
+template <class To, class From>
+typename std::enable_if<sizeof(To) == sizeof(From) && std::is_trivially_copyable<From>::value &&
+                          std::is_trivially_copyable<To>::value,
+                        To>::type
+// constexpr support needs compiler magic
+bit_cast(const From& src) noexcept
+{
+  static_assert(
+    std::is_trivially_constructible<To>::value,
+    "This implementation additionally requires destination type to be trivially constructible");
+
+  To dst;
+  std::memcpy(&dst, &src, sizeof(To));
+  return dst;
+}
+}
+#endif //vtk_m_std_bit_cast_h
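For context, the user-visible effect of this patch is that vtkm::Float32 and vtkm::Float64 become valid AtomicArray value types and valid arguments to vtkm::AtomicAdd; on compilers or CUDA architectures without a native floating-point atomic add, the new compare-and-swap loops provide the fallback. The sketch below shows one way this could be exercised from a worklet. It is not part of the patch: the worklet and helper names (AtomicFloatAdd, RunAtomicFloatSum) are illustrative assumptions; the worklet tags, Invoker, and ArrayHandle portal calls are existing VTK-m API.

// Sketch only (not part of the patch): sums a Float32 array through an atomic
// array, which relies on AtomicArrayTypeList now containing Float32/Float64.
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/worklet/WorkletMapField.h>

struct AtomicFloatAdd : vtkm::worklet::WorkletMapField
{
  using ControlSignature = void(FieldIn values, AtomicArrayInOut sum);
  using ExecutionSignature = void(_1, _2);

  template <typename T, typename AtomicPortal>
  VTKM_EXEC void operator()(const T& value, const AtomicPortal& sum) const
  {
    // T may now be vtkm::Float32 or vtkm::Float64. Where no native atomic add
    // exists (MSVC, the GCC builtins, old CUDA architectures), the add goes
    // through the compare-and-swap loops added in vtkm/Atomic.h.
    sum.Add(0, value);
  }
};

inline vtkm::Float32 RunAtomicFloatSum(const vtkm::cont::ArrayHandle<vtkm::Float32>& values)
{
  vtkm::cont::ArrayHandle<vtkm::Float32> sum;
  sum.Allocate(1);
  sum.WritePortal().Set(0, 0.0f);

  vtkm::cont::Invoker invoke;
  invoke(AtomicFloatAdd{}, values, sum);

  return sum.ReadPortal().Get(0);
}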