Workaround thrust 1.8 inclusive scan issue.
Starting in thrust 1.8 the implementation of scan inclusive inside thrust became highly optimized by using parallel task groups. This new implementation has a bug that only exists when using custom binary operators, large size arrays, release mode, and no debugger or mem-checker attached. While I have submitted the issue to thrust, we need to be able to work around the existing issue. The solution I have chosen is to mark all vtkm::exec::cuda::interal::WrappedBinaryOperators as being commutative as far as thrust is concerened. To make sure we don't get any unexpected behavior I have also had to create WrappedBinaryPredicate so that we don't mark any predicate as commutative.
This commit is contained in:
parent
e182388cbe
commit
157d8efee4
@ -298,11 +298,12 @@ private:
|
||||
template<class InputPortal, class ValuesPortal, class OutputPortal,
|
||||
class BinaryCompare>
|
||||
VTKM_CONT_EXPORT static void LowerBoundsPortal(const InputPortal &input,
|
||||
const ValuesPortal &values,
|
||||
const OutputPortal &output,
|
||||
BinaryCompare binary_compare)
|
||||
const ValuesPortal &values,
|
||||
const OutputPortal &output,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,
|
||||
typedef typename InputPortal::ValueType ValueType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
|
||||
BinaryCompare> bop(binary_compare);
|
||||
::thrust::lower_bound(thrust::cuda::par,
|
||||
IteratorBegin(input),
|
||||
@ -330,7 +331,8 @@ private:
|
||||
typename InputPortal::ValueType initialValue,
|
||||
BinaryFunctor binary_functor)
|
||||
{
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename InputPortal::ValueType,
|
||||
typedef typename InputPortal::ValueType ValueType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
|
||||
BinaryFunctor> bop(binary_functor);
|
||||
return ::thrust::reduce(thrust::cuda::par,
|
||||
IteratorBegin(input),
|
||||
@ -361,7 +363,8 @@ private:
|
||||
|
||||
::thrust::equal_to<typename KeysPortal::ValueType> binaryPredicate;
|
||||
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename ValuesPortal::ValueType,
|
||||
typedef typename ValuesPortal::ValueType ValueType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
|
||||
BinaryFunctor> bop(binary_functor);
|
||||
result_iterators = ::thrust::reduce_by_key(thrust::cuda::par,
|
||||
IteratorBegin(keys),
|
||||
@ -381,7 +384,7 @@ private:
|
||||
typename InputPortal::ValueType ScanExclusivePortal(const InputPortal &input,
|
||||
const OutputPortal &output)
|
||||
{
|
||||
typedef typename InputPortal::ValueType ValueType;
|
||||
typedef typename OutputPortal::ValueType ValueType;
|
||||
|
||||
return ScanExclusivePortal(input,
|
||||
output,
|
||||
@ -397,7 +400,7 @@ private:
|
||||
{
|
||||
// Use iterator to get value so that thrust device_ptr has chance to handle
|
||||
// data on device.
|
||||
typedef typename InputPortal::ValueType ValueType;
|
||||
typedef typename OutputPortal::ValueType ValueType;
|
||||
ValueType inputEnd = *(IteratorEnd(input) - 1);
|
||||
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
|
||||
@ -421,7 +424,7 @@ private:
|
||||
typename InputPortal::ValueType ScanInclusivePortal(const InputPortal &input,
|
||||
const OutputPortal &output)
|
||||
{
|
||||
typedef typename InputPortal::ValueType ValueType;
|
||||
typedef typename OutputPortal::ValueType ValueType;
|
||||
return ScanInclusivePortal(input, output, ::thrust::plus<ValueType>() );
|
||||
}
|
||||
|
||||
@ -431,7 +434,8 @@ private:
|
||||
const OutputPortal &output,
|
||||
BinaryFunctor binary_functor)
|
||||
{
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename InputPortal::ValueType,
|
||||
typedef typename OutputPortal::ValueType ValueType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
|
||||
BinaryFunctor> bop(binary_functor);
|
||||
|
||||
typedef typename detail::IteratorTraits<OutputPortal>::IteratorType
|
||||
@ -458,7 +462,9 @@ private:
|
||||
VTKM_CONT_EXPORT static void SortPortal(const ValuesPortal &values,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
|
||||
typedef typename ValuesPortal::ValueType ValueType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
|
||||
BinaryCompare> bop(binary_compare);
|
||||
::thrust::sort(thrust::cuda::par,
|
||||
IteratorBegin(values),
|
||||
IteratorEnd(values),
|
||||
@ -479,7 +485,9 @@ private:
|
||||
const ValuesPortal &values,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
|
||||
typedef typename KeysPortal::ValueType ValueType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
|
||||
BinaryCompare> bop(binary_compare);
|
||||
::thrust::sort_by_key(thrust::cuda::par,
|
||||
IteratorBegin(keys),
|
||||
IteratorEnd(keys),
|
||||
@ -548,7 +556,10 @@ private:
|
||||
{
|
||||
typedef typename detail::IteratorTraits<ValuesPortal>::IteratorType
|
||||
IteratorType;
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
|
||||
typedef typename ValuesPortal::ValueType ValueType;
|
||||
|
||||
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
|
||||
BinaryCompare> bop(binary_compare);
|
||||
IteratorType begin = IteratorBegin(values);
|
||||
IteratorType newLast = ::thrust::unique(thrust::cuda::par,
|
||||
begin,
|
||||
@ -579,7 +590,10 @@ private:
|
||||
const OutputPortal &output,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
|
||||
typedef typename OutputPortal::ValueType ValueType;
|
||||
|
||||
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
|
||||
BinaryCompare> bop(binary_compare);
|
||||
::thrust::upper_bound(thrust::cuda::par,
|
||||
IteratorBegin(input),
|
||||
IteratorEnd(input),
|
||||
|
@ -28,6 +28,7 @@
|
||||
// Disable warnings we check vtkm for but Thrust does not.
|
||||
VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
#include <thrust/system/cuda/memory.h>
|
||||
#include <boost/type_traits/remove_const.hpp>
|
||||
VTKM_THIRDPARTY_POST_INCLUDE
|
||||
|
||||
namespace vtkm {
|
||||
@ -39,9 +40,16 @@ namespace internal {
|
||||
// wrapped operator with complex value types such as
|
||||
// PortalValue which happen when passed an input array that
|
||||
// is implicit.
|
||||
template<typename ResultType, typename Function>
|
||||
struct WrappedBinaryOperator
|
||||
template<typename T_, typename Function>
|
||||
struct WrappedBinaryOperator
|
||||
{
|
||||
typedef typename boost::remove_const<T_>::type T;
|
||||
|
||||
//make typedefs that thust expects binary operators to have
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef T result_type;
|
||||
|
||||
Function m_f;
|
||||
|
||||
VTKM_EXEC_EXPORT
|
||||
@ -54,61 +62,128 @@ template<typename ResultType, typename Function>
|
||||
: m_f(f)
|
||||
{}
|
||||
|
||||
template<typename T, typename U>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const T &x, const U &y) const
|
||||
VTKM_EXEC_EXPORT T operator()(const T &x, const T &y) const
|
||||
{
|
||||
return m_f(x, y);
|
||||
}
|
||||
|
||||
template<typename T, typename U>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const T &x,
|
||||
const PortalValue<U> &y) const
|
||||
|
||||
template<typename U>
|
||||
VTKM_EXEC_EXPORT T operator()(const T &x,
|
||||
const PortalValue<U> &y) const
|
||||
{
|
||||
typedef typename PortalValue<U>::ValueType ValueType;
|
||||
return m_f(x, (ValueType)y);
|
||||
return m_f(x, (T)y);
|
||||
}
|
||||
|
||||
template<typename T, typename U>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const PortalValue<T> &x,
|
||||
const U &y) const
|
||||
template<typename U>
|
||||
VTKM_EXEC_EXPORT T operator()(const PortalValue<U> &x,
|
||||
const T &y) const
|
||||
{
|
||||
typedef typename PortalValue<T>::ValueType ValueType;
|
||||
return m_f((ValueType)x, y);
|
||||
return m_f((T)x, y);
|
||||
}
|
||||
|
||||
template<typename T, typename U>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const PortalValue<T> &x,
|
||||
const PortalValue<U> &y) const
|
||||
template<typename U, typename V>
|
||||
VTKM_EXEC_EXPORT T operator()(const PortalValue<U> &x,
|
||||
const PortalValue<V> &y) const
|
||||
{
|
||||
typedef typename PortalValue<T>::ValueType ValueTypeT;
|
||||
typedef typename PortalValue<U>::ValueType ValueTypeU;
|
||||
return m_f((ValueTypeT)x, (ValueTypeU)y);
|
||||
return m_f((T)x, (T)y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const T* y) const
|
||||
VTKM_EXEC_EXPORT T operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const T* y) const
|
||||
{
|
||||
return m_f(*x, *y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const T& y) const
|
||||
VTKM_EXEC_EXPORT T operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const T& y) const
|
||||
{
|
||||
return m_f(*x, y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const T& x,
|
||||
const thrust::system::cuda::pointer<T> y) const
|
||||
VTKM_EXEC_EXPORT T operator()(const T& x,
|
||||
const thrust::system::cuda::pointer<T> y) const
|
||||
{
|
||||
return m_f(x, *y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
VTKM_EXEC_EXPORT ResultType operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const thrust::system::cuda::pointer<T> y) const
|
||||
VTKM_EXEC_EXPORT T operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const thrust::system::cuda::pointer<T> y) const
|
||||
{
|
||||
return m_f(*x, *y);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename T_, typename Function>
|
||||
struct WrappedBinaryPredicate
|
||||
{
|
||||
typedef typename boost::remove_const<T_>::type T;
|
||||
|
||||
//make typedefs that thust expects binary operators to have
|
||||
typedef T first_argument_type;
|
||||
typedef T second_argument_type;
|
||||
typedef T result_type;
|
||||
|
||||
Function m_f;
|
||||
|
||||
VTKM_EXEC_EXPORT
|
||||
WrappedBinaryPredicate()
|
||||
: m_f()
|
||||
{}
|
||||
|
||||
VTKM_CONT_EXPORT
|
||||
WrappedBinaryPredicate(const Function &f)
|
||||
: m_f(f)
|
||||
{}
|
||||
|
||||
VTKM_EXEC_EXPORT bool operator()(const T &x, const T &y) const
|
||||
{
|
||||
return m_f(x, y);
|
||||
}
|
||||
|
||||
|
||||
template<typename U>
|
||||
VTKM_EXEC_EXPORT bool operator()(const T &x,
|
||||
const PortalValue<U> &y) const
|
||||
{
|
||||
return m_f(x, (T)y);
|
||||
}
|
||||
|
||||
template<typename U>
|
||||
VTKM_EXEC_EXPORT bool operator()(const PortalValue<U> &x,
|
||||
const T &y) const
|
||||
{
|
||||
return m_f((T)x, y);
|
||||
}
|
||||
|
||||
template<typename U, typename V>
|
||||
VTKM_EXEC_EXPORT bool operator()(const PortalValue<U> &x,
|
||||
const PortalValue<V> &y) const
|
||||
{
|
||||
return m_f((T)x, (T)y);
|
||||
}
|
||||
|
||||
VTKM_EXEC_EXPORT bool operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const T* y) const
|
||||
{
|
||||
return m_f(*x, *y);
|
||||
}
|
||||
|
||||
VTKM_EXEC_EXPORT bool operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const T& y) const
|
||||
{
|
||||
return m_f(*x, y);
|
||||
}
|
||||
|
||||
VTKM_EXEC_EXPORT bool operator()(const T& x,
|
||||
const thrust::system::cuda::pointer<T> y) const
|
||||
{
|
||||
return m_f(x, *y);
|
||||
}
|
||||
|
||||
VTKM_EXEC_EXPORT bool operator()(const thrust::system::cuda::pointer<T> x,
|
||||
const thrust::system::cuda::pointer<T> y) const
|
||||
{
|
||||
return m_f(*x, *y);
|
||||
}
|
||||
@ -120,5 +195,28 @@ template<typename ResultType, typename Function>
|
||||
}
|
||||
} //namespace vtkm::exec::cuda::internal
|
||||
|
||||
#if defined(THRUST_MAJOR_VERSION) && THRUST_MAJOR_VERSION == 1 && \
|
||||
THRUST_MINOR_VERSION == 8 && THRUST_SUBMINOR_VERSION < 3
|
||||
//So for thrust 1.8.0 - 1.8.2 the inclusive_scan has a bug when accumulating
|
||||
//values when the binary operators states it is not commutative. For now
|
||||
//we can work around this issue by stating that any BinaryOperator
|
||||
//from vtkm is considered to be a commutative BinaryOperator. I have
|
||||
//also moved Predicates over to WrappedBinaryPredicates so that they
|
||||
//don't get marked as commutative incorrectly.
|
||||
//
|
||||
//You can follow the status of the thrust issue at:
|
||||
// https://github.com/thrust/thrust/issues/692
|
||||
namespace thrust
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template< typename T, typename F>
|
||||
struct is_commutative< vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F> > :
|
||||
public thrust::detail::is_arithmetic<T> { };
|
||||
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif //vtk_m_exec_cuda_internal_WrappedOperators_h
|
||||
|
Loading…
Reference in New Issue
Block a user