Workaround thrust 1.8 inclusive scan issue.

Starting in thrust 1.8 the implementation of scan inclusive inside
thrust became highly optimized by using parallel task groups. This
new implementation has a bug that only exists when using custom
binary operators, large size arrays, release mode, and no
debugger or mem-checker attached.

While I have submitted the issue to thrust, we need to be able
to work around the existing issue. The solution I have chosen is
to mark all vtkm::exec::cuda::interal::WrappedBinaryOperators
as being commutative as far as thrust is concerened. To make
sure we don't get any unexpected behavior I have also had
to create WrappedBinaryPredicate so that we don't mark any
predicate as commutative.
This commit is contained in:
Robert Maynard 2015-08-14 11:33:37 -04:00
parent e182388cbe
commit 157d8efee4
2 changed files with 158 additions and 46 deletions

@ -298,11 +298,12 @@ private:
template<class InputPortal, class ValuesPortal, class OutputPortal,
class BinaryCompare>
VTKM_CONT_EXPORT static void LowerBoundsPortal(const InputPortal &input,
const ValuesPortal &values,
const OutputPortal &output,
BinaryCompare binary_compare)
const ValuesPortal &values,
const OutputPortal &output,
BinaryCompare binary_compare)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,
typedef typename InputPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
BinaryCompare> bop(binary_compare);
::thrust::lower_bound(thrust::cuda::par,
IteratorBegin(input),
@ -330,7 +331,8 @@ private:
typename InputPortal::ValueType initialValue,
BinaryFunctor binary_functor)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename InputPortal::ValueType,
typedef typename InputPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
BinaryFunctor> bop(binary_functor);
return ::thrust::reduce(thrust::cuda::par,
IteratorBegin(input),
@ -361,7 +363,8 @@ private:
::thrust::equal_to<typename KeysPortal::ValueType> binaryPredicate;
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename ValuesPortal::ValueType,
typedef typename ValuesPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
BinaryFunctor> bop(binary_functor);
result_iterators = ::thrust::reduce_by_key(thrust::cuda::par,
IteratorBegin(keys),
@ -381,7 +384,7 @@ private:
typename InputPortal::ValueType ScanExclusivePortal(const InputPortal &input,
const OutputPortal &output)
{
typedef typename InputPortal::ValueType ValueType;
typedef typename OutputPortal::ValueType ValueType;
return ScanExclusivePortal(input,
output,
@ -397,7 +400,7 @@ private:
{
// Use iterator to get value so that thrust device_ptr has chance to handle
// data on device.
typedef typename InputPortal::ValueType ValueType;
typedef typename OutputPortal::ValueType ValueType;
ValueType inputEnd = *(IteratorEnd(input) - 1);
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
@ -421,7 +424,7 @@ private:
typename InputPortal::ValueType ScanInclusivePortal(const InputPortal &input,
const OutputPortal &output)
{
typedef typename InputPortal::ValueType ValueType;
typedef typename OutputPortal::ValueType ValueType;
return ScanInclusivePortal(input, output, ::thrust::plus<ValueType>() );
}
@ -431,7 +434,8 @@ private:
const OutputPortal &output,
BinaryFunctor binary_functor)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<typename InputPortal::ValueType,
typedef typename OutputPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
BinaryFunctor> bop(binary_functor);
typedef typename detail::IteratorTraits<OutputPortal>::IteratorType
@ -458,7 +462,9 @@ private:
VTKM_CONT_EXPORT static void SortPortal(const ValuesPortal &values,
BinaryCompare binary_compare)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
typedef typename ValuesPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
BinaryCompare> bop(binary_compare);
::thrust::sort(thrust::cuda::par,
IteratorBegin(values),
IteratorEnd(values),
@ -479,7 +485,9 @@ private:
const ValuesPortal &values,
BinaryCompare binary_compare)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
typedef typename KeysPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
BinaryCompare> bop(binary_compare);
::thrust::sort_by_key(thrust::cuda::par,
IteratorBegin(keys),
IteratorEnd(keys),
@ -548,7 +556,10 @@ private:
{
typedef typename detail::IteratorTraits<ValuesPortal>::IteratorType
IteratorType;
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
typedef typename ValuesPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
BinaryCompare> bop(binary_compare);
IteratorType begin = IteratorBegin(values);
IteratorType newLast = ::thrust::unique(thrust::cuda::par,
begin,
@ -579,7 +590,10 @@ private:
const OutputPortal &output,
BinaryCompare binary_compare)
{
vtkm::exec::cuda::internal::WrappedBinaryOperator<bool,BinaryCompare> bop(binary_compare);
typedef typename OutputPortal::ValueType ValueType;
vtkm::exec::cuda::internal::WrappedBinaryPredicate<ValueType,
BinaryCompare> bop(binary_compare);
::thrust::upper_bound(thrust::cuda::par,
IteratorBegin(input),
IteratorEnd(input),

@ -28,6 +28,7 @@
// Disable warnings we check vtkm for but Thrust does not.
VTKM_THIRDPARTY_PRE_INCLUDE
#include <thrust/system/cuda/memory.h>
#include <boost/type_traits/remove_const.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm {
@ -39,9 +40,16 @@ namespace internal {
// wrapped operator with complex value types such as
// PortalValue which happen when passed an input array that
// is implicit.
template<typename ResultType, typename Function>
struct WrappedBinaryOperator
template<typename T_, typename Function>
struct WrappedBinaryOperator
{
typedef typename boost::remove_const<T_>::type T;
//make typedefs that thust expects binary operators to have
typedef T first_argument_type;
typedef T second_argument_type;
typedef T result_type;
Function m_f;
VTKM_EXEC_EXPORT
@ -54,61 +62,128 @@ template<typename ResultType, typename Function>
: m_f(f)
{}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const T &x, const U &y) const
VTKM_EXEC_EXPORT T operator()(const T &x, const T &y) const
{
return m_f(x, y);
}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const T &x,
const PortalValue<U> &y) const
template<typename U>
VTKM_EXEC_EXPORT T operator()(const T &x,
const PortalValue<U> &y) const
{
typedef typename PortalValue<U>::ValueType ValueType;
return m_f(x, (ValueType)y);
return m_f(x, (T)y);
}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const PortalValue<T> &x,
const U &y) const
template<typename U>
VTKM_EXEC_EXPORT T operator()(const PortalValue<U> &x,
const T &y) const
{
typedef typename PortalValue<T>::ValueType ValueType;
return m_f((ValueType)x, y);
return m_f((T)x, y);
}
template<typename T, typename U>
VTKM_EXEC_EXPORT ResultType operator()(const PortalValue<T> &x,
const PortalValue<U> &y) const
template<typename U, typename V>
VTKM_EXEC_EXPORT T operator()(const PortalValue<U> &x,
const PortalValue<V> &y) const
{
typedef typename PortalValue<T>::ValueType ValueTypeT;
typedef typename PortalValue<U>::ValueType ValueTypeU;
return m_f((ValueTypeT)x, (ValueTypeU)y);
return m_f((T)x, (T)y);
}
template<typename T>
VTKM_EXEC_EXPORT ResultType operator()(const thrust::system::cuda::pointer<T> x,
const T* y) const
VTKM_EXEC_EXPORT T operator()(const thrust::system::cuda::pointer<T> x,
const T* y) const
{
return m_f(*x, *y);
}
template<typename T>
VTKM_EXEC_EXPORT ResultType operator()(const thrust::system::cuda::pointer<T> x,
const T& y) const
VTKM_EXEC_EXPORT T operator()(const thrust::system::cuda::pointer<T> x,
const T& y) const
{
return m_f(*x, y);
}
template<typename T>
VTKM_EXEC_EXPORT ResultType operator()(const T& x,
const thrust::system::cuda::pointer<T> y) const
VTKM_EXEC_EXPORT T operator()(const T& x,
const thrust::system::cuda::pointer<T> y) const
{
return m_f(x, *y);
}
template<typename T>
VTKM_EXEC_EXPORT ResultType operator()(const thrust::system::cuda::pointer<T> x,
const thrust::system::cuda::pointer<T> y) const
VTKM_EXEC_EXPORT T operator()(const thrust::system::cuda::pointer<T> x,
const thrust::system::cuda::pointer<T> y) const
{
return m_f(*x, *y);
}
};
template<typename T_, typename Function>
struct WrappedBinaryPredicate
{
typedef typename boost::remove_const<T_>::type T;
//make typedefs that thust expects binary operators to have
typedef T first_argument_type;
typedef T second_argument_type;
typedef T result_type;
Function m_f;
VTKM_EXEC_EXPORT
WrappedBinaryPredicate()
: m_f()
{}
VTKM_CONT_EXPORT
WrappedBinaryPredicate(const Function &f)
: m_f(f)
{}
VTKM_EXEC_EXPORT bool operator()(const T &x, const T &y) const
{
return m_f(x, y);
}
template<typename U>
VTKM_EXEC_EXPORT bool operator()(const T &x,
const PortalValue<U> &y) const
{
return m_f(x, (T)y);
}
template<typename U>
VTKM_EXEC_EXPORT bool operator()(const PortalValue<U> &x,
const T &y) const
{
return m_f((T)x, y);
}
template<typename U, typename V>
VTKM_EXEC_EXPORT bool operator()(const PortalValue<U> &x,
const PortalValue<V> &y) const
{
return m_f((T)x, (T)y);
}
VTKM_EXEC_EXPORT bool operator()(const thrust::system::cuda::pointer<T> x,
const T* y) const
{
return m_f(*x, *y);
}
VTKM_EXEC_EXPORT bool operator()(const thrust::system::cuda::pointer<T> x,
const T& y) const
{
return m_f(*x, y);
}
VTKM_EXEC_EXPORT bool operator()(const T& x,
const thrust::system::cuda::pointer<T> y) const
{
return m_f(x, *y);
}
VTKM_EXEC_EXPORT bool operator()(const thrust::system::cuda::pointer<T> x,
const thrust::system::cuda::pointer<T> y) const
{
return m_f(*x, *y);
}
@ -120,5 +195,28 @@ template<typename ResultType, typename Function>
}
} //namespace vtkm::exec::cuda::internal
#if defined(THRUST_MAJOR_VERSION) && THRUST_MAJOR_VERSION == 1 && \
THRUST_MINOR_VERSION == 8 && THRUST_SUBMINOR_VERSION < 3
//So for thrust 1.8.0 - 1.8.2 the inclusive_scan has a bug when accumulating
//values when the binary operators states it is not commutative. For now
//we can work around this issue by stating that any BinaryOperator
//from vtkm is considered to be a commutative BinaryOperator. I have
//also moved Predicates over to WrappedBinaryPredicates so that they
//don't get marked as commutative incorrectly.
//
//You can follow the status of the thrust issue at:
// https://github.com/thrust/thrust/issues/692
namespace thrust
{
namespace detail
{
template< typename T, typename F>
struct is_commutative< vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F> > :
public thrust::detail::is_arithmetic<T> { };
}
}
#endif
#endif //vtk_m_exec_cuda_internal_WrappedOperators_h