vtk-m2/vtkm/exec/cuda/internal/ThrustPatches.h
Robert Maynard b3687c6f3c Workaround inclusive_scan issues in thrust 1.8.X for complex value types.
The original workaround for inclusive_scan bugs in thrust 1.8 only solved the
issue for basic arithmetic types such as int, float, double. Now we go one
step further and fix the problem for all types.

The solution is to provide a proper implementation of destructive_accumulate_n
and make sure it exists before any includes of thrust occur.
2015-11-09 17:14:30 -05:00

86 lines
2.7 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_exec_cuda_internal_ThrustPatches_h
#define vtk_m_exec_cuda_internal_ThrustPatches_h
// Forward declaration of vtkm's WrappedBinaryOperator. Only the name is
// required here: the function template defined further down in this header
// mentions the type in its parameter list.
namespace vtkm {
namespace exec {
namespace cuda {
namespace internal {

template<class T, class F>
class WrappedBinaryOperator;

}
}
}
} // namespace vtkm::exec::cuda::internal
namespace thrust {
namespace system {
namespace cuda {
namespace detail {
namespace bulk_ {
namespace detail {
namespace accumulate_detail {

// Thrust 1.8.0 through 1.8.2 ship an inclusive_scan that accumulates values
// incorrectly when the binary operator states that it is not commutative.
// To cover complex value types as well, thrust/bulk is patched here with the
// fix proposed in https://github.com/thrust/thrust/issues/692
//
// This overload has to be declared before ANY thrust header is included;
// otherwise the device code inside thrust that calls
// destructive_accumulate_n will not see it.
//
// The overload applies when the operator is a
// vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F>.
//
//   g         group whose members cooperate on the accumulation
//   first     random access iterator to the values; elements [0, n) are
//             overwritten with partial results (hence "destructive")
//   n         number of values to combine
//   init      initial value, applied as the LEFT operand of the final combine
//   binary_op operator used to combine two values
//
// Returns binary_op(init, first[n - 1]) as read after the rounds below.
//
// NOTE(review): first[n - 1] is read unconditionally, so callers are expected
// to pass n >= 1 -- confirm against the call sites inside thrust/bulk.
// NOTE(review): g.wait() is presumably a group-wide barrier; every member
// reaches each call unconditionally, and the calls must stay in this order.
template<typename ConcurrentGroup,
         typename RandomAccessIterator,
         typename Size,
         typename T,
         typename F>
__device__
T destructive_accumulate_n(
  ConcurrentGroup &g,
  RandomAccessIterator first,
  Size n,
  T init,
  vtkm::exec::cuda::internal::WrappedBinaryOperator<T, F> binary_op)
{
  typedef typename ConcurrentGroup::size_type size_type;

  // Index of the calling member inside the group, and whether that index
  // addresses one of the n input values.
  const size_type lane = g.this_exec.index();
  const bool ownsElement = lane < n;

  // Members without an input value carry init as a placeholder.
  T partial = init;
  if (ownsElement)
  {
    partial = first[lane];
  }
  g.wait();

  // The stride doubles every round until it reaches the group size.
  size_type stride = 1;
  while (stride < g.size())
  {
    // Combine with the element `stride` positions to the left. That element
    // is always passed as the left operand, so operand order is preserved.
    const bool hasLeftNeighbor = (lane >= stride) && (lane - stride < n);
    if (hasLeftNeighbor)
    {
      partial = binary_op(first[lane - stride], partial);
    }
    // Wait after the reads of this round, before anything is overwritten.
    g.wait();

    if (ownsElement)
    {
      first[lane] = partial;
    }
    // Wait after the writes, before the next round reads them.
    g.wait();

    stride += stride;
  }

  // Fold init in from the left of the last element.
  T total = binary_op(init, first[n - 1]);
  g.wait();
  return total;
}

}
}
} // namespace bulk_::detail::accumulate_detail
}
}
}
} // namespace thrust::system::cuda::detail
#endif //vtk_m_exec_cuda_internal_ThrustPatches_h