Solve reduce by key bugs with cuda 7.5 + maxwell hardware.
The downside is that all architectures now perform a device-wide synchronization after reduce_by_key. This is not a serious concern, but it is a cost to be aware of.
This commit is contained in:
parent
c2dcd1df2f
commit
e5c3f9c42d
@ -400,9 +400,10 @@ private:
|
||||
vtkm::exec::cuda::internal::WrappedBinaryOperator<ValueType,
|
||||
BinaryFunctor> bop(binary_functor);
|
||||
|
||||
|
||||
try
|
||||
{
|
||||
result_iterators = ::thrust::reduce_by_key(thrust::cuda::par,
|
||||
result_iterators = ::thrust::reduce_by_key(vtkm_cuda_policy(),
|
||||
IteratorBegin(keys),
|
||||
IteratorEnd(keys),
|
||||
IteratorBegin(values),
|
||||
|
@ -167,4 +167,58 @@ __host__ __device__
|
||||
}
|
||||
|
||||
|
||||
// reduce_by_key overload picked up when the algorithm is invoked with a
// vtkm_cuda_policy and the key range is expressed as
// thrust::system::cuda::pointer<T>.
//
// The call is forwarded to thrust::reduce_by_key using thrust::cuda::par.
// When compiled with nvcc 7.5 (any 7.x release >= 7.5) the keys are handed
// to Thrust as raw pointers (via .get()) and the whole device is
// synchronized before returning; this is the workaround for the
// reduce_by_key bugs seen with CUDA 7.5 on Maxwell hardware. With any other
// compiler version the call is a plain forward with no extra sync.
//
// The compiler version is tested with __CUDACC_VER_MAJOR__ /
// __CUDACC_VER_MINOR__ rather than __CUDACC_VER__: newer nvcc releases
// define __CUDACC_VER__ as an error-message string, so comparing it
// numerically no longer preprocesses.
//
// Returns the pair of output iterators produced by thrust::reduce_by_key.
template<typename T,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2,
         typename BinaryPredicate,
         typename BinaryFunction>
__host__
::thrust::pair<OutputIterator1,OutputIterator2>
reduce_by_key(const vtkm_cuda_policy &exec,
              thrust::system::cuda::pointer<T> keys_first,
              thrust::system::cuda::pointer<T> keys_last,
              InputIterator2 values_first,
              OutputIterator1 keys_output,
              OutputIterator2 values_output,
              BinaryPredicate binary_pred,
              BinaryFunction binary_op)
{
  // The policy object is only used for overload selection.
  (void)exec;

#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ == 7) && (__CUDACC_VER_MINOR__ >= 5)
  // CUDA 7.5 workaround: pass raw key pointers and force a device sync.
  ::thrust::pair<OutputIterator1,OutputIterator2> result =
      thrust::reduce_by_key(thrust::cuda::par,
                            keys_first.get(),
                            keys_last.get(),
                            values_first,
                            keys_output,
                            values_output,
                            binary_pred,
                            binary_op);
  // NOTE(review): the cudaError_t returned by cudaDeviceSynchronize() is not
  // inspected here; consider routing it through the project's CUDA error
  // handling so asynchronous failures are reported at this call site.
  cudaDeviceSynchronize();
  return result;
#else
  return thrust::reduce_by_key(thrust::cuda::par,
                               keys_first,
                               keys_last,
                               values_first,
                               keys_output,
                               values_output,
                               binary_pred,
                               binary_op);
#endif
}
|
||||
|
||||
// Generic reduce_by_key overload picked up when the algorithm is invoked
// with a vtkm_cuda_policy and arbitrary key iterators.
//
// The call is forwarded unchanged to thrust::reduce_by_key using
// thrust::cuda::par. When compiled with nvcc 7.5 (any 7.x release >= 7.5)
// the whole device is additionally synchronized before returning; this is
// the workaround for the reduce_by_key bugs seen with CUDA 7.5 on Maxwell
// hardware. With any other compiler version no extra sync is issued.
//
// The compiler version is tested with __CUDACC_VER_MAJOR__ /
// __CUDACC_VER_MINOR__ rather than __CUDACC_VER__: newer nvcc releases
// define __CUDACC_VER__ as an error-message string, so comparing it
// numerically no longer preprocesses.
//
// Returns the pair of output iterators produced by thrust::reduce_by_key.
template<typename InputIterator1,
         typename InputIterator2,
         typename OutputIterator1,
         typename OutputIterator2,
         typename BinaryPredicate,
         typename BinaryFunction>
__host__
::thrust::pair<OutputIterator1,OutputIterator2>
reduce_by_key(const vtkm_cuda_policy &exec,
              InputIterator1 keys_first,
              InputIterator1 keys_last,
              InputIterator2 values_first,
              OutputIterator1 keys_output,
              OutputIterator2 values_output,
              BinaryPredicate binary_pred,
              BinaryFunction binary_op)
{
  // The policy object is only used for overload selection.
  (void)exec;

#if defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ == 7) && (__CUDACC_VER_MINOR__ >= 5)
  // CUDA 7.5 workaround: force a device sync after the reduction.
  ::thrust::pair<OutputIterator1,OutputIterator2> result =
      thrust::reduce_by_key(thrust::cuda::par,
                            keys_first,
                            keys_last,
                            values_first,
                            keys_output,
                            values_output,
                            binary_pred,
                            binary_op);
  // NOTE(review): the cudaError_t returned by cudaDeviceSynchronize() is not
  // inspected here; consider routing it through the project's CUDA error
  // handling so asynchronous failures are reported at this call site.
  cudaDeviceSynchronize();
  return result;
#else
  return thrust::reduce_by_key(thrust::cuda::par,
                               keys_first,
                               keys_last,
                               values_first,
                               keys_output,
                               values_output,
                               binary_pred,
                               binary_op);
#endif
}
|
||||
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user