diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h index ea19c27ce..461d2a65e 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h +++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h @@ -400,9 +400,10 @@ private: vtkm::exec::cuda::internal::WrappedBinaryOperator bop(binary_functor); + try { - result_iterators = ::thrust::reduce_by_key(thrust::cuda::par, + result_iterators = ::thrust::reduce_by_key(vtkm_cuda_policy(), IteratorBegin(keys), IteratorEnd(keys), IteratorBegin(values), diff --git a/vtkm/exec/cuda/internal/ExecutionPolicy.h b/vtkm/exec/cuda/internal/ExecutionPolicy.h index cc7e4b6c3..313462511 100644 --- a/vtkm/exec/cuda/internal/ExecutionPolicy.h +++ b/vtkm/exec/cuda/internal/ExecutionPolicy.h @@ -167,4 +167,58 @@ __host__ __device__ } +template +__host__ + ::thrust::pair + reduce_by_key(const vtkm_cuda_policy &exec, + thrust::system::cuda::pointer keys_first, + thrust::system::cuda::pointer keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) + +{ +#if defined(__CUDACC_VER__) && (__CUDACC_VER__ >= 70500) && (__CUDACC_VER__ < 80000) + ::thrust::pair result = thrust::reduce_by_key(thrust::cuda::par, keys_first.get(), keys_last.get(), values_first, keys_output, values_output, binary_pred, binary_op); + cudaDeviceSynchronize(); + return result; +#else + return thrust::reduce_by_key(thrust::cuda::par, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); +#endif +} + +template +__host__ + ::thrust::pair + reduce_by_key(const vtkm_cuda_policy &exec, + InputIterator1 keys_first, + InputIterator1 keys_last, + InputIterator2 values_first, + OutputIterator1 keys_output, + OutputIterator2 values_output, + BinaryPredicate binary_pred, + BinaryFunction binary_op) + +{ +#if defined(__CUDACC_VER__) && (__CUDACC_VER__ >= 70500) && (__CUDACC_VER__ < 80000) + ::thrust::pair result = thrust::reduce_by_key(thrust::cuda::par, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); + cudaDeviceSynchronize(); + return result; +#else + return thrust::reduce_by_key(thrust::cuda::par, keys_first, keys_last, values_first, keys_output, values_output, binary_pred, binary_op); +#endif +} + #endif