Added custom CopyIf function

This commit is contained in:
Brent Lessley 2019-01-14 14:35:19 -08:00 committed by Brent Lessley
parent d370674d2b
commit e0f0e4d673
4 changed files with 571 additions and 501 deletions

@ -94,6 +94,13 @@ struct DeviceAdapterAlgorithm
vtkm::cont::ArrayHandle<T, COut>& output,
UnaryPredicate unary_predicate);
/// \brief Stencil-driven copy into an output of caller-specified length
///
/// Overload of \c CopyIf that receives the length of \c output from the
/// caller through \c output_size instead of receiving a predicate.
///
/// \param input        array the copied values are read from
/// \param stencil      array whose entries decide which values are copied
/// \param output       array that receives the copied values
/// \param output_size  number of values \c output is to hold
///
/// NOTE(review): presumably \c output_size must equal the number of selected
/// stencil entries, since nothing in this signature reports the count back to
/// the caller -- confirm against the device implementations.
template <typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
vtkm::cont::ArrayHandle<T, COut>& output,
const vtkm::Id& output_size);
/// \brief Copy the contents of a section of one ArrayHandle to another
///
/// Copies a range of elements of \c input to \c output. The number of

@ -1142,6 +1142,21 @@ public:
output.Shrink(newSize);
}
/// \brief Stencil-driven copy into an output of caller-specified length (CUDA)
///
/// Prepares \c input and \c stencil for reading and \c output for writing
/// \c output_size values on the CUDA device, then forwards the three portals
/// to \c CopyIfPortal together with a \c ::vtkm::NotZeroInitialized predicate.
///
/// \param input        array the copied values are read from
/// \param stencil      array the predicate is evaluated on
/// \param output       array prepared for \c output_size values and written to
/// \param output_size  number of values \c output is prepared for
///
/// NOTE(review): anything \c CopyIfPortal returns is ignored here and
/// \c output is not resized afterwards, so \c output_size presumably has to
/// match the number of selected values -- confirm with the callers.
template <typename T, typename U, class SIn, class SStencil, class SOut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<U, SIn>& input,
                             const vtkm::cont::ArrayHandle<T, SStencil>& stencil,
                             vtkm::cont::ArrayHandle<U, SOut>& output,
                             const vtkm::Id& output_size)
{
  VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);

  // The stencil length used to be read into an unused local here; it was
  // dropped because nothing consumed it.
  CopyIfPortal(input.PrepareForInput(DeviceAdapterTagCuda()),
               stencil.PrepareForInput(DeviceAdapterTagCuda()),
               output.PrepareForOutput(output_size, DeviceAdapterTagCuda()),
               ::vtkm::NotZeroInitialized()); // predicate is applied to the stencil values
}
template <typename T, typename U, class SIn, class SOut>
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, SIn>& input,
vtkm::Id inputStartIndex,
@ -1207,6 +1222,22 @@ public:
return CountSetBitsPortal<vtkm::UInt64>(bitsPortal);
}
template <typename T, class SIn, class SVal, class SOut>
//resized array
vtkm::cont::ArrayHandle<U, SOut> temp;
temp.Allocate(copyOutEnd);
CopySubRange(output, 0, outSize, temp);
output = temp;
}
}
CopySubRangePortal(input.PrepareForInput(DeviceAdapterTagCuda()),
inputStartIndex,
numberOfElementsToCopy,
output.PrepareForInPlace(DeviceAdapterTagCuda()),
outputIndex);
return true;
}
template <typename T, class SIn, class SVal, class SOut>
VTKM_CONT static void LowerBounds(const vtkm::cont::ArrayHandle<T, SIn>& input,
const vtkm::cont::ArrayHandle<T, SVal>& values,
@ -1270,17 +1301,10 @@ public:
{
return initialValue;
}
return ReducePortal(
input.PrepareForInput(DeviceAdapterTagCuda()), initialValue, binary_functor);
return ReducePortal(input.PrepareForInput(DeviceAdapterTagCuda()), initialValue, binary_functor);
}
template <typename T,
typename U,
class KIn,
class VIn,
class KOut,
class VOut,
class BinaryFunctor>
template <typename T, typename U, class KIn, class VIn, class KOut, class VOut, class BinaryFunctor>
VTKM_CONT static void ReduceByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
const vtkm::cont::ArrayHandle<U, VIn>& values,
vtkm::cont::ArrayHandle<T, KOut>& keys_output,
@ -1423,12 +1447,7 @@ public:
keysPortal, valuesPortal, output.PrepareForOutput(numberOfValues, DeviceAdapterTagCuda()));
}
template <typename T,
typename U,
typename KIn,
typename VIn,
typename VOut,
typename BinaryFunctor>
template <typename T, typename U, typename KIn, typename VIn, typename VOut, typename BinaryFunctor>
VTKM_CONT static void ScanInclusiveByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
const vtkm::cont::ArrayHandle<U, VIn>& values,
vtkm::cont::ArrayHandle<U, VOut>& output,
@ -1483,12 +1502,7 @@ public:
vtkm::Add());
}
template <typename T,
typename U,
typename KIn,
typename VIn,
typename VOut,
typename BinaryFunctor>
template <typename T, typename U, typename KIn, typename VIn, typename VOut, typename BinaryFunctor>
VTKM_CONT static void ScanExclusiveByKey(const vtkm::cont::ArrayHandle<T, KIn>& keys,
const vtkm::cont::ArrayHandle<U, VIn>& values,
vtkm::cont::ArrayHandle<U, VOut>& output,
@ -1537,9 +1551,7 @@ public:
static void SetupErrorBuffer(vtkm::exec::cuda::internal::TaskStrided& functor);
VTKM_CONT_EXPORT
static void GetBlocksAndThreads(vtkm::UInt32& blocks,
vtkm::UInt32& threadsPerBlock,
vtkm::Id size);
static void GetBlocksAndThreads(vtkm::UInt32& blocks, vtkm::UInt32& threadsPerBlock, vtkm::Id size);
VTKM_CONT_EXPORT
static void GetBlocksAndThreads(vtkm::UInt32& blocks, dim3& threadsPerBlock, const dim3& size);
@ -1581,8 +1593,8 @@ public:
{
using FunctorType = vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>;
cudaFuncAttributes empty_kernel_attrs;
VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
cuda::internal::TaskStrided1DLaunch<FunctorType>));
VTKM_CUDA_CALL(
cudaFuncGetAttributes(&empty_kernel_attrs, cuda::internal::TaskStrided1DLaunch<FunctorType>));
LogKernelLaunch(empty_kernel_attrs, typeid(WType), blocks, threadsPerBlock, numInstances);
}
#endif
@ -1618,14 +1630,14 @@ public:
{
using FunctorType = vtkm::exec::cuda::internal::TaskStrided3D<WType, IType>;
cudaFuncAttributes empty_kernel_attrs;
VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
cuda::internal::TaskStrided3DLaunch<FunctorType>));
VTKM_CUDA_CALL(
cudaFuncGetAttributes(&empty_kernel_attrs, cuda::internal::TaskStrided3DLaunch<FunctorType>));
LogKernelLaunch(empty_kernel_attrs, typeid(WType), blocks, threadsPerBlock, ranges);
}
#endif
cuda::internal::TaskStrided3DLaunch<<<blocks, threadsPerBlock, 0, cudaStreamPerThread>>>(
functor, ranges);
cuda::internal::TaskStrided3DLaunch<<<blocks, threadsPerBlock, 0, cudaStreamPerThread>>>(functor,
ranges);
}
template <class Functor>
@ -1702,8 +1714,7 @@ public:
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
vtkm::Id newSize =
UniquePortal(values.PrepareForInPlace(DeviceAdapterTagCuda()), binary_compare);
vtkm::Id newSize = UniquePortal(values.PrepareForInPlace(DeviceAdapterTagCuda()), binary_compare);
values.Shrink(newSize);
}
@ -1753,7 +1764,8 @@ public:
VTKM_CUDA_CALL(cudaStreamSynchronize(cudaStreamPerThread));
CheckForErrors();
}
};
}
;
template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagCuda>
@ -1769,19 +1781,3 @@ public:
using Task = vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType>;
return Task(worklet, invocation, globalIndexOffset);
}
/// Creates the strided 3D CUDA task for \c worklet and \c invocation.
///
/// The unnamed \c vtkm::Id3 argument is accepted but not used by this
/// overload; only \c globalIndexOffset is forwarded to the task.
template <typename WorkletType, typename InvocationType>
static vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType> MakeTask(
  WorkletType& worklet,
  InvocationType& invocation,
  vtkm::Id3,
  vtkm::Id globalIndexOffset = 0)
{
  return vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType>(
    worklet, invocation, globalIndexOffset);
}
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_cuda_internal_DeviceAdapterAlgorithmCuda_h

@ -124,6 +124,44 @@ public:
output.Shrink(writePos);
}
/// \brief Stencil-driven copy with the default predicate (serial)
///
/// Convenience overload: delegates to the predicate-taking \c CopyIf with a
/// \c ::vtkm::NotZeroInitialized predicate, passing \c input, \c stencil,
/// \c output and \c output_size through unchanged.
template <typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output,
                             const vtkm::Id& output_size)
{
  CopyIf(input, stencil, output, output_size, ::vtkm::NotZeroInitialized());
}
/// \brief Stencil-driven copy into an output of caller-specified length (serial)
///
/// Walks \c input and \c stencil in lockstep and writes every input value
/// whose stencil entry satisfies \c predicate to the next free slot of
/// \c output, preserving input order. \c output is prepared for exactly
/// \c output_size values and is not resized afterwards.
///
/// \param input        array the copied values are read from
/// \param stencil      array the predicate is evaluated on; asserted to have
///                     the same length as \c input
/// \param output       array prepared for \c output_size values and written to
/// \param output_size  number of values \c output is prepared for; asserted to
///                     be large enough for every selected value
/// \param predicate    unary functor called on each stencil value
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output,
                             const vtkm::Id& output_size,
                             UnaryPredicate predicate)
{
  const vtkm::Id inputSize = input.GetNumberOfValues();
  VTKM_ASSERT(inputSize == stencil.GetNumberOfValues());

  auto inputPortal = input.PrepareForInput(DeviceAdapterTagSerial());
  auto stencilPortal = stencil.PrepareForInput(DeviceAdapterTagSerial());
  auto outputPortal = output.PrepareForOutput(output_size, DeviceAdapterTagSerial());

  vtkm::Id writePos = 0;
  for (vtkm::Id readPos = 0; readPos < inputSize; ++readPos)
  {
    if (predicate(stencilPortal.Get(readPos)))
    {
      // The output was sized by the caller, not by counting matches, so make
      // sure an undersized output_size cannot write past the allocation.
      VTKM_ASSERT(writePos < output_size);
      outputPortal.Set(writePos, inputPortal.Get(readPos));
      ++writePos;
    }
  }
}
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::Id inputStartIndex,

@ -79,6 +79,35 @@ public:
output.Shrink(outputSize);
}
/// \brief Stencil-driven copy with the default predicate (TBB)
///
/// Convenience overload: opens a performance log scope and delegates to the
/// predicate-taking \c CopyIf with a \c ::vtkm::NotZeroInitialized predicate,
/// passing \c input, \c stencil, \c output and \c output_size through
/// unchanged.
template <typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output,
                             const vtkm::Id& output_size)
{
  VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);

  CopyIf(input, stencil, output, output_size, ::vtkm::NotZeroInitialized());
}
/// \brief Stencil-driven copy into an output of caller-specified length (TBB)
///
/// Prepares \c input and \c stencil for reading and \c output for writing
/// \c output_size values on the TBB device, then forwards the three portals
/// and \c unary_predicate to \c tbb::CopyIfPortals.
///
/// \param input            array the copied values are read from
/// \param stencil          array handed to \c tbb::CopyIfPortals as the stencil
/// \param output           array prepared for \c output_size values and written to
/// \param output_size      number of values \c output is prepared for
/// \param unary_predicate  functor forwarded to \c tbb::CopyIfPortals
///
/// NOTE(review): anything \c tbb::CopyIfPortals returns is ignored here and
/// \c output is not resized afterwards, so \c output_size presumably has to
/// match the number of selected values -- confirm with the callers.
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
                             const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
                             vtkm::cont::ArrayHandle<T, COut>& output,
                             const vtkm::Id& output_size,
                             UnaryPredicate unary_predicate)
{
  VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);

  // The input length used to be read into an unused local here; it was
  // dropped because nothing consumed it.
  tbb::CopyIfPortals(input.PrepareForInput(DeviceAdapterTagTBB()),
                     stencil.PrepareForInput(DeviceAdapterTagTBB()),
                     output.PrepareForOutput(output_size, DeviceAdapterTagTBB()),
                     unary_predicate);
}
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::Id inputStartIndex,