Merge topic 'hints'

c44f68649 Add hints to device adapter scheduler

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Dave Pugmire <dpugmire@gmail.com>
Merge-request: !3189
This commit is contained in:
Kenneth Moreland 2024-02-17 12:47:10 +00:00 committed by Kitware Robot
commit af28ec2766
20 changed files with 598 additions and 102 deletions

32
docs/changelog/hints.md Normal file

@ -0,0 +1,32 @@
# Add hints to device adapter scheduler
The `DeviceAdapter` provides an abstract interface to the accelerator
devices that worklets and other algorithms run on. As such, the programmer has
less control over how the device launches each worklet. Each device
adapter has its own configuration parameters and other ways to attempt to
optimize how things are run, but these are always a universal set of
options that are applied to everything run on the device. There is no way
to specify launch parameters for a particular worklet.
To provide this information, VTK-m now supports `Hint`s to the device
adapter. The `DeviceAdapterAlgorithm::Schedule` method takes a templated
argument that is of the type `HintList`. This object contains a template
list of `Hint` types that provide suggestions on how to launch the parallel
execution. The device adapter will pick out hints that pertain to it and
adjust its launching accordingly.
These are called hints rather than, say, directives, because they don't
force the device adapter to do anything. The device adapter is free to
ignore any (and all) hints. The point is that the device adapter can take
into account the information to try to optimize for itself.
A provided hint can be tied to specific device adapters. In this way, a
worklet can further optimize itself. If multiple hints match a device
adapter, the last one in the list will be selected.
The `Worklet` base now has an internal type named `Hints` that points to a
`HintList` that is applied when the worklet is scheduled. Derived worklet
classes can provide hints by simply defining their own `Hints` type.
This feature is experimental and consequently hidden in an `internal`
namespace.

@ -17,6 +17,7 @@
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/cont/Token.h>
#include <vtkm/cont/TryExecute.h>
#include <vtkm/cont/internal/Hints.h>
namespace vtkm
@ -932,29 +933,43 @@ struct Algorithm
ScanExtended(vtkm::cont::DeviceAdapterTagAny(), input, output, binaryFunctor, initialValue);
}
template <class Functor>
// Should this be deprecated in favor of `RuntimeDeviceTracker`?
template <typename Functor>
VTKM_CONT static void Schedule(vtkm::cont::DeviceAdapterId devId,
Functor functor,
vtkm::Id numInstances)
{
vtkm::cont::TryExecuteOnDevice(devId, detail::ScheduleFunctor(), functor, numInstances);
vtkm::cont::TryExecuteOnDevice(devId, detail::ScheduleFunctor{}, functor, numInstances);
}
template <class Functor>
/// Schedules `functor` over a 1D range of `numInstances` instances with a
/// list of scheduling hints.
///
/// @param hints        A `vtkm::cont::internal::HintList` of hint types.
/// @param functor      The functor to schedule.
/// @param numInstances The size of the 1D range.
///
/// Unlike the overload taking a `DeviceAdapterId`, no device is named here;
/// the call goes through `vtkm::cont::TryExecute`.
template <typename... Hints, typename Functor>
VTKM_CONT static void Schedule(vtkm::cont::internal::HintList<Hints...> hints,
Functor functor,
vtkm::Id numInstances)
{
// NOTE(review): `hints` is handed to `TryExecute` as the first extra argument,
// presumably forwarded on to `ScheduleFunctor` -- confirm against TryExecute.h.
vtkm::cont::TryExecute(detail::ScheduleFunctor{}, hints, functor, numInstances);
}
template <typename Functor>
VTKM_CONT static void Schedule(Functor functor, vtkm::Id numInstances)
{
Schedule(vtkm::cont::DeviceAdapterTagAny(), functor, numInstances);
Schedule(vtkm::cont::DeviceAdapterTagAny{}, functor, numInstances);
}
template <class Functor>
template <typename Functor>
VTKM_CONT static void Schedule(vtkm::cont::DeviceAdapterId devId,
Functor functor,
vtkm::Id3 rangeMax)
{
vtkm::cont::TryExecuteOnDevice(devId, detail::ScheduleFunctor(), functor, rangeMax);
}
template <class Functor>
/// Schedules `functor` over a 3D range bounded by `rangeMax` with a list of
/// scheduling hints.
///
/// @param hints    A `vtkm::cont::internal::HintList` of hint types.
/// @param functor  The functor to schedule.
/// @param rangeMax The extent of the 3D range.
///
/// Unlike the overload taking a `DeviceAdapterId`, no device is named here;
/// the call goes through `vtkm::cont::TryExecute`.
template <typename... Hints, typename Functor>
VTKM_CONT static void Schedule(vtkm::cont::internal::HintList<Hints...> hints,
Functor functor,
vtkm::Id3 rangeMax)
{
// NOTE(review): `hints` is handed to `TryExecute` as the first extra argument,
// presumably forwarded on to `ScheduleFunctor` -- confirm against TryExecute.h.
vtkm::cont::TryExecute(detail::ScheduleFunctor{}, hints, functor, rangeMax);
}
template <typename Functor>
VTKM_CONT static void Schedule(Functor functor, vtkm::Id3 rangeMax)
{
Schedule(vtkm::cont::DeviceAdapterTagAny(), functor, rangeMax);

@ -283,6 +283,11 @@ vtkm_library( NAME vtkm_cont
DEVICE_SOURCES ${device_sources}
)
target_sources(vtkm_cont
PRIVATE
internal/Hints.h
)
add_subdirectory(internal)
add_subdirectory(arg)

@ -203,7 +203,8 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::CheckForErrors()
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThreads(
vtkm::UInt32& blocks,
vtkm::UInt32& threadsPerBlock,
vtkm::Id size)
vtkm::Id size,
vtkm::IdComponent maxThreadsPerBlock)
{
(void)size;
vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
@ -215,12 +216,17 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
const auto& params = cuda::internal::scheduling_1d_parameters[static_cast<size_t>(deviceId)];
blocks = static_cast<vtkm::UInt32>(params.first);
threadsPerBlock = static_cast<vtkm::UInt32>(params.second);
// `maxThreadsPerBlock` is an upper bound; a value <= 0 means no hint was given.
// Only ever lower the device default to honor the bound. The previous test
// (`<`) raised the thread count up to the hinted value and never reduced a
// count that exceeded it, which is the opposite of what the 3D overload does.
if ((maxThreadsPerBlock > 0) && (threadsPerBlock > static_cast<vtkm::UInt32>(maxThreadsPerBlock)))
{
threadsPerBlock = static_cast<vtkm::UInt32>(maxThreadsPerBlock);
}
}
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThreads(
vtkm::UInt32& blocks,
dim3& threadsPerBlock,
const dim3& size)
const dim3& size,
vtkm::IdComponent maxThreadsPerBlock)
{
vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
@ -240,6 +246,27 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
blocks = static_cast<vtkm::UInt32>(params.first);
threadsPerBlock = params.second;
}
if (maxThreadsPerBlock > 0)
{
while ((threadsPerBlock.x * threadsPerBlock.y * threadsPerBlock.z) >
static_cast<vtkm::UInt32>(maxThreadsPerBlock))
{
// Reduce largest element until threads are small enough.
if (threadsPerBlock.x > threadsPerBlock.y)
{
threadsPerBlock.x /= 2;
}
else if (threadsPerBlock.y > threadsPerBlock.z)
{
threadsPerBlock.y /= 2;
}
else
{
threadsPerBlock.z /= 2;
}
}
}
}
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::LogKernelLaunch(

@ -1654,10 +1654,24 @@ public:
VTKM_CONT_EXPORT
static void GetBlocksAndThreads(vtkm::UInt32& blocks,
vtkm::UInt32& threadsPerBlock,
vtkm::Id size);
vtkm::Id size,
vtkm::IdComponent maxThreadsPerBlock);
VTKM_CONT_EXPORT
static void GetBlocksAndThreads(vtkm::UInt32& blocks, dim3& threadsPerBlock, const dim3& size);
static void GetBlocksAndThreads(vtkm::UInt32& blocks,
dim3& threadsPerBlock,
const dim3& size,
vtkm::IdComponent maxThreadsPerBlock);
/// Hint-aware front end for the `GetBlocksAndThreads` overloads.
///
/// Looks up the `HintThreadsPerBlock` hint that applies to the CUDA device in
/// the given hint list (falling back to `HintThreadsPerBlock<0>` when none is
/// present) and calls the non-hinted overload with `args...` followed by the
/// hint's `MaxThreads` value.
template <typename... Hints, typename... Args>
static void GetBlocksAndThreads(vtkm::cont::internal::HintList<Hints...>, Args&&... args)
{
  using ProvidedHints = vtkm::cont::internal::HintList<Hints...>;
  using NoPreference = vtkm::cont::internal::HintThreadsPerBlock<0>;
  using SelectedHint =
    vtkm::cont::internal::HintFind<ProvidedHints, NoPreference, vtkm::cont::DeviceAdapterTagCuda>;

  GetBlocksAndThreads(std::forward<Args>(args)..., SelectedHint::MaxThreads);
}
VTKM_CONT_EXPORT
static void LogKernelLaunch(const cudaFuncAttributes& func_attrs,
@ -1674,8 +1688,8 @@ public:
const dim3& size);
public:
template <typename WType, typename IType>
static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>& functor,
template <typename WType, typename IType, typename Hints>
static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided1D<WType, IType, Hints>& functor,
vtkm::Id numInstances)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -1691,12 +1705,12 @@ public:
SetupErrorBuffer(functor);
vtkm::UInt32 blocks, threadsPerBlock;
GetBlocksAndThreads(blocks, threadsPerBlock, numInstances);
GetBlocksAndThreads(Hints{}, blocks, threadsPerBlock, numInstances);
#ifdef VTKM_ENABLE_LOGGING
if (GetStderrLogLevel() >= vtkm::cont::LogLevel::KernelLaunches)
{
using FunctorType = vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>;
using FunctorType = std::decay_t<decltype(functor)>;
cudaFuncAttributes empty_kernel_attrs;
VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
cuda::internal::TaskStrided1DLaunch<FunctorType>));
@ -1708,8 +1722,8 @@ public:
functor, numInstances);
}
template <typename WType, typename IType>
static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided3D<WType, IType>& functor,
template <typename WType, typename IType, typename Hints>
static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided3D<WType, IType, Hints>& functor,
vtkm::Id3 rangeMax)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -1730,12 +1744,12 @@ public:
vtkm::UInt32 blocks;
dim3 threadsPerBlock;
GetBlocksAndThreads(blocks, threadsPerBlock, ranges);
GetBlocksAndThreads(Hints{}, blocks, threadsPerBlock, ranges);
#ifdef VTKM_ENABLE_LOGGING
if (GetStderrLogLevel() >= vtkm::cont::LogLevel::KernelLaunches)
{
using FunctorType = vtkm::exec::cuda::internal::TaskStrided3D<WType, IType>;
using FunctorType = std::decay_t<decltype(functor)>;
cudaFuncAttributes empty_kernel_attrs;
VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
cuda::internal::TaskStrided3DLaunch<FunctorType>));
@ -1747,25 +1761,39 @@ public:
functor, rangeMax);
}
template <class Functor>
VTKM_CONT static void Schedule(Functor functor, vtkm::Id numInstances)
template <typename Hints, typename Functor>
VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id numInstances)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
vtkm::exec::cuda::internal::TaskStrided1D<Functor, vtkm::internal::NullType> kernel(functor);
vtkm::exec::cuda::internal::TaskStrided1D<Functor, vtkm::internal::NullType, Hints> kernel(
functor);
ScheduleTask(kernel, numInstances);
}
template <class Functor>
VTKM_CONT static void Schedule(Functor functor, const vtkm::Id3& rangeMax)
/// Schedules `functor` over a 1D range of `numInstances` instances without any
/// scheduling hints.
///
/// Delegates to the hinted overload with an empty `HintList`. The functor is
/// perfectly forwarded so that an rvalue argument is moved, not copied, into
/// the by-value parameter of the hinted overload.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
{
  Schedule(vtkm::cont::internal::HintList<>{}, std::forward<FunctorType>(functor), numInstances);
}
template <typename Hints, typename Functor>
VTKM_CONT static void Schedule(Hints, Functor functor, const vtkm::Id3& rangeMax)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
vtkm::exec::cuda::internal::TaskStrided3D<Functor, vtkm::internal::NullType> kernel(functor);
vtkm::exec::cuda::internal::TaskStrided3D<Functor, vtkm::internal::NullType, Hints> kernel(
functor);
ScheduleTask(kernel, rangeMax);
}
/// Schedules `functor` over a 3D range bounded by `rangeMax` without any
/// scheduling hints.
///
/// Delegates to the hinted overload with an empty `HintList`. The functor is
/// perfectly forwarded so that an rvalue argument is moved, not copied, into
/// the by-value parameter of the hinted overload.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
{
  Schedule(vtkm::cont::internal::HintList<>{}, std::forward<FunctorType>(functor), rangeMax);
}
template <typename T, class Storage>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
{
@ -1894,20 +1922,26 @@ template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagCuda>
{
public:
template <typename WorkletType, typename InvocationType>
static vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id)
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType, Hints>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id, Hints = Hints{})
{
using Task = vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType>;
return Task(worklet, invocation);
return { worklet, invocation };
}
template <typename WorkletType, typename InvocationType>
static vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3)
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType, Hints>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3, Hints = Hints{})
{
using Task = vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType>;
return Task(worklet, invocation);
return { worklet, invocation };
}
/// Builds a task for `worklet`/`invocation` when the caller supplies no hints.
///
/// Forwards to the hinted `MakeTask` overloads with an empty `HintList` as the
/// explicit first template argument; `range` selects between them.
template <typename WorkletType, typename InvocationType, typename RangeType>
VTKM_CONT static auto MakeTask(WorkletType& worklet,
                               InvocationType& invocation,
                               const RangeType& range)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  return MakeTask<NoHints>(worklet, invocation, range);
}
};
}

@ -25,6 +25,7 @@ set(headers
DeviceAdapterListHelpers.h
FieldCollection.h
FunctorsGeneral.h
Hints.h
IteratorFromArrayPortal.h
KXSort.h
MapArrayPermutation.h

@ -20,6 +20,7 @@
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/cont/internal/Hints.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
#include <vtkm/exec/internal/TaskSingular.h>
@ -58,20 +59,30 @@ namespace internal
/// : DeviceAdapterAlgorithmGeneral<DeviceAdapterAlgorithm<DeviceAdapterTagFoo>,
/// DeviceAdapterTagFoo>
/// {
/// template<class Functor>
/// VTKM_CONT static void Schedule(Functor functor,
/// vtkm::Id numInstances)
/// template<typename Hints, typename Functor>
/// VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id numInstances)
/// {
/// ...
/// }
///
/// template<class Functor>
/// VTKM_CONT static void Schedule(Functor functor,
/// vtkm::Id3 maxRange)
/// template<typename Functor>
/// VTKM_CONT static void Schedule(Functor&& functor, vtkm::Id numInstances)
/// {
/// Schedule(vtkm::cont::internal::HintList<>{}, functor, numInstances);
/// }
///
/// template<typename Hints, typename Functor>
/// VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id3 maxRange)
/// {
/// ...
/// }
///
/// template<typename Functor>
/// VTKM_CONT static void Schedule(Functor&& functor, vtkm::Id3 maxRange)
/// {
/// Schedule(vtkm::cont::internal::HintList<>{}, functor, maxRange);
/// }
///
/// VTKM_CONT static void Synchronize()
/// {
/// ...

124
vtkm/cont/internal/Hints.h Normal file

@ -0,0 +1,124 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_internal_Hints_h
#define vtk_m_cont_internal_Hints_h
#include <vtkm/Assert.h>
#include <vtkm/List.h>
#include <vtkm/cont/DeviceAdapterTag.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
/// @brief Common base for every execution hint.
///
/// A hint is a (potentially) device-independent parameter that control-side
/// code may supply when scheduling parallel execution, giving the device
/// adapter some context about what is being run. A device adapter
/// implementation may use or ignore any hint, and a hint may be restricted to
/// a specific list of devices.
///
/// This template only records three associated types. It is not meant to be
/// used directly; derive a concrete hint structure from it instead.
///
/// @tparam DerivedHint   The concrete hint type deriving from this base.
/// @tparam HintTag       Tag type identifying the kind of hint.
/// @tparam TargetDevices List of device adapter tags the hint applies to.
template <typename DerivedHint, typename HintTag, typename TargetDevices>
struct HintBase
{
  using DeviceList = TargetDevices;
  using Tag = HintTag;
  using Derived = DerivedHint;
};
/// Tag type identifying the "threads per block" kind of hint. It carries no
/// data; it is only compared against a hint's `Tag` type.
struct HintTagThreadsPerBlock {};
/// @brief Suggest the number of threads to use when scheduling blocks of threads.
///
/// Many accelerator devices, particularly GPUs, schedule threads in blocks. This
/// hint suggests the size of block to use during the scheduling.
template <vtkm::IdComponent MaxThreads_, typename DeviceList_ = vtkm::ListUniversal>
struct HintThreadsPerBlock
: HintBase<HintThreadsPerBlock<MaxThreads_, DeviceList_>, HintTagThreadsPerBlock, DeviceList_>
{
static constexpr vtkm::IdComponent MaxThreads = MaxThreads_;
};
/// @brief Container for hints.
///
/// A caller that schedules or invokes a parallel routine can suggest how best
/// to execute it by supplying hints. The hint types are given as the template
/// arguments of a `HintList`, and an instance of that list is passed as an
/// argument. The underlying `vtkm::List` is available as the nested `List` type.
template <typename... HintTypes>
struct HintList : vtkm::List<HintTypes...>
{
  using List = vtkm::List<HintTypes...>;
};
/// Type trait that is `std::true_type` when the argument is an instantiation
/// of `HintList` and `std::false_type` for any other type.
template <typename Candidate>
struct IsHintList : std::false_type
{
};

/// Matching case: any `HintList<...>` instantiation.
template <typename... HintTypes>
struct IsHintList<HintList<HintTypes...>> : std::true_type
{
};
/// @brief Performs a static assert that the given type is a hint list.
///
/// If the provided type `T` is a `vtkm::cont::internal::HintList`, then this
/// macro does nothing. If the type is anything else, a compile error will occur.
/// This macro is useful for checking that template arguments are an expected
/// hint list. This helps diagnose improper template use more easily.
///
/// The check is performed through `IsHintList<T>::value`, so `T` must name a
/// type (for example a template parameter), not an object.
#define VTKM_IS_HINT_LIST(T) VTKM_STATIC_ASSERT(::vtkm::cont::internal::IsHintList<T>::value)
namespace detail
{

/// Metafunctions used by `HintFind` to pick a hint out of a list.
///
/// @tparam Device  The device adapter tag the hint must apply to.
/// @tparam HintTag The tag identifying the kind of hint being searched for.
template <typename Device, typename HintTag>
struct FindHintOperators
{
  VTKM_IS_DEVICE_ADAPTER_TAG(Device);

  /// True when `Candidate` is of the requested kind (its `Tag` is `HintTag`)
  /// and its `DeviceList` contains `Device`.
  template <typename Candidate>
  using HintMatches =
    vtkm::internal::meta::And<std::is_same<typename Candidate::Tag, HintTag>,
                              vtkm::ListHas<typename Candidate::DeviceList, Device>>;

  /// Reduction step: a matching `Candidate` replaces the hint selected so far
  /// (`Selected`); a non-matching one leaves it unchanged.
  template <typename Selected, typename Candidate>
  using ReduceOperator = std::conditional_t<HintMatches<Candidate>::value, Candidate, Selected>;
};

} // namespace detail
/// @brief Find a hint of a particular type.
///
/// The `HintFind` template can be used to find a hint of a particular type.
/// `HintFind` is provided a default value to use for a hint, and it returns
/// a hint in the hint list that matches the type of the provided default and
/// applies to the provided device tag. If no hint in the list matches, the
/// provided default is returned.
///
/// If multiple hints match the type and device, the _last_ one in the list
/// is returned. Thus, when constructing hint lists, put the more general hints
/// first and more specific ones last.
///
/// @tparam HList       The hint list to search; its nested `List` type is reduced.
/// @tparam DefaultHint The hint returned when nothing matches; its `Tag`
///                     selects the kind of hint searched for.
/// @tparam Device      The device adapter tag a hint must apply to.
template <typename HList, typename DefaultHint, typename Device>
using HintFind = vtkm::ListReduce<
typename HList::List,
detail::FindHintOperators<Device, typename DefaultHint::Tag>::template ReduceOperator,
DefaultHint>;
}
}
} // namespace vtkm::cont::internal
#endif // vtk_m_cont_internal_Hints_h

@ -670,9 +670,9 @@ public:
}
//----------------------------------------------------------------------------
template <typename WType, typename IType>
template <typename WType, typename IType, typename Hints>
VTKM_CONT static void ScheduleTask(
vtkm::exec::kokkos::internal::TaskBasic1D<WType, IType>& functor,
vtkm::exec::kokkos::internal::TaskBasic1D<WType, IType, Hints>& functor,
vtkm::Id numInstances)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -685,15 +685,22 @@ public:
functor.SetErrorMessageBuffer(GetErrorMessageBufferInstance());
Kokkos::RangePolicy<vtkm::cont::kokkos::internal::ExecutionSpace, vtkm::Id> policy(
vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(), 0, numInstances);
constexpr vtkm::IdComponent maxThreadsPerBlock =
vtkm::cont::internal::HintFind<Hints,
vtkm::cont::internal::HintThreadsPerBlock<0>,
vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads;
Kokkos::RangePolicy<vtkm::cont::kokkos::internal::ExecutionSpace,
Kokkos::LaunchBounds<maxThreadsPerBlock, 0>,
Kokkos::IndexType<vtkm::Id>>
policy(vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(), 0, numInstances);
Kokkos::parallel_for(policy, functor);
CheckForErrors(); // synchronizes
}
template <typename WType, typename IType>
template <typename WType, typename IType, typename Hints>
VTKM_CONT static void ScheduleTask(
vtkm::exec::kokkos::internal::TaskBasic3D<WType, IType>& functor,
vtkm::exec::kokkos::internal::TaskBasic3D<WType, IType, Hints>& functor,
vtkm::Id3 rangeMax)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -706,7 +713,13 @@ public:
functor.SetErrorMessageBuffer(GetErrorMessageBufferInstance());
constexpr vtkm::IdComponent maxThreadsPerBlock =
vtkm::cont::internal::HintFind<Hints,
vtkm::cont::internal::HintThreadsPerBlock<0>,
vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads;
Kokkos::MDRangePolicy<vtkm::cont::kokkos::internal::ExecutionSpace,
Kokkos::LaunchBounds<maxThreadsPerBlock, 0>,
Kokkos::Rank<3>,
Kokkos::IndexType<vtkm::Id>>
policy(vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(),
@ -729,24 +742,38 @@ public:
CheckForErrors(); // synchronizes
}
template <class Functor>
VTKM_CONT static void Schedule(Functor functor, vtkm::Id numInstances)
template <typename Hints, typename Functor>
VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id numInstances)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
vtkm::exec::kokkos::internal::TaskBasic1D<Functor, vtkm::internal::NullType> kernel(functor);
vtkm::exec::kokkos::internal::TaskBasic1D<Functor, vtkm::internal::NullType, Hints> kernel(
functor);
ScheduleTask(kernel, numInstances);
}
template <class Functor>
VTKM_CONT static void Schedule(Functor functor, const vtkm::Id3& rangeMax)
/// Schedules `functor` over a 1D range of `numInstances` instances without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, numInstances);
}
template <typename Hints, typename Functor>
VTKM_CONT static void Schedule(Hints, Functor functor, const vtkm::Id3& rangeMax)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
vtkm::exec::kokkos::internal::TaskBasic3D<Functor, vtkm::internal::NullType> kernel(functor);
vtkm::exec::kokkos::internal::TaskBasic3D<Functor, vtkm::internal::NullType, Hints> kernel(
functor);
ScheduleTask(kernel, rangeMax);
}
/// Schedules `functor` over a 3D range bounded by `rangeMax` without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, rangeMax);
}
//----------------------------------------------------------------------------
private:
template <typename T>
@ -1020,20 +1047,28 @@ template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagKokkos>
{
public:
template <typename WorkletType, typename InvocationType>
VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id)
template <typename Hints, typename WorkletType, typename InvocationType>
VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType, Hints>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id, Hints = Hints{})
{
return vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType>(worklet,
invocation);
return vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType, Hints>(
worklet, invocation);
}
template <typename WorkletType, typename InvocationType>
VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3)
template <typename Hints, typename WorkletType, typename InvocationType>
VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType, Hints>
MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3, Hints = {})
{
return vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType>(worklet,
invocation);
return vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType, Hints>(
worklet, invocation);
}
/// Builds a task for `worklet`/`invocation` when the caller supplies no hints.
///
/// Forwards to the hinted `MakeTask` overloads with an empty `HintList` as the
/// explicit first template argument; `range` selects between them.
template <typename WorkletType, typename InvocationType, typename RangeType>
VTKM_CONT static auto MakeTask(WorkletType& worklet,
                               InvocationType& invocation,
                               const RangeType& range)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  return MakeTask<NoHints>(worklet, invocation, range);
}
};
}

@ -359,8 +359,8 @@ public:
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling3D& functor,
vtkm::Id3 size);
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
template <typename Hints, typename FunctorType>
VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id numInstances)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -368,8 +368,14 @@ public:
ScheduleTask(kernel, numInstances);
}
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
/// Schedules `functor` over a 1D range of `numInstances` instances without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, numInstances);
}
template <typename Hints, typename FunctorType>
VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id3 rangeMax)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -377,6 +383,12 @@ public:
ScheduleTask(kernel, rangeMax);
}
/// Schedules `functor` over a 3D range bounded by `rangeMax` without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, rangeMax);
}
VTKM_CONT static void Synchronize()
{
// Nothing to do. This device schedules all of its operations using a
@ -390,21 +402,33 @@ template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagOpenMP>
{
public:
template <typename WorkletType, typename InvocationType>
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::openmp::internal::TaskTiling1D MakeTask(const WorkletType& worklet,
const InvocationType& invocation,
vtkm::Id)
vtkm::Id,
Hints = Hints{})
{
// Currently ignoring hints.
return vtkm::exec::openmp::internal::TaskTiling1D(worklet, invocation);
}
template <typename WorkletType, typename InvocationType>
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::openmp::internal::TaskTiling3D MakeTask(const WorkletType& worklet,
const InvocationType& invocation,
vtkm::Id3)
vtkm::Id3,
Hints = Hints{})
{
// Currently ignoring hints.
return vtkm::exec::openmp::internal::TaskTiling3D(worklet, invocation);
}
/// Builds a task for `worklet`/`invocation` when the caller supplies no hints.
///
/// Forwards to the hinted `MakeTask` overloads with an empty `HintList` as the
/// explicit first template argument; `range` selects between them.
template <typename WorkletType, typename InvocationType, typename RangeType>
VTKM_CONT static auto MakeTask(WorkletType& worklet,
                               InvocationType& invocation,
                               const RangeType& range)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  return MakeTask<NoHints>(worklet, invocation, range);
}
};
}
} // namespace vtkm::cont

@ -400,8 +400,8 @@ public:
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::serial::internal::TaskTiling3D& functor,
vtkm::Id3 size);
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id size)
template <typename Hints, typename FunctorType>
VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id size)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -409,8 +409,14 @@ public:
ScheduleTask(kernel, size);
}
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 size)
/// Schedules `functor` over a 1D range of `size` instances without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id size)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, size);
}
template <typename Hints, typename FunctorType>
VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id3 size)
{
VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@ -418,6 +424,12 @@ public:
ScheduleTask(kernel, size);
}
/// Schedules `functor` over a 3D range bounded by `size` without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 size)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, size);
}
private:
template <typename Vin,
typename I,
@ -557,21 +569,33 @@ template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagSerial>
{
public:
template <typename WorkletType, typename InvocationType>
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::serial::internal::TaskTiling1D MakeTask(WorkletType& worklet,
InvocationType& invocation,
vtkm::Id)
vtkm::Id,
Hints = Hints{})
{
// Currently ignoring hints.
return vtkm::exec::serial::internal::TaskTiling1D(worklet, invocation);
}
template <typename WorkletType, typename InvocationType>
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::serial::internal::TaskTiling3D MakeTask(WorkletType& worklet,
InvocationType& invocation,
vtkm::Id3)
vtkm::Id3,
Hints = Hints{})
{
// Currently ignoring hints.
return vtkm::exec::serial::internal::TaskTiling3D(worklet, invocation);
}
/// Builds a task for `worklet`/`invocation` when the caller supplies no hints.
///
/// Forwards to the hinted `MakeTask` overloads with an empty `HintList` as the
/// explicit first template argument; `range` selects between them.
template <typename WorkletType, typename InvocationType, typename RangeType>
VTKM_CONT static auto MakeTask(WorkletType& worklet,
                               InvocationType& invocation,
                               const RangeType& range)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  return MakeTask<NoHints>(worklet, invocation, range);
}
};
}
} // namespace vtkm::cont

@ -259,8 +259,8 @@ public:
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::tbb::internal::TaskTiling3D& functor,
vtkm::Id3 size);
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
template <typename Hints, typename FunctorType>
VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id numInstances)
{
VTKM_LOG_SCOPE(vtkm::cont::LogLevel::Perf,
"Schedule TBB 1D: '%s'",
@ -270,8 +270,14 @@ public:
ScheduleTask(kernel, numInstances);
}
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
/// Schedules `functor` over a 1D range of `numInstances` instances without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, numInstances);
}
template <typename Hints, typename FunctorType>
VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id3 rangeMax)
{
VTKM_LOG_SCOPE(vtkm::cont::LogLevel::Perf,
"Schedule TBB 3D: '%s'",
@ -281,6 +287,12 @@ public:
ScheduleTask(kernel, rangeMax);
}
/// Schedules `functor` over a 3D range bounded by `rangeMax` without any
/// scheduling hints, by delegating to the hinted overload with an empty
/// `HintList`.
template <typename FunctorType>
VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  Schedule(NoHints{}, functor, rangeMax);
}
//1. We need functions for each of the following
@ -421,21 +433,33 @@ template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagTBB>
{
public:
template <typename WorkletType, typename InvocationType>
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::tbb::internal::TaskTiling1D MakeTask(WorkletType& worklet,
InvocationType& invocation,
vtkm::Id)
vtkm::Id,
Hints = Hints{})
{
// Currently ignoring hints.
return vtkm::exec::tbb::internal::TaskTiling1D(worklet, invocation);
}
template <typename WorkletType, typename InvocationType>
template <typename Hints, typename WorkletType, typename InvocationType>
static vtkm::exec::tbb::internal::TaskTiling3D MakeTask(WorkletType& worklet,
InvocationType& invocation,
vtkm::Id3)
vtkm::Id3,
Hints = Hints{})
{
// Currently ignoring hints.
return vtkm::exec::tbb::internal::TaskTiling3D(worklet, invocation);
}
/// Builds a task for `worklet`/`invocation` when the caller supplies no hints.
///
/// Forwards to the hinted `MakeTask` overloads with an empty `HintList` as the
/// explicit first template argument; `range` selects between them.
template <typename WorkletType, typename InvocationType, typename RangeType>
VTKM_CONT static auto MakeTask(WorkletType& worklet,
                               InvocationType& invocation,
                               const RangeType& range)
{
  using NoHints = vtkm::cont::internal::HintList<>;
  return MakeTask<NoHints>(worklet, invocation, range);
}
};
}
} // namespace vtkm::cont

@ -107,6 +107,7 @@ set(unit_tests_device
UnitTestDataSetPermutation.cxx
UnitTestDataSetSingleType.cxx
UnitTestDeviceAdapterAlgorithmDependency.cxx
UnitTestHints.cxx
UnitTestImplicitFunction.cxx
UnitTestParticleArrayCopy.cxx
UnitTestPointLocatorSparseGrid.cxx
@ -131,6 +132,11 @@ endif()
vtkm_unit_tests(SOURCES ${unit_tests} DEVICE_SOURCES ${unit_tests_device})
target_sources(UnitTests_vtkm_cont_testing
PRIVATE
UnitTestHints.cxx
)
#add distributed tests i.e.test to run with MPI
#if MPI is enabled.
set(mpi_unit_tests

@ -0,0 +1,108 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/internal/Hints.h>
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/exec/FunctorBase.h>
#include <vtkm/cont/testing/Testing.h>
namespace UnitTestHintNamespace
{
/// Checks which hint `HintFind` selects for a given hint list, fallback hint,
/// and device adapter tag.
///
/// Each assertion compares the `MaxThreads` member of the `HintFind` result
/// against the value expected for that combination.
void CheckFind()
{
  // Every condition is wrapped in its own pair of parentheses. The commas that
  // separate the template arguments are not protected by the angle brackets as
  // far as the preprocessor is concerned, so without the parentheses the
  // expression would be split into several macro arguments.

  // With no hints in the list, the fallback hint (128) is what comes back.
  std::cout << "Empty list returns default.\n";
  VTKM_TEST_ASSERT(
    (vtkm::cont::internal::HintFind<vtkm::cont::internal::HintList<>,
                                    vtkm::cont::internal::HintThreadsPerBlock<128>,
                                    vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads == 128));

  // A hint given without a device list is found for the queried device, so the
  // fallback of 0 is replaced by 128.
  std::cout << "Find a hint that matches.\n";
  VTKM_TEST_ASSERT(
    (vtkm::cont::internal::HintFind<
       vtkm::cont::internal::HintList<vtkm::cont::internal::HintThreadsPerBlock<128>>,
       vtkm::cont::internal::HintThreadsPerBlock<0>,
       vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads == 128));

  // Same lookup, but the hint explicitly lists the queried device.
  VTKM_TEST_ASSERT(
    (vtkm::cont::internal::HintFind<
       vtkm::cont::internal::HintList<
         vtkm::cont::internal::HintThreadsPerBlock<128,
                                                   vtkm::List<vtkm::cont::DeviceAdapterTagKokkos>>>,
       vtkm::cont::internal::HintThreadsPerBlock<0>,
       vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads == 128));

  // The only hint is restricted to Kokkos; querying for Serial must yield the
  // fallback value of 0.
  std::cout << "Skip a hint that does not match.\n";
  VTKM_TEST_ASSERT(
    (vtkm::cont::internal::HintFind<
       vtkm::cont::internal::HintList<
         vtkm::cont::internal::HintThreadsPerBlock<128,
                                                   vtkm::List<vtkm::cont::DeviceAdapterTagKokkos>>>,
       vtkm::cont::internal::HintThreadsPerBlock<0>,
       vtkm::cont::DeviceAdapterTagSerial>::MaxThreads == 0));

  std::cout << "Given a list of hints, pick the last one that matches\n";
  {
    // 64 carries no device list, 128 is restricted to Cuda, 256 to Kokkos.
    using HList = vtkm::cont::internal::HintList<
      vtkm::cont::internal::HintThreadsPerBlock<64>,
      vtkm::cont::internal::HintThreadsPerBlock<128, vtkm::List<vtkm::cont::DeviceAdapterTagCuda>>,
      vtkm::cont::internal::HintThreadsPerBlock<256,
                                                vtkm::List<vtkm::cont::DeviceAdapterTagKokkos>>>;
    using HInit = vtkm::cont::internal::HintThreadsPerBlock<0>;

    // Serial only matches the first entry.
    VTKM_TEST_ASSERT((vtkm::cont::internal::
                        HintFind<HList, HInit, vtkm::cont::DeviceAdapterTagSerial>::MaxThreads ==
                      64));
    // Cuda matches the first two entries; the later one (128) is expected.
    VTKM_TEST_ASSERT(
      (vtkm::cont::internal::HintFind<HList, HInit, vtkm::cont::DeviceAdapterTagCuda>::MaxThreads ==
       128));
    // Kokkos matches the first and third entries; the later one (256) is expected.
    VTKM_TEST_ASSERT((vtkm::cont::internal::
                        HintFind<HList, HInit, vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads ==
                      256));
  }
}
/// Functor with empty call operators for both a flat index and a 3D index.
/// It exists only so that there is something to hand to the scheduler.
struct MyFunctor : public vtkm::exec::FunctorBase
{
  /// 1D entry point; intentionally does nothing.
  VTKM_EXEC void operator()(vtkm::Id vtkmNotUsed(index)) const {}

  /// 3D entry point; intentionally does nothing.
  VTKM_EXEC void operator()(vtkm::Id3 vtkmNotUsed(index)) const {}
};
void CheckSchedule()
{
std::cout << "Schedule a functor using hints.\n";
// There is no good way to see if the device adapter got or used the hints
// as device adapters are free to ignore hints. This just tests that the
// hints can be passed.
using Hints = vtkm::cont::internal::HintList<vtkm::cont::internal::HintThreadsPerBlock<128>>;
vtkm::cont::Algorithm::Schedule(Hints{}, MyFunctor{}, 10);
vtkm::cont::Algorithm::Schedule(Hints{}, MyFunctor{}, vtkm::Id3{ 2 });
}
void Run()
{
CheckFind();
CheckSchedule();
}
} // namespace UnitTestHintNamespace
/// Test entry point: hands `UnitTestHintNamespace::Run` and the command-line
/// arguments to the testing harness and returns the harness's result.
int UnitTestHints(int argc, char* argv[])
{
  const int exitCode = vtkm::cont::testing::Testing::Run(UnitTestHintNamespace::Run, argc, argv);
  return exitCode;
}

@ -12,6 +12,8 @@
#include <vtkm/Types.h>
#include <vtkm/cont/internal/Hints.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
namespace vtkm

@ -50,9 +50,11 @@ protected:
SetErrorBufferSignature SetErrorBufferFunction = nullptr;
};
template <typename WType, typename IType>
template <typename WType, typename IType, typename Hints>
class TaskStrided1D : public TaskStrided
{
VTKM_IS_HINT_LIST(Hints);
public:
TaskStrided1D(const WType& worklet, const IType& invocation)
: TaskStrided()
@ -90,9 +92,11 @@ private:
const IType Invocation;
};
template <typename WType>
class TaskStrided1D<WType, vtkm::internal::NullType> : public TaskStrided
template <typename WType, typename Hints>
class TaskStrided1D<WType, vtkm::internal::NullType, Hints> : public TaskStrided
{
VTKM_IS_HINT_LIST(Hints);
public:
TaskStrided1D(WType& worklet)
: TaskStrided()
@ -116,9 +120,11 @@ private:
typename std::remove_const<WType>::type Worklet;
};
template <typename WType, typename IType>
template <typename WType, typename IType, typename Hints>
class TaskStrided3D : public TaskStrided
{
VTKM_IS_HINT_LIST(Hints);
public:
TaskStrided3D(const WType& worklet, const IType& invocation)
: TaskStrided()
@ -165,9 +171,11 @@ private:
const IType Invocation;
};
template <typename WType>
class TaskStrided3D<WType, vtkm::internal::NullType> : public TaskStrided
template <typename WType, typename Hints>
class TaskStrided3D<WType, vtkm::internal::NullType, Hints> : public TaskStrided
{
VTKM_IS_HINT_LIST(Hints);
public:
TaskStrided3D(WType& worklet)
: TaskStrided()

@ -342,8 +342,8 @@ void TestErrorFunctorInvoke()
TestExecObject(input.PrepareForInPlace(DeviceAdapter(), token)),
TestExecObject(output.PrepareForInPlace(DeviceAdapter(), token)));
using TaskStrided1 =
vtkm::exec::cuda::internal::TaskStrided1D<TestWorkletErrorProxy, InvocationType1>;
using TaskStrided1 = vtkm::exec::cuda::internal::
TaskStrided1D<TestWorkletErrorProxy, InvocationType1, vtkm::cont::internal::HintList<>>;
TestWorkletErrorProxy worklet;
InvocationType1 invocation(execObjects);

@ -24,9 +24,11 @@ namespace kokkos
namespace internal
{
template <typename WType, typename IType>
template <typename WType, typename IType, typename Hints>
class TaskBasic1D : public vtkm::exec::TaskBase
{
VTKM_IS_HINT_LIST(Hints);
public:
TaskBasic1D(const WType& worklet, const IType& invocation)
: Worklet(worklet)
@ -57,9 +59,11 @@ private:
IType Invocation;
};
template <typename WType>
class TaskBasic1D<WType, vtkm::internal::NullType> : public vtkm::exec::TaskBase
template <typename WType, typename Hints>
class TaskBasic1D<WType, vtkm::internal::NullType, Hints> : public vtkm::exec::TaskBase
{
VTKM_IS_HINT_LIST(Hints);
public:
explicit TaskBasic1D(const WType& worklet)
: Worklet(worklet)
@ -78,9 +82,11 @@ private:
typename std::remove_const<WType>::type Worklet;
};
template <typename WType, typename IType>
template <typename WType, typename IType, typename Hints>
class TaskBasic3D : public vtkm::exec::TaskBase
{
VTKM_IS_HINT_LIST(Hints);
public:
TaskBasic3D(const WType& worklet, const IType& invocation)
: Worklet(worklet)
@ -112,9 +118,11 @@ private:
IType Invocation;
};
template <typename WType>
class TaskBasic3D<WType, vtkm::internal::NullType> : public vtkm::exec::TaskBase
template <typename WType, typename Hints>
class TaskBasic3D<WType, vtkm::internal::NullType, Hints> : public vtkm::exec::TaskBase
{
VTKM_IS_HINT_LIST(Hints);
public:
explicit TaskBasic3D(const WType& worklet)
: Worklet(worklet)

@ -792,7 +792,8 @@ private:
// vtkm::exec::internal::TaskSingular
// vtkm::exec::internal::TaskTiling1D
// vtkm::exec::internal::TaskTiling3D
auto task = TaskTypes::MakeTask(this->Worklet, invocation, range);
auto task =
TaskTypes::MakeTask(this->Worklet, invocation, range, typename WorkletType::Hints{});
Algorithm::ScheduleTask(task, range);
}
};

@ -40,6 +40,8 @@
#include <vtkm/cont/arg/TypeCheckTagCellSet.h>
#include <vtkm/cont/arg/TypeCheckTagExecObject.h>
#include <vtkm/cont/internal/Hints.h>
#include <vtkm/worklet/MaskNone.h>
#include <vtkm/worklet/ScatterIdentity.h>
#include <vtkm/worklet/internal/Placeholders.h>
@ -136,6 +138,11 @@ public:
/// everything in the output domain.
using MaskType = vtkm::worklet::MaskNone;
/// Worklets can provide hints to the scheduler by defining a `Hints` type that
/// resolves to a `vtkm::cont::internal::HintList`. The default hint list is empty
/// so that scheduling uses all defaults.
using Hints = vtkm::cont::internal::HintList<>;
/// @brief `ControlSignature` tag for whole input arrays.
///
/// The `WholeArrayIn` control signature tag specifies a `vtkm::cont::ArrayHandle`