diff --git a/docs/changelog/hints.md b/docs/changelog/hints.md
new file mode 100644
index 000000000..10f9bf47a
--- /dev/null
+++ b/docs/changelog/hints.md
@@ -0,0 +1,32 @@
+# Add hints to device adapter scheduler
+
+The `DeviceAdapter` provides an abstract interface to the accelerator
+devices worklets and other algorithms run on. As such, the programmer has
+less control over how the device launches each worklet. Each device
+adapter has its own configuration parameters and other ways to attempt to
+optimize how things are run, but these are always a universal set of
+options that are applied to everything run on the device. There is no way
+to specify launch parameters for a particular worklet.
+
+To provide this information, VTK-m now supports `Hint`s to the device
+adapter. The `DeviceAdapterAlgorithm::Schedule` method takes a templated
+argument that is of the type `HintList`. This object contains a template
+list of `Hint` types that provide suggestions on how to launch the parallel
+execution. The device adapter will pick out hints that pertain to it and
+adjust its launching accordingly.
+
+These are called hints rather than, say, directives, because they don't
+force the device adapter to do anything. The device adapter is free to
+ignore any (and all) hints. The point is that the device adapter can take
+into account the information to try to optimize for itself.
+
+A provided hint can be tied to specific device adapters. In this way, a
+worklet can further optimize itself. If multiple hints match a device
+adapter, the last one in the list will be selected.
+
+The `Worklet` base now has an internal type named `Hints` that points to a
+`HintList` that is applied when the worklet is scheduled. Derived worklet
+classes can provide hints by simply defining their own `Hints` type.
+
+This feature is experimental and consequently hidden in an `internal`
+namespace.
diff --git a/vtkm/cont/Algorithm.h b/vtkm/cont/Algorithm.h
index e8002abfc..d197b8c89 100644
--- a/vtkm/cont/Algorithm.h
+++ b/vtkm/cont/Algorithm.h
@@ -17,6 +17,7 @@
 #include <vtkm/cont/ExecutionObjectBase.h>
 #include <vtkm/cont/Token.h>
 #include <vtkm/cont/TryExecute.h>
+#include <vtkm/cont/internal/Hints.h>
 
 
 namespace vtkm
@@ -932,29 +933,43 @@ struct Algorithm
     ScanExtended(vtkm::cont::DeviceAdapterTagAny(), input, output, binaryFunctor, initialValue);
   }
 
-
-  template <class Functor>
+  // Should this be deprecated in favor of `RuntimeDeviceTracker`?
+  template <typename Functor>
   VTKM_CONT static void Schedule(vtkm::cont::DeviceAdapterId devId,
                                  Functor functor,
                                  vtkm::Id numInstances)
   {
-    vtkm::cont::TryExecuteOnDevice(devId, detail::ScheduleFunctor(), functor, numInstances);
+    vtkm::cont::TryExecuteOnDevice(devId, detail::ScheduleFunctor{}, functor, numInstances);
   }
-  template <class Functor>
+  template <typename... Hints, typename Functor>
+  VTKM_CONT static void Schedule(vtkm::cont::internal::HintList<Hints...> hints,
+                                 Functor functor,
+                                 vtkm::Id numInstances)
+  {
+    vtkm::cont::TryExecute(detail::ScheduleFunctor{}, hints, functor, numInstances);
+  }
+  template <typename Functor>
   VTKM_CONT static void Schedule(Functor functor, vtkm::Id numInstances)
   {
-    Schedule(vtkm::cont::DeviceAdapterTagAny(), functor, numInstances);
+    Schedule(vtkm::cont::DeviceAdapterTagAny{}, functor, numInstances);
   }
 
 
-  template <class Functor>
+  template <typename Functor>
   VTKM_CONT static void Schedule(vtkm::cont::DeviceAdapterId devId,
                                  Functor functor,
                                  vtkm::Id3 rangeMax)
   {
     vtkm::cont::TryExecuteOnDevice(devId, detail::ScheduleFunctor(), functor, rangeMax);
   }
-  template <class Functor>
+  template <typename... Hints, typename Functor>
+  VTKM_CONT static void Schedule(vtkm::cont::internal::HintList<Hints...> hints,
+                                 Functor functor,
+                                 vtkm::Id3 rangeMax)
+  {
+    vtkm::cont::TryExecute(detail::ScheduleFunctor{}, hints, functor, rangeMax);
+  }
+  template <typename Functor>
   VTKM_CONT static void Schedule(Functor functor, vtkm::Id3 rangeMax)
   {
     Schedule(vtkm::cont::DeviceAdapterTagAny(), functor, rangeMax);
diff --git a/vtkm/cont/CMakeLists.txt b/vtkm/cont/CMakeLists.txt
index 5ff482a1f..0c19f40a6 100644
--- a/vtkm/cont/CMakeLists.txt
+++ b/vtkm/cont/CMakeLists.txt
@@ -283,6 +283,11 @@ vtkm_library( NAME vtkm_cont
               DEVICE_SOURCES ${device_sources}
             )
 
+target_sources(vtkm_cont
+  PRIVATE
+    internal/Hints.h
+)
+
 add_subdirectory(internal)
 add_subdirectory(arg)
 
diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.cu b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.cu
index beeeef528..f8915f162 100644
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.cu
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.cu
@@ -203,7 +203,8 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::CheckForErrors()
 void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThreads(
   vtkm::UInt32& blocks,
   vtkm::UInt32& threadsPerBlock,
-  vtkm::Id size)
+  vtkm::Id size,
+  vtkm::IdComponent maxThreadsPerBlock)
 {
   (void)size;
   vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
@@ -215,12 +216,17 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
   const auto& params = cuda::internal::scheduling_1d_parameters[static_cast<size_t>(deviceId)];
   blocks = static_cast<vtkm::UInt32>(params.first);
   threadsPerBlock = static_cast<vtkm::UInt32>(params.second);
+  if ((maxThreadsPerBlock > 0) && (threadsPerBlock > static_cast<vtkm::UInt32>(maxThreadsPerBlock)))
+  {
+    threadsPerBlock = static_cast<vtkm::UInt32>(maxThreadsPerBlock); // hint is an upper bound
+  }
 }
 
 void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThreads(
   vtkm::UInt32& blocks,
   dim3& threadsPerBlock,
-  const dim3& size)
+  const dim3& size,
+  vtkm::IdComponent maxThreadsPerBlock)
 {
   vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
 
@@ -240,6 +246,27 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
     blocks = static_cast<vtkm::UInt32>(params.first);
     threadsPerBlock = params.second;
   }
+
+  if (maxThreadsPerBlock > 0)
+  {
+    while ((threadsPerBlock.x * threadsPerBlock.y * threadsPerBlock.z) >
+           static_cast<vtkm::UInt32>(maxThreadsPerBlock))
+    {
+      // Halve x if it exceeds y, otherwise y if it exceeds z, otherwise z, until small enough.
+      if (threadsPerBlock.x > threadsPerBlock.y)
+      {
+        threadsPerBlock.x /= 2;
+      }
+      else if (threadsPerBlock.y > threadsPerBlock.z)
+      {
+        threadsPerBlock.y /= 2;
+      }
+      else
+      {
+        threadsPerBlock.z /= 2;
+      }
+    }
+  }
 }
 
 void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::LogKernelLaunch(
diff --git a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
index 21773c441..859a4be0e 100644
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
@@ -1654,10 +1654,24 @@ public:
   VTKM_CONT_EXPORT
   static void GetBlocksAndThreads(vtkm::UInt32& blocks,
                                   vtkm::UInt32& threadsPerBlock,
-                                  vtkm::Id size);
+                                  vtkm::Id size,
+                                  vtkm::IdComponent maxThreadsPerBlock);
 
   VTKM_CONT_EXPORT
-  static void GetBlocksAndThreads(vtkm::UInt32& blocks, dim3& threadsPerBlock, const dim3& size);
+  static void GetBlocksAndThreads(vtkm::UInt32& blocks,
+                                  dim3& threadsPerBlock,
+                                  const dim3& size,
+                                  vtkm::IdComponent maxThreadsPerBlock);
+
+  template <typename... Hints, typename... Args>
+  static void GetBlocksAndThreads(vtkm::cont::internal::HintList<Hints...>, Args&&... args)
+  {
+    using ThreadsPerBlock =
+      vtkm::cont::internal::HintFind<vtkm::cont::internal::HintList<Hints...>,
+                                     vtkm::cont::internal::HintThreadsPerBlock<0>,
+                                     vtkm::cont::DeviceAdapterTagCuda>;
+    GetBlocksAndThreads(std::forward<Args>(args)..., ThreadsPerBlock::MaxThreads);
+  }
 
   VTKM_CONT_EXPORT
   static void LogKernelLaunch(const cudaFuncAttributes& func_attrs,
@@ -1674,8 +1688,8 @@ public:
                               const dim3& size);
 
 public:
-  template <typename WType, typename IType>
-  static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>& functor,
+  template <typename WType, typename IType, typename Hints>
+  static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided1D<WType, IType, Hints>& functor,
                            vtkm::Id numInstances)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@@ -1691,12 +1705,12 @@ public:
     SetupErrorBuffer(functor);
 
     vtkm::UInt32 blocks, threadsPerBlock;
-    GetBlocksAndThreads(blocks, threadsPerBlock, numInstances);
+    GetBlocksAndThreads(Hints{}, blocks, threadsPerBlock, numInstances);
 
 #ifdef VTKM_ENABLE_LOGGING
     if (GetStderrLogLevel() >= vtkm::cont::LogLevel::KernelLaunches)
     {
-      using FunctorType = vtkm::exec::cuda::internal::TaskStrided1D<WType, IType>;
+      using FunctorType = std::decay_t<decltype(functor)>;
       cudaFuncAttributes empty_kernel_attrs;
       VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
                                            cuda::internal::TaskStrided1DLaunch<FunctorType>));
@@ -1708,8 +1722,8 @@ public:
       functor, numInstances);
   }
 
-  template <typename WType, typename IType>
-  static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided3D<WType, IType>& functor,
+  template <typename WType, typename IType, typename Hints>
+  static void ScheduleTask(vtkm::exec::cuda::internal::TaskStrided3D<WType, IType, Hints>& functor,
                            vtkm::Id3 rangeMax)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@@ -1730,12 +1744,12 @@ public:
 
     vtkm::UInt32 blocks;
     dim3 threadsPerBlock;
-    GetBlocksAndThreads(blocks, threadsPerBlock, ranges);
+    GetBlocksAndThreads(Hints{}, blocks, threadsPerBlock, ranges);
 
 #ifdef VTKM_ENABLE_LOGGING
     if (GetStderrLogLevel() >= vtkm::cont::LogLevel::KernelLaunches)
     {
-      using FunctorType = vtkm::exec::cuda::internal::TaskStrided3D<WType, IType>;
+      using FunctorType = std::decay_t<decltype(functor)>;
       cudaFuncAttributes empty_kernel_attrs;
       VTKM_CUDA_CALL(cudaFuncGetAttributes(&empty_kernel_attrs,
                                            cuda::internal::TaskStrided3DLaunch<FunctorType>));
@@ -1747,25 +1761,39 @@ public:
       functor, rangeMax);
   }
 
-  template <class Functor>
-  VTKM_CONT static void Schedule(Functor functor, vtkm::Id numInstances)
+  template <typename Hints, typename Functor>
+  VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id numInstances)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
-    vtkm::exec::cuda::internal::TaskStrided1D<Functor, vtkm::internal::NullType> kernel(functor);
+    vtkm::exec::cuda::internal::TaskStrided1D<Functor, vtkm::internal::NullType, Hints> kernel(
+      functor);
 
     ScheduleTask(kernel, numInstances);
   }
 
-  template <class Functor>
-  VTKM_CONT static void Schedule(Functor functor, const vtkm::Id3& rangeMax)
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, numInstances);
+  }
+
+  template <typename Hints, typename Functor>
+  VTKM_CONT static void Schedule(Hints, Functor functor, const vtkm::Id3& rangeMax)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
-    vtkm::exec::cuda::internal::TaskStrided3D<Functor, vtkm::internal::NullType> kernel(functor);
+    vtkm::exec::cuda::internal::TaskStrided3D<Functor, vtkm::internal::NullType, Hints> kernel(
+      functor);
     ScheduleTask(kernel, rangeMax);
   }
 
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, rangeMax);
+  }
+
   template <typename T, class Storage>
   VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
   {
@@ -1894,20 +1922,26 @@ template <>
 class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagCuda>
 {
 public:
-  template <typename WorkletType, typename InvocationType>
-  static vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType>
-  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id)
+  template <typename Hints, typename WorkletType, typename InvocationType>
+  static vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType, Hints>
+  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id, Hints = Hints{})
   {
-    using Task = vtkm::exec::cuda::internal::TaskStrided1D<WorkletType, InvocationType>;
-    return Task(worklet, invocation);
+    return { worklet, invocation };
   }
 
-  template <typename WorkletType, typename InvocationType>
-  static vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType>
-  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3)
+  template <typename Hints, typename WorkletType, typename InvocationType>
+  static vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType, Hints>
+  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3, Hints = Hints{})
   {
-    using Task = vtkm::exec::cuda::internal::TaskStrided3D<WorkletType, InvocationType>;
-    return Task(worklet, invocation);
+    return { worklet, invocation };
+  }
+
+  template <typename WorkletType, typename InvocationType, typename RangeType>
+  VTKM_CONT static auto MakeTask(WorkletType& worklet,
+                                 InvocationType& invocation,
+                                 const RangeType& range)
+  {
+    return MakeTask<vtkm::cont::internal::HintList<>>(worklet, invocation, range);
   }
 };
 }
diff --git a/vtkm/cont/internal/CMakeLists.txt b/vtkm/cont/internal/CMakeLists.txt
index 9146f36ce..74c4459ce 100644
--- a/vtkm/cont/internal/CMakeLists.txt
+++ b/vtkm/cont/internal/CMakeLists.txt
@@ -25,6 +25,7 @@ set(headers
   DeviceAdapterListHelpers.h
   FieldCollection.h
   FunctorsGeneral.h
+  Hints.h
   IteratorFromArrayPortal.h
   KXSort.h
   MapArrayPermutation.h
diff --git a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h
index 9fc50061a..8f9cb794c 100644
--- a/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h
+++ b/vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h
@@ -20,6 +20,7 @@
 #include <vtkm/cont/BitField.h>
 #include <vtkm/cont/Logging.h>
 #include <vtkm/cont/internal/FunctorsGeneral.h>
+#include <vtkm/cont/internal/Hints.h>
 
 #include <vtkm/exec/internal/ErrorMessageBuffer.h>
 #include <vtkm/exec/internal/TaskSingular.h>
@@ -58,20 +59,30 @@ namespace internal
 ///    : DeviceAdapterAlgorithmGeneral<DeviceAdapterAlgorithm<DeviceAdapterTagFoo>,
 ///                                    DeviceAdapterTagFoo>
 /// {
-///   template<class Functor>
-///   VTKM_CONT static void Schedule(Functor functor,
-///                                        vtkm::Id numInstances)
+///   template<typename Hints, typename Functor>
+///   VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id numInstances)
 ///   {
 ///     ...
 ///   }
 ///
-///   template<class Functor>
-///   VTKM_CONT static void Schedule(Functor functor,
-///                                        vtkm::Id3 maxRange)
+///   template<typename Functor>
+///   VTKM_CONT static void Schedule(Functor&& functor, vtkm::Id numInstances)
+///   {
+///     Schedule(vtkm::cont::internal::HintList<>{}, functor, numInstances);
+///   }
+///
+///   template<typename Hints, typename Functor>
+///   VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id3 maxRange)
 ///   {
 ///     ...
 ///   }
 ///
+///   template<typename Functor>
+///   VTKM_CONT static void Schedule(Functor&& functor, vtkm::Id3 maxRange)
+///   {
+///     Schedule(vtkm::cont::internal::HintList<>{}, functor, maxRange);
+///   }
+///
 ///   VTKM_CONT static void Synchronize()
 ///   {
 ///     ...
diff --git a/vtkm/cont/internal/Hints.h b/vtkm/cont/internal/Hints.h
new file mode 100644
index 000000000..acd35a2f9
--- /dev/null
+++ b/vtkm/cont/internal/Hints.h
@@ -0,0 +1,124 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+#ifndef vtk_m_cont_internal_Hints_h
+#define vtk_m_cont_internal_Hints_h
+
+#include <vtkm/Assert.h>
+#include <vtkm/List.h>
+
+#include <vtkm/cont/DeviceAdapterTag.h>
+
+namespace vtkm
+{
+namespace cont
+{
+namespace internal
+{
+
+/// @brief Representation of a hint for execution.
+///
+/// A hint is a (potentially) device independent parameter that can be used when
+/// scheduling parallel execution on a device. Control-side code can provide hints
+/// when scheduling parallel device execution to provide some context about what
+/// is being run and potentially optimize the algorithm. An implementation for
+/// a device adapter can choose to use or ignore hints. Likewise, a hint can be
+/// attached to a specific list of devices.
+///
+/// This base class is not intended to be used directly. Use one of the
+/// derived hint structures to specify a hint.
+template <typename Derived_, typename Tag_, typename DeviceList_>
+struct HintBase
+{
+  using Derived = Derived_;
+  using Tag = Tag_;
+  using DeviceList = DeviceList_;
+};
+
+struct HintTagThreadsPerBlock
+{
+};
+
+/// @brief Suggest the number of threads to use when scheduling blocks of threads.
+///
+/// Many accelerator devices, particularly GPUs, schedule threads in blocks. This
+/// hint suggests the size of block to use during the scheduling.
+template <vtkm::IdComponent MaxThreads_, typename DeviceList_ = vtkm::ListUniversal>
+struct HintThreadsPerBlock
+  : HintBase<HintThreadsPerBlock<MaxThreads_, DeviceList_>, HintTagThreadsPerBlock, DeviceList_>
+{
+  static constexpr vtkm::IdComponent MaxThreads = MaxThreads_;
+};
+
+/// @brief Container for hints.
+///
+/// When scheduling or invoking a parallel routine, the caller can provide a list
+/// of hints to suggest the best way to execute the routine. This list is provided
+/// as arguments to a `HintList` template and passed as an argument.
+template <typename... Hints>
+struct HintList : vtkm::List<Hints...>
+{
+  using List = vtkm::List<Hints...>;
+};
+
+template <typename T>
+struct IsHintList : std::false_type
+{
+};
+template <typename... Hints>
+struct IsHintList<HintList<Hints...>> : std::true_type
+{
+};
+
+/// @brief Performs a static assert that the given object is a hint list.
+///
+/// If the provided type is a `vtkm::cont::internal::HintList`, then this macro
+/// does nothing. If the type is anything else, a compile error will occur. This
+/// macro is useful for checking that template arguments are an expected hint
+/// list. This helps diagnose improper template use more easily.
+#define VTKM_IS_HINT_LIST(T) VTKM_STATIC_ASSERT(::vtkm::cont::internal::IsHintList<T>::value)
+
+namespace detail
+{
+
+template <typename Device, typename HintTag>
+struct FindHintOperators
+{
+  VTKM_IS_DEVICE_ADAPTER_TAG(Device);
+
+  template <typename Hint>
+  using HintMatches = vtkm::internal::meta::And<std::is_same<typename Hint::Tag, HintTag>,
+                                                vtkm::ListHas<typename Hint::DeviceList, Device>>;
+  template <typename Found, typename Next>
+  using ReduceOperator = typename std::conditional<HintMatches<Next>::value, Next, Found>::type;
+};
+
+} // namespace detail
+
+/// @brief Find a hint of a particular type.
+///
+/// The `HintFind` template can be used to find a hint of a particular type.
+/// `HintFind` is provided a default hint, and it returns the hint in the hint
+/// list that has the same tag as the provided default and applies to the
+/// provided device tag. If no hint matches, the default itself is returned.
+///
+/// If multiple hints match the type and device, the _last_ one in the list
+/// is returned. Thus, when constructing hint lists, put the more general hints
+/// first and more specific ones last.
+template <typename HList, typename DefaultHint, typename Device>
+using HintFind = vtkm::ListReduce<
+  typename HList::List,
+  detail::FindHintOperators<Device, typename DefaultHint::Tag>::template ReduceOperator,
+  DefaultHint>;
+
+}
+}
+} // namespace vtkm::cont::internal
+
+#endif // vtk_m_cont_internal_Hints_h
diff --git a/vtkm/cont/kokkos/internal/DeviceAdapterAlgorithmKokkos.h b/vtkm/cont/kokkos/internal/DeviceAdapterAlgorithmKokkos.h
index 1a16ef492..73ff14f70 100644
--- a/vtkm/cont/kokkos/internal/DeviceAdapterAlgorithmKokkos.h
+++ b/vtkm/cont/kokkos/internal/DeviceAdapterAlgorithmKokkos.h
@@ -670,9 +670,9 @@ public:
   }
 
   //----------------------------------------------------------------------------
-  template <typename WType, typename IType>
+  template <typename WType, typename IType, typename Hints>
   VTKM_CONT static void ScheduleTask(
-    vtkm::exec::kokkos::internal::TaskBasic1D<WType, IType>& functor,
+    vtkm::exec::kokkos::internal::TaskBasic1D<WType, IType, Hints>& functor,
     vtkm::Id numInstances)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@@ -685,15 +685,22 @@ public:
 
     functor.SetErrorMessageBuffer(GetErrorMessageBufferInstance());
 
-    Kokkos::RangePolicy<vtkm::cont::kokkos::internal::ExecutionSpace, vtkm::Id> policy(
-      vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(), 0, numInstances);
+    constexpr vtkm::IdComponent maxThreadsPerBlock =
+      vtkm::cont::internal::HintFind<Hints,
+                                     vtkm::cont::internal::HintThreadsPerBlock<0>,
+                                     vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads;
+
+    Kokkos::RangePolicy<vtkm::cont::kokkos::internal::ExecutionSpace,
+                        Kokkos::LaunchBounds<maxThreadsPerBlock, 0>,
+                        Kokkos::IndexType<vtkm::Id>>
+      policy(vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(), 0, numInstances);
     Kokkos::parallel_for(policy, functor);
     CheckForErrors(); // synchronizes
   }
 
-  template <typename WType, typename IType>
+  template <typename WType, typename IType, typename Hints>
   VTKM_CONT static void ScheduleTask(
-    vtkm::exec::kokkos::internal::TaskBasic3D<WType, IType>& functor,
+    vtkm::exec::kokkos::internal::TaskBasic3D<WType, IType, Hints>& functor,
     vtkm::Id3 rangeMax)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
@@ -706,7 +713,13 @@ public:
 
     functor.SetErrorMessageBuffer(GetErrorMessageBufferInstance());
 
+    constexpr vtkm::IdComponent maxThreadsPerBlock =
+      vtkm::cont::internal::HintFind<Hints,
+                                     vtkm::cont::internal::HintThreadsPerBlock<0>,
+                                     vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads;
+
     Kokkos::MDRangePolicy<vtkm::cont::kokkos::internal::ExecutionSpace,
+                          Kokkos::LaunchBounds<maxThreadsPerBlock, 0>,
                           Kokkos::Rank<3>,
                           Kokkos::IndexType<vtkm::Id>>
       policy(vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(),
@@ -729,24 +742,38 @@ public:
     CheckForErrors(); // synchronizes
   }
 
-  template <class Functor>
-  VTKM_CONT static void Schedule(Functor functor, vtkm::Id numInstances)
+  template <typename Hints, typename Functor>
+  VTKM_CONT static void Schedule(Hints, Functor functor, vtkm::Id numInstances)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
-    vtkm::exec::kokkos::internal::TaskBasic1D<Functor, vtkm::internal::NullType> kernel(functor);
+    vtkm::exec::kokkos::internal::TaskBasic1D<Functor, vtkm::internal::NullType, Hints> kernel(
+      functor);
     ScheduleTask(kernel, numInstances);
   }
 
-  template <class Functor>
-  VTKM_CONT static void Schedule(Functor functor, const vtkm::Id3& rangeMax)
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, numInstances);
+  }
+
+  template <typename Hints, typename Functor>
+  VTKM_CONT static void Schedule(Hints, Functor functor, const vtkm::Id3& rangeMax)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
-    vtkm::exec::kokkos::internal::TaskBasic3D<Functor, vtkm::internal::NullType> kernel(functor);
+    vtkm::exec::kokkos::internal::TaskBasic3D<Functor, vtkm::internal::NullType, Hints> kernel(
+      functor);
     ScheduleTask(kernel, rangeMax);
   }
 
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, rangeMax);
+  }
+
   //----------------------------------------------------------------------------
 private:
   template <typename T>
@@ -1020,20 +1047,28 @@ template <>
 class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagKokkos>
 {
 public:
-  template <typename WorkletType, typename InvocationType>
-  VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType>
-  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id)
+  template <typename Hints, typename WorkletType, typename InvocationType>
+  VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType, Hints>
+  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id, Hints = Hints{})
   {
-    return vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType>(worklet,
-                                                                                  invocation);
+    return vtkm::exec::kokkos::internal::TaskBasic1D<WorkletType, InvocationType, Hints>(
+      worklet, invocation);
   }
 
-  template <typename WorkletType, typename InvocationType>
-  VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType>
-  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3)
+  template <typename Hints, typename WorkletType, typename InvocationType>
+  VTKM_CONT static vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType, Hints>
+  MakeTask(WorkletType& worklet, InvocationType& invocation, vtkm::Id3, Hints = {})
   {
-    return vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType>(worklet,
-                                                                                  invocation);
+    return vtkm::exec::kokkos::internal::TaskBasic3D<WorkletType, InvocationType, Hints>(
+      worklet, invocation);
+  }
+
+  template <typename WorkletType, typename InvocationType, typename RangeType>
+  VTKM_CONT static auto MakeTask(WorkletType& worklet,
+                                 InvocationType& invocation,
+                                 const RangeType& range)
+  {
+    return MakeTask<vtkm::cont::internal::HintList<>>(worklet, invocation, range);
   }
 };
 }
diff --git a/vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h b/vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
index 7b2079a93..beddb9f75 100644
--- a/vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
+++ b/vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
@@ -359,8 +359,8 @@ public:
   VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling3D& functor,
                                             vtkm::Id3 size);
 
-  template <class FunctorType>
-  VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
+  template <typename Hints, typename FunctorType>
+  VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id numInstances)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
@@ -368,8 +368,14 @@ public:
     ScheduleTask(kernel, numInstances);
   }
 
-  template <class FunctorType>
-  VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, numInstances);
+  }
+
+  template <typename Hints, typename FunctorType>
+  VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id3 rangeMax)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
@@ -377,6 +383,12 @@ public:
     ScheduleTask(kernel, rangeMax);
   }
 
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, rangeMax);
+  }
+
   VTKM_CONT static void Synchronize()
   {
     // Nothing to do. This device schedules all of its operations using a
@@ -390,21 +402,33 @@ template <>
 class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagOpenMP>
 {
 public:
-  template <typename WorkletType, typename InvocationType>
+  template <typename Hints, typename WorkletType, typename InvocationType>
   static vtkm::exec::openmp::internal::TaskTiling1D MakeTask(const WorkletType& worklet,
                                                              const InvocationType& invocation,
-                                                             vtkm::Id)
+                                                             vtkm::Id,
+                                                             Hints = Hints{})
   {
+    // Currently ignoring hints.
     return vtkm::exec::openmp::internal::TaskTiling1D(worklet, invocation);
   }
 
-  template <typename WorkletType, typename InvocationType>
+  template <typename Hints, typename WorkletType, typename InvocationType>
   static vtkm::exec::openmp::internal::TaskTiling3D MakeTask(const WorkletType& worklet,
                                                              const InvocationType& invocation,
-                                                             vtkm::Id3)
+                                                             vtkm::Id3,
+                                                             Hints = Hints{})
   {
+    // Currently ignoring hints.
     return vtkm::exec::openmp::internal::TaskTiling3D(worklet, invocation);
   }
+
+  template <typename WorkletType, typename InvocationType, typename RangeType>
+  VTKM_CONT static auto MakeTask(WorkletType& worklet,
+                                 InvocationType& invocation,
+                                 const RangeType& range)
+  {
+    return MakeTask<vtkm::cont::internal::HintList<>>(worklet, invocation, range);
+  }
 };
 }
 } // namespace vtkm::cont
diff --git a/vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h b/vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h
index cc187c68b..463b67cc6 100644
--- a/vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h
+++ b/vtkm/cont/serial/internal/DeviceAdapterAlgorithmSerial.h
@@ -400,8 +400,8 @@ public:
   VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::serial::internal::TaskTiling3D& functor,
                                             vtkm::Id3 size);
 
-  template <class FunctorType>
-  VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id size)
+  template <typename Hints, typename FunctorType>
+  VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id size)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
@@ -409,8 +409,14 @@ public:
     ScheduleTask(kernel, size);
   }
 
-  template <class FunctorType>
-  VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 size)
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id size)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, size);
+  }
+
+  template <typename Hints, typename FunctorType>
+  VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id3 size)
   {
     VTKM_LOG_SCOPE_FUNCTION(vtkm::cont::LogLevel::Perf);
 
@@ -418,6 +424,12 @@ public:
     ScheduleTask(kernel, size);
   }
 
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 size)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, size);
+  }
+
 private:
   template <typename Vin,
             typename I,
@@ -557,21 +569,33 @@ template <>
 class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagSerial>
 {
 public:
-  template <typename WorkletType, typename InvocationType>
+  template <typename Hints, typename WorkletType, typename InvocationType>
   static vtkm::exec::serial::internal::TaskTiling1D MakeTask(WorkletType& worklet,
                                                              InvocationType& invocation,
-                                                             vtkm::Id)
+                                                             vtkm::Id,
+                                                             Hints = Hints{})
   {
+    // Task creation for the serial device ignores hints for now; the Hints argument is unused.
     return vtkm::exec::serial::internal::TaskTiling1D(worklet, invocation);
   }
 
-  template <typename WorkletType, typename InvocationType>
+  template <typename Hints, typename WorkletType, typename InvocationType>
   static vtkm::exec::serial::internal::TaskTiling3D MakeTask(WorkletType& worklet,
                                                              InvocationType& invocation,
-                                                             vtkm::Id3)
+                                                             vtkm::Id3,
+                                                             Hints = Hints{})
   {
+    // Task creation for the serial device ignores hints for now; the Hints argument is unused.
     return vtkm::exec::serial::internal::TaskTiling3D(worklet, invocation);
   }
+
+  template <typename WorkletType, typename InvocationType, typename RangeType>
+  VTKM_CONT static auto MakeTask(WorkletType& worklet,
+                                 InvocationType& invocation,
+                                 const RangeType& range)
+  {
+    return MakeTask<vtkm::cont::internal::HintList<>>(worklet, invocation, range);
+  }
 };
 }
 } // namespace vtkm::cont
diff --git a/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h b/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h
index 687d84a2b..43c00d925 100644
--- a/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h
+++ b/vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h
@@ -259,8 +259,8 @@ public:
   VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::tbb::internal::TaskTiling3D& functor,
                                             vtkm::Id3 size);
 
-  template <class FunctorType>
-  VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
+  template <typename Hints, typename FunctorType>
+  VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id numInstances)
   {
     VTKM_LOG_SCOPE(vtkm::cont::LogLevel::Perf,
                    "Schedule TBB 1D: '%s'",
@@ -270,8 +270,14 @@ public:
     ScheduleTask(kernel, numInstances);
   }
 
-  template <class FunctorType>
-  VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id numInstances)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, numInstances);
+  }
+
+  template <typename Hints, typename FunctorType>
+  VTKM_CONT static inline void Schedule(Hints, FunctorType functor, vtkm::Id3 rangeMax)
   {
     VTKM_LOG_SCOPE(vtkm::cont::LogLevel::Perf,
                    "Schedule TBB 3D: '%s'",
@@ -281,6 +287,12 @@ public:
     ScheduleTask(kernel, rangeMax);
   }
 
+  template <typename FunctorType>
+  VTKM_CONT static inline void Schedule(FunctorType&& functor, vtkm::Id3 rangeMax)
+  {
+    Schedule(vtkm::cont::internal::HintList<>{}, functor, rangeMax);
+  }
+
   //1. We need functions for each of the following
 
 
@@ -421,21 +433,33 @@ template <>
 class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagTBB>
 {
 public:
-  template <typename WorkletType, typename InvocationType>
+  template <typename Hints, typename WorkletType, typename InvocationType>
   static vtkm::exec::tbb::internal::TaskTiling1D MakeTask(WorkletType& worklet,
                                                           InvocationType& invocation,
-                                                          vtkm::Id)
+                                                          vtkm::Id,
+                                                          Hints = Hints{})
   {
+    // Task creation for the TBB device ignores hints for now; the Hints argument is unused.
     return vtkm::exec::tbb::internal::TaskTiling1D(worklet, invocation);
   }
 
-  template <typename WorkletType, typename InvocationType>
+  template <typename Hints, typename WorkletType, typename InvocationType>
   static vtkm::exec::tbb::internal::TaskTiling3D MakeTask(WorkletType& worklet,
                                                           InvocationType& invocation,
-                                                          vtkm::Id3)
+                                                          vtkm::Id3,
+                                                          Hints = Hints{})
   {
+    // Task creation for the TBB device ignores hints for now; the Hints argument is unused.
     return vtkm::exec::tbb::internal::TaskTiling3D(worklet, invocation);
   }
+
+  template <typename WorkletType, typename InvocationType, typename RangeType>
+  VTKM_CONT static auto MakeTask(WorkletType& worklet,
+                                 InvocationType& invocation,
+                                 const RangeType& range)
+  {
+    return MakeTask<vtkm::cont::internal::HintList<>>(worklet, invocation, range);
+  }
 };
 }
 } // namespace vtkm::cont
diff --git a/vtkm/cont/testing/CMakeLists.txt b/vtkm/cont/testing/CMakeLists.txt
index 98ff167a1..850ecd497 100644
--- a/vtkm/cont/testing/CMakeLists.txt
+++ b/vtkm/cont/testing/CMakeLists.txt
@@ -107,6 +107,7 @@ set(unit_tests_device
   UnitTestDataSetPermutation.cxx
   UnitTestDataSetSingleType.cxx
   UnitTestDeviceAdapterAlgorithmDependency.cxx
+  UnitTestHints.cxx
   UnitTestImplicitFunction.cxx
   UnitTestParticleArrayCopy.cxx
   UnitTestPointLocatorSparseGrid.cxx
diff --git a/vtkm/cont/testing/UnitTestHints.cxx b/vtkm/cont/testing/UnitTestHints.cxx
new file mode 100644
index 000000000..073005cce
--- /dev/null
+++ b/vtkm/cont/testing/UnitTestHints.cxx
@@ -0,0 +1,108 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+
+#include <vtkm/cont/internal/Hints.h>
+
+#include <vtkm/cont/Algorithm.h>
+#include <vtkm/cont/DeviceAdapter.h>
+
+#include <vtkm/exec/FunctorBase.h>
+
+#include <vtkm/cont/testing/Testing.h>
+
+namespace UnitTestHintNamespace
+{
+
+void CheckFind()
+{
+  std::cout << "Empty list returns default.\n";
+  VTKM_TEST_ASSERT(vtkm::cont::internal::HintFind<vtkm::cont::internal::HintList<>,
+                                                  vtkm::cont::internal::HintThreadsPerBlock<128>,
+                                                  vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads ==
+                   128);
+
+  std::cout << "Find a hint that matches.\n";
+  VTKM_TEST_ASSERT(vtkm::cont::internal::HintFind<
+                     vtkm::cont::internal::HintList<vtkm::cont::internal::HintThreadsPerBlock<128>>,
+                     vtkm::cont::internal::HintThreadsPerBlock<0>,
+                     vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads == 128);
+  VTKM_TEST_ASSERT(
+    vtkm::cont::internal::HintFind<
+      vtkm::cont::internal::HintList<
+        vtkm::cont::internal::HintThreadsPerBlock<128,
+                                                  vtkm::List<vtkm::cont::DeviceAdapterTagKokkos>>>,
+      vtkm::cont::internal::HintThreadsPerBlock<0>,
+      vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads == 128);
+
+  std::cout << "Skip a hint that does not match.\n";
+  VTKM_TEST_ASSERT(
+    (vtkm::cont::internal::HintFind<
+       vtkm::cont::internal::HintList<
+         vtkm::cont::internal::HintThreadsPerBlock<128,
+                                                   vtkm::List<vtkm::cont::DeviceAdapterTagKokkos>>>,
+       vtkm::cont::internal::HintThreadsPerBlock<0>,
+       vtkm::cont::DeviceAdapterTagSerial>::MaxThreads == 0));
+
+  std::cout << "Given a list of hints, pick the last one that matches\n";
+  {
+    using HList = vtkm::cont::internal::HintList<
+      vtkm::cont::internal::HintThreadsPerBlock<64>,
+      vtkm::cont::internal::HintThreadsPerBlock<128, vtkm::List<vtkm::cont::DeviceAdapterTagCuda>>,
+      vtkm::cont::internal::HintThreadsPerBlock<256,
+                                                vtkm::List<vtkm::cont::DeviceAdapterTagKokkos>>>;
+    using HInit = vtkm::cont::internal::HintThreadsPerBlock<0>;
+    VTKM_TEST_ASSERT((vtkm::cont::internal::
+                        HintFind<HList, HInit, vtkm::cont::DeviceAdapterTagSerial>::MaxThreads ==
+                      64));
+    VTKM_TEST_ASSERT(
+      (vtkm::cont::internal::HintFind<HList, HInit, vtkm::cont::DeviceAdapterTagCuda>::MaxThreads ==
+       128));
+    VTKM_TEST_ASSERT((vtkm::cont::internal::
+                        HintFind<HList, HInit, vtkm::cont::DeviceAdapterTagKokkos>::MaxThreads ==
+                      256));
+  }
+}
+
+struct MyFunctor : vtkm::exec::FunctorBase
+{
+  VTKM_EXEC void operator()(vtkm::Id vtkmNotUsed(index)) const
+  {
+    // NOP
+  }
+
+  VTKM_EXEC void operator()(vtkm::Id3 vtkmNotUsed(index)) const
+  {
+    // NOP
+  }
+};
+
+void CheckSchedule()
+{
+  std::cout << "Schedule a functor using hints.\n";
+  // There is no good way to see if the device adapter got or used the hints
+  // as device adapters are free to ignore hints. This just tests that the
+  // hints can be passed.
+  using Hints = vtkm::cont::internal::HintList<vtkm::cont::internal::HintThreadsPerBlock<128>>;
+  vtkm::cont::Algorithm::Schedule(Hints{}, MyFunctor{}, 10);
+  vtkm::cont::Algorithm::Schedule(Hints{}, MyFunctor{}, vtkm::Id3{ 2 });
+}
+
+void Run()
+{
+  CheckFind();
+  CheckSchedule();
+}
+
+} // namespace UnitTestHintNamespace
+
+int UnitTestHints(int argc, char* argv[])
+{
+  return vtkm::cont::testing::Testing::Run(UnitTestHintNamespace::Run, argc, argv);
+}
diff --git a/vtkm/exec/TaskBase.h b/vtkm/exec/TaskBase.h
index 18bd97339..9de749f4d 100644
--- a/vtkm/exec/TaskBase.h
+++ b/vtkm/exec/TaskBase.h
@@ -12,6 +12,8 @@
 
 #include <vtkm/Types.h>
 
+#include <vtkm/cont/internal/Hints.h>
+
 #include <vtkm/exec/internal/ErrorMessageBuffer.h>
 
 namespace vtkm
diff --git a/vtkm/exec/cuda/internal/TaskStrided.h b/vtkm/exec/cuda/internal/TaskStrided.h
index ee9b5818e..98f55e292 100644
--- a/vtkm/exec/cuda/internal/TaskStrided.h
+++ b/vtkm/exec/cuda/internal/TaskStrided.h
@@ -50,9 +50,11 @@ protected:
   SetErrorBufferSignature SetErrorBufferFunction = nullptr;
 };
 
-template <typename WType, typename IType>
+template <typename WType, typename IType, typename Hints>
 class TaskStrided1D : public TaskStrided
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   TaskStrided1D(const WType& worklet, const IType& invocation)
     : TaskStrided()
@@ -90,9 +92,11 @@ private:
   const IType Invocation;
 };
 
-template <typename WType>
-class TaskStrided1D<WType, vtkm::internal::NullType> : public TaskStrided
+template <typename WType, typename Hints>
+class TaskStrided1D<WType, vtkm::internal::NullType, Hints> : public TaskStrided
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   TaskStrided1D(WType& worklet)
     : TaskStrided()
@@ -116,9 +120,11 @@ private:
   typename std::remove_const<WType>::type Worklet;
 };
 
-template <typename WType, typename IType>
+template <typename WType, typename IType, typename Hints>
 class TaskStrided3D : public TaskStrided
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   TaskStrided3D(const WType& worklet, const IType& invocation)
     : TaskStrided()
@@ -165,9 +171,11 @@ private:
   const IType Invocation;
 };
 
-template <typename WType>
-class TaskStrided3D<WType, vtkm::internal::NullType> : public TaskStrided
+template <typename WType, typename Hints>
+class TaskStrided3D<WType, vtkm::internal::NullType, Hints> : public TaskStrided
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   TaskStrided3D(WType& worklet)
     : TaskStrided()
diff --git a/vtkm/exec/cuda/testing/UnitTestTaskStrided.cu b/vtkm/exec/cuda/testing/UnitTestTaskStrided.cu
index c79ac2924..76d960a3d 100644
--- a/vtkm/exec/cuda/testing/UnitTestTaskStrided.cu
+++ b/vtkm/exec/cuda/testing/UnitTestTaskStrided.cu
@@ -342,8 +342,8 @@ void TestErrorFunctorInvoke()
       TestExecObject(input.PrepareForInPlace(DeviceAdapter(), token)),
       TestExecObject(output.PrepareForInPlace(DeviceAdapter(), token)));
 
-  using TaskStrided1 =
-    vtkm::exec::cuda::internal::TaskStrided1D<TestWorkletErrorProxy, InvocationType1>;
+  using TaskStrided1 = vtkm::exec::cuda::internal::
+    TaskStrided1D<TestWorkletErrorProxy, InvocationType1, vtkm::cont::internal::HintList<>>;
   TestWorkletErrorProxy worklet;
   InvocationType1 invocation(execObjects);
 
diff --git a/vtkm/exec/kokkos/internal/TaskBasic.h b/vtkm/exec/kokkos/internal/TaskBasic.h
index 8ce8e6fdb..48ca6d86d 100644
--- a/vtkm/exec/kokkos/internal/TaskBasic.h
+++ b/vtkm/exec/kokkos/internal/TaskBasic.h
@@ -24,9 +24,11 @@ namespace kokkos
 namespace internal
 {
 
-template <typename WType, typename IType>
+template <typename WType, typename IType, typename Hints>
 class TaskBasic1D : public vtkm::exec::TaskBase
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   TaskBasic1D(const WType& worklet, const IType& invocation)
     : Worklet(worklet)
@@ -57,9 +59,11 @@ private:
   IType Invocation;
 };
 
-template <typename WType>
-class TaskBasic1D<WType, vtkm::internal::NullType> : public vtkm::exec::TaskBase
+template <typename WType, typename Hints>
+class TaskBasic1D<WType, vtkm::internal::NullType, Hints> : public vtkm::exec::TaskBase
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   explicit TaskBasic1D(const WType& worklet)
     : Worklet(worklet)
@@ -78,9 +82,11 @@ private:
   typename std::remove_const<WType>::type Worklet;
 };
 
-template <typename WType, typename IType>
+template <typename WType, typename IType, typename Hints>
 class TaskBasic3D : public vtkm::exec::TaskBase
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   TaskBasic3D(const WType& worklet, const IType& invocation)
     : Worklet(worklet)
@@ -112,9 +118,11 @@ private:
   IType Invocation;
 };
 
-template <typename WType>
-class TaskBasic3D<WType, vtkm::internal::NullType> : public vtkm::exec::TaskBase
+template <typename WType, typename Hints>
+class TaskBasic3D<WType, vtkm::internal::NullType, Hints> : public vtkm::exec::TaskBase
 {
+  VTKM_IS_HINT_LIST(Hints);
+
 public:
   explicit TaskBasic3D(const WType& worklet)
     : Worklet(worklet)
diff --git a/vtkm/worklet/internal/DispatcherBase.h b/vtkm/worklet/internal/DispatcherBase.h
index 03fbe1bc2..92e1b2fa1 100644
--- a/vtkm/worklet/internal/DispatcherBase.h
+++ b/vtkm/worklet/internal/DispatcherBase.h
@@ -792,7 +792,8 @@ private:
     // vtkm::exec::internal::TaskSingular
     // vtkm::exec::internal::TaskTiling1D
     // vtkm::exec::internal::TaskTiling3D
-    auto task = TaskTypes::MakeTask(this->Worklet, invocation, range);
+    auto task =
+      TaskTypes::MakeTask(this->Worklet, invocation, range, typename WorkletType::Hints{});
     Algorithm::ScheduleTask(task, range);
   }
 };
diff --git a/vtkm/worklet/internal/WorkletBase.h b/vtkm/worklet/internal/WorkletBase.h
index cdf669cbf..2a619719a 100644
--- a/vtkm/worklet/internal/WorkletBase.h
+++ b/vtkm/worklet/internal/WorkletBase.h
@@ -40,6 +40,8 @@
 #include <vtkm/cont/arg/TypeCheckTagCellSet.h>
 #include <vtkm/cont/arg/TypeCheckTagExecObject.h>
 
+#include <vtkm/cont/internal/Hints.h>
+
 #include <vtkm/worklet/MaskNone.h>
 #include <vtkm/worklet/ScatterIdentity.h>
 #include <vtkm/worklet/internal/Placeholders.h>
@@ -136,6 +138,11 @@ public:
   /// everything in the output domain.
   using MaskType = vtkm::worklet::MaskNone;
 
+  /// Worklets can provide hints to the scheduler by defining a `Hints` type that
+  /// resolves to a `vtkm::cont::internal::HintList`. The default hint list is empty
+  /// so that scheduling uses all defaults.
+  using Hints = vtkm::cont::internal::HintList<>;
+
   /// @brief `ControlSignature` tag for whole input arrays.
   ///
   /// The `WholeArrayIn` control signature tag specifies a `vtkm::cont::ArrayHandle`