Merge remote-tracking branch 'origin/master' into update-to-v1.9.0

* origin/master: (331 commits)
  Deprecate old filter base classes and supporting classes
  Remove use of deprecated features in TransferToOpenGL
  Do not overthink ComputeNumberOfBlocksPerAxis
  Revised ComputeNumberOfBlocksPerAxis inspired by Ken Moreland's suggestion
  Replace the way data is split in contour tree augmented example
  diy 2022-09-14 (496253d7)
  Update diy/update.sh
  Fix Fields and CoordinateSystems of Lagrangian filter outputs
  Add more features to Threshold
  Convert LagrangianStructures filter to NewFilter
  Merge branch 'shading_scalarRenderer' of https://gitlab.kitware.com/nicolemarsaglia/vtk-m into shading_scalarRenderer
  Limit arguments of ArrayHandle to type vtkm::Id
  Fix compile error in debug code due to member name change
  Explicit comparator for moving NO_SUCH_ELEMENT to end of array
  Slice should support slicing along non-zero values
  Switch how InSitu benchmark iterates
  Add support for Offset in ClipWithImplicitFunction
  Update parameter constness to follow vtk-m style in Clip.h
  Fix doxygen groups
  Fix example of gitlab-sync
  ...
Commit d049e27edc by Vicente Adolfo Bolea Sanchez, 2022-09-27 12:19:48 -04:00
970 changed files with 26591 additions and 18420 deletions

@ -106,7 +106,7 @@
- .docker_image
.ubuntu2004_hip_kokkos: &ubuntu2004_hip_kokkos
image: "kitware/vtkm:ci-ubuntu2004_hip_kokkos-20210827"
image: "kitware/vtkm:ci-ubuntu2004_hip_kokkos-20220620"
extends:
- .docker_image
@ -126,9 +126,9 @@
when: on_success
- when: never
.run_master: &run_master
.run_upstream_branches: &run_upstream_branches
rules:
- if: '$CI_PROJECT_PATH == "vtk/vtk-m" && $CI_COMMIT_BRANCH == "master"'
- if: '$CI_PROJECT_PATH == "vtk/vtk-m" && $CI_MERGE_REQUEST_ID == null'
when: on_success
- when: never
@ -164,8 +164,9 @@ stages:
interruptible: true
before_script:
- *install_cmake
- .gitlab/ci/config/sccache.sh
- "cmake -VV -P .gitlab/ci/config/ninja.cmake"
- export PATH=$PWD/.gitlab:$PATH
- .gitlab/ci/config/sccache.sh
- SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
- sccache --show-stats
- .gitlab/ci/config/google_benchmarks.sh
@ -185,6 +186,8 @@ stages:
interruptible: true
before_script:
- *install_cmake
- "cmake -VV -P .gitlab/ci/config/ninja.cmake"
- export PATH=$PWD/.gitlab:$PATH
script:
- "ctest $CTEST_TIMEOUT -VV -S .gitlab/ci/ctest_test.cmake"
extends:
@ -206,9 +209,9 @@ stages:
- build/*.png
- build/*.pnm
- build/*.pmm
- build/junit.xml
reports:
junit:
- build/junit.xml
junit: build/junit.xml
.cmake_build_artifacts: &cmake_build_artifacts
artifacts:
@ -223,17 +226,9 @@ stages:
- build/config/
# CTest and CMake install files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
#
# Note: this also captures our CIState.cmake file
- build/CMakeCache.txt
- build/*.cmake
- build/*/*.cmake
- build/*/*/*.cmake
- build/*/*/*/*.cmake
- build/*/*/*/*/*.cmake
- build/*/*/*/*/*/*.cmake
- build/**/*.cmake
- build/Testing/
# CDash files.
@ -245,30 +240,22 @@ stages:
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
- build/*.pmm
- build/*/*.png
- build/*/*.pnm
- build/*/*.pmm
- build/*/*/*.png
- build/*/*/*.pnm
- build/*/*/*.pmm
- build/*/*/*/*.png
- build/*/*/*/*.pnm
- build/*/*/*/*.pmm
- build/**/*.png
- build/**/*.pnm
- build/**/*.pmm
- build/junit.xml
reports:
junit:
- build/junit.xml
junit: build/junit.xml
include:
- local: '/.gitlab/ci/ascent.yml'
- local: '/.gitlab/ci/centos7.yml'
- local: '/.gitlab/ci/centos8.yml'
- local: '/.gitlab/ci/doxygen.yml'
- local: '/.gitlab/ci/macos.yml'
- local: '/.gitlab/ci/rhel8.yml'
- local: '/.gitlab/ci/ubuntu1604.yml'
- local: '/.gitlab/ci/ubuntu1804.yml'
- local: '/.gitlab/ci/ubuntu2004.yml'
- local: '/.gitlab/ci/windows10.yml'
- local: '/.gitlab/ci/ascent.yml'

@ -74,14 +74,6 @@ test:ascent_gcc_cuda:
# Test errors to address due to different env/arch in Ascent
# Refer to issue: https://gitlab.kitware.com/vtk/vtk-m/-/issues/652
CTEST_EXCLUSIONS: >-
UnitTestMathSERIAL
UnitTestMathCUDA
UnitTestSerialDeviceAdapter
UnitTestAverageByKeySERIAL
UnitTestKeysSERIAL
UnitTestWorkletReduceByKeySERIAL
RegressionTestAmrArraysSERIAL
RegressionTestAmrArraysCUDA
before_script:
# Prep the environment

@ -0,0 +1,61 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
set(version 4.6.1)
set(arch x86_64)
if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux")
set(sha256sum da1e1781bc1c4b019216fa16391af3e1daaee7e7f49a8ec9b0cdc8a1d05c50e2)
set(base_url https://github.com/ccache/ccache/releases/download)
set(platform linux)
set(extension tar.xz)
elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
set(sha256sum 3e36ba8c80fbf7f2b95fe0227b9dd1ca6143d721aab052caf0d5729769138059)
set(full_url https://gitlab.kitware.com/utils/ci-utilities/-/package_files/534/download)
set(filename ccache)
set(extension tar.gz)
elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
set(sha256sum a6c6311973aa3d2aae22424895f2f968e5d661be003b25f1bd854a5c0cd57563)
set(base_url https://github.com/ccache/ccache/releases/download)
set(platform windows)
set(extension zip)
else()
message(FATAL_ERROR "Unrecognized platform ${CMAKE_HOST_SYSTEM_NAME}")
endif()
if(NOT DEFINED filename)
set(filename "ccache-${version}-${platform}-${arch}")
endif()
set(tarball "${filename}.${extension}")
if(NOT DEFINED full_url)
set(full_url "${base_url}/v${version}/${tarball}")
endif()
file(DOWNLOAD
"${full_url}" .gitlab/${tarball}
EXPECTED_HASH SHA256=${sha256sum}
SHOW_PROGRESS
)
execute_process(
COMMAND ${CMAKE_COMMAND} -E tar xf ${tarball}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/.gitlab
RESULT_VARIABLE extract_results
)
if(extract_results)
message(FATAL_ERROR "Extracting `${tarball}` failed: ${extract_results}.")
endif()
file(RENAME .gitlab/${filename} .gitlab/ccache)
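# Note: in this change the CI jobs drive this helper with
# `cmake -VV -P .gitlab/ci/config/ccache.cmake` and then prepend
# $PWD/.gitlab/ccache to PATH (see the macOS and Windows job updates below).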

@ -5,28 +5,27 @@ set -x
version="${1:-3.21.1}"
# We require CMake >= 3.13 in the CI to support CUDA builds
readonly -A linuxParamsByVersion=(
['3.13.5']='e2fd0080a6f0fc1ec84647acdcd8e0b4019770f48d83509e6a5b0b6ea27e5864 Linux'
['3.21.1']='bf496ce869d0aa8c1f57e4d1a2e50c8f2fb12a6cd7ccb37ad743bb88f6b76a1e linux'
)
if [ -z "${linuxParamsByVersion[$version]}" ]
then
echo "Given version ($version) is unsupported"
exit 1
fi
case "$( uname -s )" in
Linux)
shatool="sha256sum"
# We require CMake >= 3.13 in the CI to support CUDA builds
readonly -A linuxParamsByVersion=(
['3.13.5']='e2fd0080a6f0fc1ec84647acdcd8e0b4019770f48d83509e6a5b0b6ea27e5864 Linux'
['3.21.1']='bf496ce869d0aa8c1f57e4d1a2e50c8f2fb12a6cd7ccb37ad743bb88f6b76a1e linux'
)
if [ -z "${linuxParamsByVersion[$version]}" ]
then
echo "Given version ($version) is unsupported"
exit 1
fi
sha256sum=$(cut -f 1 <<<"${linuxParamsByVersion[$version]}")
platform=$(cut -f 2 <<<"${linuxParamsByVersion[$version]}")
arch="x86_64"
;;
Darwin)
shatool="shasum -a 256"
sha256sum="20dbede1d80c1ac80be2966172f8838c3d899951ac4467372f806b386d42ad3c"
sha256sum="9dc2978c4d94a44f71336fa88c15bb0eee47cf44b6ece51b10d1dfae95f82279"
platform="macos"
arch="universal"
;;

@ -101,7 +101,11 @@ foreach(option IN LISTS options)
# From Turing onward we set the architecture using the canonical
# CMAKE_CUDA_ARCHITECTURES
elseif(turing STREQUAL option)
set(CMAKE_CUDA_ARCHITECTURES "75" CACHE STRING "")
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set(CMAKE_CUDA_ARCHITECTURES "75" CACHE STRING "")
else()
set(VTKm_CUDA_Architecture "turing" CACHE STRING "")
endif()
elseif(hip STREQUAL option)
if(CMAKE_VERSION VERSION_LESS_EQUAL 3.20)
@ -140,6 +144,9 @@ foreach(option IN LISTS options)
if(VTKm_ENABLE_CUDA)
set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "")
endif()
if(VTKm_ENABLE_KOKKOS_HIP)
set(CMAKE_HIP_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "")
endif()
else()
message(FATAL_ERROR "CCACHE version [${CCACHE_VERSION}] is <= 4")
endif()
@ -161,10 +168,6 @@ if(SCCACHE_COMMAND)
set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
if(DEFINED VTKm_ENABLE_KOKKOS_HIP)
set(CMAKE_HIP_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
endif()
# Use VTKm_CUDA_Architecture to determine if we need CUDA sccache setup
# since this will also capture when kokkos is being used with CUDA backing
if(DEFINED VTKm_CUDA_Architecture OR DEFINED CMAKE_CUDA_ARCHITECTURES)

@ -0,0 +1,44 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
set(version 1.11.0)
if(CMAKE_HOST_SYSTEM_NAME STREQUAL "Linux")
set(sha256sum 9726e730d5b8599f82654dc80265e64a10a8a817552c34153361ed0c017f9f02)
set(platform linux)
elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Darwin")
set(sha256sum 21915277db59756bfc61f6f281c1f5e3897760b63776fd3d360f77dd7364137f)
set(platform mac)
elseif(CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
set(sha256sum d0ee3da143211aa447e750085876c9b9d7bcdd637ab5b2c5b41349c617f22f3b)
set(platform win)
else()
message(FATAL_ERROR "Unrecognized platform ${CMAKE_HOST_SYSTEM_NAME}")
endif()
set(tarball "ninja-${platform}.zip")
file(DOWNLOAD
"https://github.com/ninja-build/ninja/releases/download/v${version}/${tarball}" .gitlab/${tarball}
EXPECTED_HASH SHA256=${sha256sum}
SHOW_PROGRESS
)
execute_process(
COMMAND ${CMAKE_COMMAND} -E tar xf ${tarball}
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/.gitlab
RESULT_VARIABLE extract_results
)
if(extract_results)
message(FATAL_ERROR "Extracting `${tarball}` failed: ${extract_results}.")
endif()
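# Note: CI jobs invoke this helper with `cmake -VV -P .gitlab/ci/config/ninja.cmake`
# and then add $PWD/.gitlab to PATH so the extracted ninja binary is found.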

@ -42,12 +42,18 @@ if (test_exclusions)
set(test_exclusions "(${test_exclusions})")
endif ()
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.21)
set(junit_args OUTPUT_JUNIT "${CTEST_BINARY_DIRECTORY}/junit.xml")
endif()
# reduced parallel level so we don't exhaust system resources
ctest_memcheck(
PARALLEL_LEVEL "4"
RETURN_VALUE test_result
EXCLUDE "${test_exclusions}"
DEFECT_COUNT defects)
DEFECT_COUNT defects
${junit_args}
)
ctest_submit(PARTS Memcheck BUILD_ID build_id)
message(STATUS "Memcheck submission build_id: ${build_id}")

@ -32,7 +32,7 @@ if (test_exclusions)
set(test_exclusions "(${test_exclusions})")
endif ()
if (CMAKE_VERSION VERSION_GREATER 3.21.0)
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.21)
set(junit_args OUTPUT_JUNIT "${CTEST_BINARY_DIRECTORY}/junit.xml")
endif()

@ -2,24 +2,35 @@ FROM rocm/dev-ubuntu-20.04
LABEL maintainer "Vicente Adolfo Bolea Sanchez<vicente.bolea@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
cmake \
curl \
g++ \
git \
git-lfs \
libmpich-dev \
libomp-dev \
mpich \
ninja-build \
rsync \
ssh \
software-properties-common
RUN apt update && \
DEBIAN_FRONTEND=noninteractive apt install -y --no-install-recommends \
curl \
g++ \
git \
git-lfs \
libmpich-dev \
libomp-dev \
mpich \
ninja-build \
rsync \
ssh \
&& \
apt clean
# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install
# Provide CCACHE
ENV CCACHE_DIR "/ccache"
ENV PATH "/opt/ccache/bin:${PATH}"
ARG CCACHE_VERSION=4.6.1
RUN mkdir /opt/ccache/ && \
curl -L https://github.com/ccache/ccache/releases/download/v$CCACHE_VERSION/ccache-$CCACHE_VERSION-linux-x86_64.tar.xz | tar -vxJ && \
make -C ccache-$CCACHE_VERSION-linux-x86_64 install prefix=/opt/ccache && \
rm -rf ccache-$CCACHE_VERSION-linux-x86_64 && \
ccache -z && ccache -s
# Provide CMake
ARG CMAKE_VERSION=3.21.1
RUN mkdir /opt/cmake/ && \
@ -33,11 +44,10 @@ ENV CMAKE_PREFIX_PATH "/opt/rocm/lib/cmake:/opt/rocm/lib:${CMAKE_PREFIX_PATH}"
ENV CMAKE_GENERATOR "Ninja"
# Build and install Kokkos
ARG KOKKOS_VERSION=3.4.01
ARG KOKKOS_VERSION=3.6.00
COPY kokkos_cmake_config.cmake kokkos_cmake_config.cmake
RUN curl -L https://github.com/kokkos/kokkos/archive/refs/tags/$KOKKOS_VERSION.tar.gz | tar -xzf - && \
cmake -S kokkos-$KOKKOS_VERSION -B build -C kokkos_cmake_config.cmake && \
cmake --build build -v && \
sudo cmake --install build
RUN rm -rf build
cmake -S kokkos-$KOKKOS_VERSION -B build -C kokkos_cmake_config.cmake && \
cmake --build build -v && \
cmake --install build && \
rm -rf build kokkos-$KOKKOS_VERSION

.gitlab/ci/macos.yml (new file)

@ -0,0 +1,83 @@
# Ad-hoc build that runs on macOS machines
build:macos_xcode13:
extends:
- .macos_xcode13
- .macos_build_tags
- .cmake_build_macos
- .run_automatically
test:macos_xcode13:
extends:
- .macos_xcode13
- .macos_build_tags
- .cmake_test_macos
- .run_automatically
needs:
- build:macos_xcode13
dependencies:
- build:macos_xcode13
.macos_xcode13:
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: Ninja
CC: gcc
CXX: g++
DEVELOPER_DIR: "/Applications/Xcode-13.3.app/Contents/Developer"
VTKM_SETTINGS: "64bit_floats+shared+ccache"
.cmake_build_macos:
stage: build
interruptible: true
variables:
CCACHE_BASEDIR: "$CI_PROJECT_DIR"
CCACHE_DIR: "$CI_PROJECT_DIR/ccache"
# -isystem= is not affected by CCACHE_BASEDIR, thus we must ignore it
CCACHE_IGNOREOPTIONS: "-isystem=*"
CCACHE_COMPILERCHECK: "content"
CCACHE_NOHASHDIR: "true"
CCACHE_RESHARE: "true"
before_script:
- .gitlab/ci/config/cmake.sh
- export PATH=$PWD/.gitlab/cmake/bin:$PATH
- "cmake -VV -P .gitlab/ci/config/ccache.cmake"
- export PATH=$PWD/.gitlab/ccache:$PATH
- "cmake -VV -P .gitlab/ci/config/ninja.cmake"
- export PATH=$PWD/.gitlab:$PATH
- "cmake --version"
- "ccache --version"
- "ninja --version"
- "cmake -V -P .gitlab/ci/config/fetch_vtkm_tags.cmake"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
- "ctest -VV -S .gitlab/ci/ctest_build.cmake"
after_script:
- ccache -v -s
- ccache -z
extends:
- .cmake_build_artifacts
.cmake_test_macos:
stage: test
interruptible: true
before_script:
- .gitlab/ci/config/cmake.sh
- export PATH=.gitlab/cmake/bin:$PATH
- "cmake -VV -P .gitlab/ci/config/ninja.cmake"
- export PATH=$PWD/.gitlab:$PATH
- cmake --version
- ninja --version
script:
- "ctest $CTEST_TIMEOUT -VV -S .gitlab/ci/ctest_test.cmake"
extends:
- .cmake_test_artifacts
.macos_build_tags:
tags:
- vtk-m
- macos
- xcode-13.3
- nonconcurrent

@ -50,7 +50,7 @@ build:ubuntu1604_gcc5_2:
extends:
- .ubuntu1604_cuda
- .cmake_build_linux
- .run_master
- .run_upstream_branches
- .use_minimum_supported_cmake
variables:
CC: "gcc-5"
@ -69,7 +69,7 @@ test:ubuntu1804_test_ubuntu1604_gcc5_2:
extends:
- .ubuntu1804_cuda
- .cmake_test_linux
- .run_master
- .run_upstream_branches
variables:
CTEST_EXCLUSIONS: "built_against_test_install"
dependencies:

@ -91,7 +91,7 @@ build:ubuntu1804_clang_cuda:
- .ubuntu1804_cuda
- .cmake_build_linux
- .run_automatically
# - .run_master
# - .run_upstream_branches
variables:
CC: "clang-8"
CXX: "clang++-8"
@ -110,7 +110,7 @@ test:ubuntu1804_clang_cuda:
- .ubuntu1804_cuda
- .cmake_test_linux
- .run_automatically
# - .run_master
# - .run_upstream_branches
dependencies:
- build:ubuntu1804_clang_cuda
needs:

@ -69,13 +69,23 @@ build:ubuntu2004_hip_kokkos:
extends:
- .ubuntu2004_hip_kokkos
- .cmake_build_linux
- .run_scheduled
- .run_upstream_branches
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "benchmarks+kokkos+hip+no_virtual+no_rendering"
VTKM_SETTINGS: "benchmarks+kokkos+hip+no_virtual+no_rendering+ccache"
CMAKE_PREFIX_PATH: "/opt/rocm/lib/cmake"
CTEST_MAX_PARALLELISM: "1"
timeout: 12 hours
CCACHE_BASEDIR: "$CI_PROJECT_DIR"
# -isystem= is not affected by CCACHE_BASEDIR, thus we must ignore it
CCACHE_IGNOREOPTIONS: "-isystem=*"
CCACHE_COMPILERCHECK: "content"
CCACHE_NOHASHDIR: "true"
CCACHE_RESHARE: "true"
after_script:
- ccache -v -s
- ccache -z
timeout: 10 hours
test:ubuntu2004_hip_kokkos:
tags:
@ -86,7 +96,7 @@ test:ubuntu2004_hip_kokkos:
extends:
- .ubuntu2004_hip_kokkos
- .cmake_test_linux
- .run_scheduled
- .run_upstream_branches
variables:
CTEST_TIMEOUT: "30"
dependencies:

@ -29,8 +29,12 @@
- Invoke-Expression -Command .gitlab/ci/config/cmake.ps1
- Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
- $pwdpath = $pwd.Path
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab;$pwdpath\.gitlab\cmake\bin;$env:PATH"
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab\cmake\bin;$env:PATH"
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/ccache.cmake"
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab\ccache;$env:PATH"
- "cmake -V -P .gitlab/ci/config/ninja.cmake"
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab;$env:PATH"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
@ -47,17 +51,9 @@
- build/config/
# CTest and CMake install files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
#
# Note: this also captures our CIState.cmake file
- build/CMakeCache.txt
- build/*.cmake
- build/*/*.cmake
- build/*/*/*.cmake
- build/*/*/*/*.cmake
- build/*/*/*/*/*.cmake
- build/*/*/*/*/*/*.cmake
- build/**/*.cmake
- build/Testing/
# CDash files.
@ -75,7 +71,10 @@
- Invoke-Expression -Command .gitlab/ci/config/cmake.ps1
- Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
- $pwdpath = $pwd.Path
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab;$pwdpath\.gitlab\cmake\bin;$env:PATH"
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab\cmake\bin;$env:PATH"
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/ninja.cmake"
- Set-Item -Force -Path "env:PATH" -Value "$pwdpath\.gitlab;$env:PATH"
script:
- "ctest -VV -S .gitlab/ci/ctest_test.cmake"

@ -21,46 +21,64 @@ git submodule update --recursive --init
## Create update branch
- [ ] Create update branch `git checkout -b update-to-v@VERSION@`
<!-- if @RC@ == "-rc1"
<!-- if @RC@ == "-rc1"-->
- [ ] Bring in the history of master as a second parent (resolve conflicts by
always taking master's version)
```
git merge --no-ff origin/master
```
-->
<!-- endif -->
<!-- Do we have new release notes?
<!-- if not a patch release -->
- [ ] Update the major and minor version in `version.txt`:
```
echo "@MAJOR@.@MINOR@.9999" > version.txt
git add version.txt
```
<!-- endif -->
- [ ] Update the version (not in patch releases) and date in the LICENSE.md
file `git add LICENSE.md`.
- [ ] Create commit that updates the License (and version.txt if modified):
```
git commit -m 'release: update version and License'
```
<!-- Do we have new release notes? -->
- [ ] Craft or update [changelog](#generate-change-log)
`docs/changelog/@VERSION@/release-notes.md` file.
- [ ] Create release notes commit.
```
git add docs/changelog/@VERSION@/release-notes.md
git rm docs/changelog/*.md
git commit -m 'Add release notes for @VERSION@@RC@'
git commit -m 'release: @VERSION@@RC@ release notes'
```
-->
- [ ] Update the version (not in patch releases) and date in the LICENSE.md
file.
<!-- endif -->
- [ ] Create update version commit:
```
# Create branch
git checkout -b update-to-v@VERSION@@RC@
echo @VERSION@@RC@ > version.txt
git add version.txt
# Create commit with the following template
# Nth is counted by the number of tags
# Nth is counted by the number of final release tags
git commit -m '@VERSION@@RC@ is our Nth official release of VTK-m.
The major changes to VTK-m from (previous release) can be found in:
docs/changelog/@VERSION@/release-notes.md' version.txt'
docs/changelog/@VERSION@/release-notes.md' version.txt
```
- [ ] `git tag -a -m 'VTKm @VERSION@@RC@' v@VERSION@@RC@ HEAD`
- Integrate changes to `release` branch
- [ ] Create a MR using the [release-mr script][1]
(see [notes](#notes-about-update-mr)).
<!-- if not patch release -->
- [ ] Add (or ensure) at the bottom of the description of the merge request:
`Backport: master:HEAD~1`
<!-- elseif patch release -->
- [ ] Remove (or ensure) that at the bottom of the description of the merge
request there is no `Backport` instruction.
<!-- endif -->
- [ ] Get +1
- [ ] `Do: merge`
- Push tags
@ -76,10 +94,14 @@ The major changes to VTK-m from (previous release) can be found in:
- [ ] Tag new version of the [VTK-m User Guide][2].
<!-- endif -->
- [ ] Post an [Email Announcements](#email-announcements) VTK-m mailing list.
<!-- if not patch release -->
- [ ] Ensure that the content of `version.txt` in master is
`[@MAJOR@ @MINOR@](@MAJOR@.@MINOR@.9999)`.
<!-- endif release -->
---
# Annex
# Annex
## Generate change log
Construct a `docs/changelog/@VERSION@/` folder.
@ -157,6 +179,16 @@ to the relevant `release-notes` section.
Lastly, `update-mr` can be used multiple times with different commits in the same
branch.
## Notes about version.txt
Master and the release branch do not share the same version.txt scheme. In the
release branch the patch and release-candidate version is observed, whereas in
master the patch field is fixed to _9999_, indicating that each of its commits is
a development version.
- Master: `@MAJOR@.@MINOR@.9999`
- Release: `@MAJOR@.@MINOR@.@PATCH@@RC@`
## Email Announcements
Announce the new VTK-m release on the mailing list. You will need to compute

@ -114,6 +114,8 @@ function(do_verify root_dir prefix)
set(file_exceptions
thirdparty/diy/vtkmdiy/cmake/mpi_types.h
thirdparty/lodepng/vtkmlodepng/lodepng.h
thirdparty/loguru/vtkmloguru/loguru.hpp
# Ignore deprecated virtual classes (which are not installed if VTKm_NO_DEPRECATED_VIRTUAL
# is on). These exceptions can be removed when these files are completely removed.

@ -13,11 +13,11 @@ include(VTKmWrappers)
function(vtkm_create_test_executable
prog_name
sources
device_sources
libraries
defines
is_mpi_test
use_mpi
enable_all_backends
use_job_pool)
vtkm_diy_use_mpi_push()
@ -41,23 +41,12 @@ function(vtkm_create_test_executable
#the creation of the test source list needs to occur before the labeling as
#cuda. This is so that we get the correctly named entry points generated
create_test_sourcelist(test_sources ${prog}.cxx ${sources} ${extraArgs})
create_test_sourcelist(test_sources ${prog}.cxx ${sources} ${device_sources} ${extraArgs})
add_executable(${prog} ${prog}.cxx ${sources})
add_executable(${prog} ${test_sources})
vtkm_add_drop_unused_function_flags(${prog})
target_compile_definitions(${prog} PRIVATE ${defines})
#determine if we have a device that requires a separate compiler enabled
set(device_lang_enabled FALSE)
if( (TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda) OR (TARGET vtkm::kokkos_hip))
set(device_lang_enabled TRUE)
endif()
#if all backends are enabled, we can use the device compiler to handle all possible backends.
set(device_sources)
if(device_lang_enabled AND (enable_all_backends OR (TARGET vtkm::kokkos_hip)))
set(device_sources ${sources})
endif()
vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
@ -83,68 +72,89 @@ endfunction()
# (package, module, whatever you call it). Usage:
#
# vtkm_unit_tests(
# NAME
# [ NAME <name> ]
# SOURCES <source_list>
# LIBRARIES <dependent_library_list>
# DEFINES <target_compile_definitions>
# TEST_ARGS <argument_list>
# MPI
# ALL_BACKENDS
# USE_VTKM_JOB_POOL
# <options>
# [ DEVICE_SOURCES <source_list> ]
# [ LIBRARIES <dependent_library_list> ]
# [ DEFINES <target_compile_definitions> ]
# [ TEST_ARGS <argument_list> ]
# [ MPI ]
# [ BACKEND <device> ]
# [ ALL_BACKENDS ]
# [ USE_VTKM_JOB_POOL ]
# )
#
# [LIBRARIES] : extra libraries that this set of tests need to link too
# NAME : Specify the name of the testing executable. If not specified,
# UnitTests_<kitname> is used.
#
# [DEFINES] : extra defines that need to be set for all unit test sources
# SOURCES: A list of the source files. Each file is expected to contain a
# function with the same name as the source file. For example, if SOURCES
# contains `UnitTestFoo.cxx`, then `UnitTestFoo.cxx` should contain a
# function named `UnitTestFoo`. A test with this name is also added to ctest.
#
# [LABEL] : CTest Label to associate to this set of tests
# LIBRARIES: Extra libraries that this set of tests need to link to.
#
# [TEST_ARGS] : arguments that should be passed on the command line to the
# test executable
# DEFINES: Extra defines to be set for all unit test sources.
#
# [MPI] : when specified, the tests should be run in parallel if
# MPI is enabled. The tests should also be able to build and run
# When MPI is not available, i.e., they should not make explicit
# use of MPI and instead completely rely on DIY.
# [ALL_BACKENDS] : when specified, the tests would test against all enabled
# backends. Otherwise we expect the tests to manage the
# backends at runtime.
# TEST_ARGS: Arguments that should be passed on the command line to the
# test executable when executed by ctest.
#
# MPI: When specified, the tests should be run in parallel if MPI is enabled.
# The tests should also be able to build and run when MPI is not available,
# i.e., they should not make explicit use of MPI and instead completely rely
# on DIY.
#
# BACKEND: When used, a specific backend will be forced for the device.
# A `--vtkm-device` flag with the specified device will be added to the test's
# command line arguments. When not used, a backend will be chosen.
#
# ALL_BACKENDS: When used, a separate ctest test is created for each device
# that VTK-m is compiled for. The call will add the `--vtkm-device` flag when
# running the test to force the test for a particular backend.
#
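# A hypothetical usage sketch (file and library names below are illustrative
# only, not taken from this repository):
#
#   vtkm_unit_tests(
#     SOURCES UnitTestFoo.cxx UnitTestBar.cxx
#     DEVICE_SOURCES UnitTestFooDevice.cxx
#     LIBRARIES vtkm_filter
#     ALL_BACKENDS
#     )
#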
function(vtkm_unit_tests)
set(options)
set(global_options ${options} USE_VTKM_JOB_POOL MPI ALL_BACKENDS)
set(oneValueArgs BACKEND NAME LABEL)
set(multiValueArgs SOURCES LIBRARIES DEFINES TEST_ARGS)
set(multiValueArgs SOURCES DEVICE_SOURCES LIBRARIES DEFINES TEST_ARGS)
cmake_parse_arguments(VTKm_UT
"${global_options}" "${oneValueArgs}" "${multiValueArgs}"
${ARGN}
)
vtkm_parse_test_options(VTKm_UT_SOURCES "${options}" ${VTKm_UT_SOURCES})
set(per_device_command_line_arguments "NONE")
set(per_device_suffix "")
set(per_device_timeout 180)
set(per_device_serial FALSE)
set(per_device_command_line_arguments)
set(per_device_suffix)
set(per_device_timeout)
set(per_device_serial)
set(enable_all_backends ${VTKm_UT_ALL_BACKENDS})
if(enable_all_backends)
set(per_device_command_line_arguments --vtkm-device=serial)
set(per_device_suffix "SERIAL")
if (VTKm_ENABLE_CUDA)
if(NOT VTKm_UT_BACKEND)
set(enable_all_backends ${VTKm_UT_ALL_BACKENDS})
# If ALL_BACKENDS is specified, add a test for each backend. If it is not
# specified, pick a backend to use. Pick the most "specific" backend so
# that different CI builds will use different backends. This ensures that
# we do not have a test that always drops down to serial.
if(VTKm_ENABLE_CUDA AND (enable_all_backends OR NOT per_device_suffix))
list(APPEND per_device_command_line_arguments --vtkm-device=cuda)
list(APPEND per_device_suffix "CUDA")
#CUDA tests generally require more time because of kernel generation.
list(APPEND per_device_timeout 1500)
list(APPEND per_device_serial FALSE)
endif()
if (VTKm_ENABLE_TBB)
if(VTKm_ENABLE_KOKKOS AND (enable_all_backends OR NOT per_device_suffix))
list(APPEND per_device_command_line_arguments --vtkm-device=kokkos)
list(APPEND per_device_suffix "KOKKOS")
#may require more time because of kernel generation.
list(APPEND per_device_timeout 1500)
list(APPEND per_device_serial FALSE)
endif()
if(VTKm_ENABLE_TBB AND (enable_all_backends OR NOT per_device_suffix))
list(APPEND per_device_command_line_arguments --vtkm-device=tbb)
list(APPEND per_device_suffix "TBB")
list(APPEND per_device_timeout 180)
list(APPEND per_device_serial FALSE)
endif()
if (VTKm_ENABLE_OPENMP)
if(VTKm_ENABLE_OPENMP AND (enable_all_backends OR NOT per_device_suffix))
list(APPEND per_device_command_line_arguments --vtkm-device=openmp)
list(APPEND per_device_suffix "OPENMP")
list(APPEND per_device_timeout 180)
@ -154,13 +164,26 @@ function(vtkm_unit_tests)
#serially
list(APPEND per_device_serial TRUE)
endif()
if (VTKm_ENABLE_KOKKOS)
list(APPEND per_device_command_line_arguments --vtkm-device=kokkos)
list(APPEND per_device_suffix "KOKKOS")
#may require more time because of kernel generation.
list(APPEND per_device_timeout 1500)
if(enable_all_backends OR NOT per_device_suffix)
list(APPEND per_device_command_line_arguments --vtkm-device=serial)
list(APPEND per_device_suffix "SERIAL")
list(APPEND per_device_timeout 180)
list(APPEND per_device_serial FALSE)
endif()
if(NOT enable_all_backends)
# If not enabling all backends, exactly one backend should have been added.
list(LENGTH per_device_suffix number_of_devices)
if(NOT number_of_devices EQUAL 1)
message(FATAL_ERROR "Expected to pick one backend")
endif()
endif()
else()
# A specific backend was requested.
set(per_device_command_line_arguments --vtkm-device=${VTKm_UT_BACKEND})
set(per_device_suffix ${VTKm_UT_BACKEND})
set(per_device_timeout 180)
# Some devices don't like multiple tests run at the same time.
set(per_device_serial TRUE)
endif()
set(test_prog)
@ -188,33 +211,33 @@ function(vtkm_unit_tests)
vtkm_create_test_executable(
${test_prog}
"${VTKm_UT_SOURCES}"
"${VTKm_UT_DEVICE_SOURCES}"
"${VTKm_UT_LIBRARIES}"
"${VTKm_UT_DEFINES}"
ON # is_mpi_test
ON # use_mpi
${enable_all_backends}
${VTKm_UT_USE_VTKM_JOB_POOL})
endif()
if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
vtkm_create_test_executable(
${test_prog}
"${VTKm_UT_SOURCES}"
"${VTKm_UT_DEVICE_SOURCES}"
"${VTKm_UT_LIBRARIES}"
"${VTKm_UT_DEFINES}"
ON # is_mpi_test
OFF # use_mpi
${enable_all_backends}
${VTKm_UT_USE_VTKM_JOB_POOL})
endif()
else()
vtkm_create_test_executable(
${test_prog}
"${VTKm_UT_SOURCES}"
"${VTKm_UT_DEVICE_SOURCES}"
"${VTKm_UT_LIBRARIES}"
"${VTKm_UT_DEFINES}"
OFF # is_mpi_test
OFF # use_mpi
${enable_all_backends}
${VTKm_UT_USE_VTKM_JOB_POOL})
endif()
@ -225,19 +248,16 @@ function(vtkm_unit_tests)
#exclusive on the end ( e.g. for(i=0; i < n; ++i))
break()
endif()
if(per_device_command_line_arguments STREQUAL "NONE")
set(device_command_line_argument)
set(upper_backend ${per_device_suffix})
set(timeout ${per_device_timeout})
set(run_serial ${per_device_serial})
else()
list(GET per_device_command_line_arguments ${index} device_command_line_argument)
if(enable_all_backends)
list(GET per_device_suffix ${index} upper_backend)
list(GET per_device_timeout ${index} timeout)
list(GET per_device_serial ${index} run_serial)
else()
set(upper_backend)
endif()
list(GET per_device_command_line_arguments ${index} device_command_line_argument)
list(GET per_device_timeout ${index} timeout)
list(GET per_device_serial ${index} run_serial)
foreach (test ${VTKm_UT_SOURCES})
foreach (test ${VTKm_UT_SOURCES} ${VTKm_UT_DEVICE_SOURCES})
get_filename_component(tname ${test} NAME_WE)
if(VTKm_UT_MPI)
if (VTKm_ENABLE_MPI)

@ -413,7 +413,7 @@ extend or revise the topic.
2. Get the new version from gitlab
$ git gitlab-sync -f
$ git gitlab-sync
If you do not wish to have the "Kitware Robot" automatically reformat your

@ -29,7 +29,6 @@
#include <vtkm/cont/internal/OptionParser.h>
#include <vtkm/filter/FieldSelection.h>
#include <vtkm/filter/PolicyBase.h>
#include <vtkm/filter/contour/Contour.h>
#include <vtkm/filter/entity_extraction/ExternalFaces.h>
#include <vtkm/filter/entity_extraction/Threshold.h>

@ -10,11 +10,16 @@
#include "Benchmarker.h"
#include <cstddef>
#include <random>
#include <sstream>
#include <unordered_map>
#include <vtkm/ImplicitFunction.h>
#include <vtkm/Particle.h>
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/BoundsCompute.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/FieldRangeCompute.h>
@ -25,9 +30,9 @@
#include <vtkm/cont/internal/OptionParser.h>
#include <vtkm/filter/Streamline.h>
#include <vtkm/filter/contour/Contour.h>
#include <vtkm/filter/contour/Slice.h>
#include <vtkm/filter/flow/Streamline.h>
#include <vtkm/filter/geometry_refinement/Tetrahedralize.h>
#include <vtkm/filter/geometry_refinement/Tube.h>
#include <vtkm/filter/vector_analysis/Gradient.h>
@ -45,7 +50,6 @@
namespace
{
const uint32_t DEFAULT_NUM_CYCLES = 20;
const vtkm::Id DEFAULT_DATASET_DIM = 128;
const vtkm::Id DEFAULT_IMAGE_SIZE = 1024;
@ -63,6 +67,29 @@ std::string PointScalarsName;
// The point vectors to use:
std::string PointVectorsName;
// Global counters for number of cycles
// These are globals because google benchmarks restarts the test for every
// repetition when using --benchmark_repetitions
// Additionally, we need this global flag for when not doing repetitions,
// as benchmark will repeatedly drop in and out of the measured function
// and report the number of iterations for the last run of the function.
// Thus, we'll have way more output images than what the iteration number
// would lead you to believe (maybe fixed in > 1.7 with warmup time specifier)
bool benchmark_repetitions = false;
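// Boost-style hash combiner: folds each value into `seed`. Used below to build
// a key identifying each benchmark configuration in the bench_cycles map.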
inline void hash_combine(std::size_t& vtkmNotUsed(seed)) {}
template <typename T, typename... Rest>
inline void hash_combine(std::size_t& seed, const T& v, Rest... rest)
{
std::hash<T> hasher;
seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
hash_combine(seed, rest...);
}
std::unordered_map<size_t, int> bench_cycles;
enum class RenderingMode
{
None = 0,
@ -282,33 +309,43 @@ void BenchContour(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const uint32_t cycle = static_cast<uint32_t>(state.range(0));
const vtkm::Id numIsoVals = static_cast<vtkm::Id>(state.range(1));
const bool isStructured = static_cast<bool>(state.range(2));
const bool isMultiBlock = static_cast<bool>(state.range(3));
const RenderingMode renderAlgo = static_cast<RenderingMode>(state.range(4));
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(cycle, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
vtkm::filter::contour::Contour filter;
filter.SetActiveField(PointScalarsName, vtkm::cont::Field::Association::Points);
filter.SetMergeDuplicatePoints(true);
filter.SetGenerateNormals(true);
filter.SetComputeFastNormalsForStructured(true);
filter.SetComputeFastNormalsForUnstructured(true);
const vtkm::Id numIsoVals = static_cast<vtkm::Id>(state.range(0));
const bool isStructured = static_cast<bool>(state.range(1));
const bool isMultiBlock = static_cast<bool>(state.range(2));
const RenderingMode renderAlgo = static_cast<RenderingMode>(state.range(3));
vtkm::cont::Timer totalTimer{ device };
vtkm::cont::Timer filterTimer{ device };
vtkm::cont::Timer renderTimer{ device };
vtkm::cont::Timer writeTimer{ device };
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchContour"), isStructured, isMultiBlock, renderAlgo);
int* cycles = &(bench_cycles[hash_val]);
if (!benchmark_repetitions)
*cycles = 0;
for (auto _ : state)
{
(void)_;
totalTimer.Start();
// Disable the benchmark timers for the updating/creation of the datasets
state.PauseTiming(); // Stop timers.
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(*cycles, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
vtkm::filter::contour::Contour filter;
filter.SetActiveField(PointScalarsName, vtkm::cont::Field::Association::Points);
filter.SetMergeDuplicatePoints(true);
filter.SetGenerateNormals(true);
filter.SetComputeFastNormalsForStructured(true);
filter.SetComputeFastNormalsForUnstructured(true);
state.ResumeTiming(); // And resume timers.
filterTimer.Start();
std::vector<vtkm::cont::DataSet> dataSets;
if (isMultiBlock)
@ -330,11 +367,13 @@ void BenchContour(::benchmark::State& state)
renderTimer.Stop();
writeTimer.Start();
WriteToDisk(*canvas, renderAlgo, "contour", isStructured, isMultiBlock, cycle);
WriteToDisk(*canvas, renderAlgo, "contour", isStructured, isMultiBlock, *cycles);
writeTimer.Stop();
totalTimer.Stop();
(*cycles)++;
state.SetIterationTime(totalTimer.GetElapsedTime());
state.counters.insert(
{ { "InputGenTime", static_cast<uint32_t>(inputGenTimer.GetElapsedTime() * 1000) },
@ -346,21 +385,24 @@ void BenchContour(::benchmark::State& state)
void BenchContourGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "Cycle", "NIsos", "IsStructured", "IsMultiBlock", "RenderingMode" });
bm->ArgNames({ "NIsos", "IsStructured", "IsMultiBlock", "RenderingMode" });
std::vector<uint32_t> isStructureds{ false, true };
std::vector<uint32_t> isMultiBlocks{ false, true };
std::vector<RenderingMode> renderingModes{ RenderingMode::RayTrace };
for (uint32_t cycle = 1; cycle <= DEFAULT_NUM_CYCLES; ++cycle)
for (auto& isStructured : isStructureds)
{
for (auto& isStructured : isStructureds)
for (auto& isMultiBlock : isMultiBlocks)
{
for (auto& isMultiBlock : isMultiBlocks)
for (auto& mode : renderingModes)
{
for (auto& mode : renderingModes)
{
bm->Args({ cycle, 10, isStructured, isMultiBlock, static_cast<int>(mode) });
}
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchContour"), isStructured, isMultiBlock, mode);
auto search = bench_cycles.find(hash_val);
// If we can't find the hash, or we're not doing repetitions, reset to 0
if (search == bench_cycles.end())
bench_cycles[hash_val] = 0;
bm->Args({ 10, isStructured, isMultiBlock, static_cast<int>(mode) });
}
}
}
@ -423,7 +465,7 @@ void AddField(vtkm::cont::PartitionedDataSet& input,
}
template <typename DataSetType>
DataSetType RunStreamlinesHelper(vtkm::filter::Streamline& filter, const DataSetType& input)
DataSetType RunStreamlinesHelper(vtkm::filter::flow::Streamline& filter, const DataSetType& input)
{
auto dataSetBounds = vtkm::cont::BoundsCompute(input);
vtkm::cont::ArrayHandle<vtkm::Particle> seedArray;
@ -447,30 +489,40 @@ void BenchStreamlines(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const uint32_t cycle = static_cast<uint32_t>(state.range(0));
const bool isStructured = static_cast<bool>(state.range(1));
const bool isMultiBlock = static_cast<bool>(state.range(2));
const RenderingMode renderAlgo = static_cast<RenderingMode>(state.range(3));
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(cycle, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
vtkm::filter::Streamline streamline;
streamline.SetStepSize(0.1f);
streamline.SetNumberOfSteps(1000);
streamline.SetActiveField(PointVectorsName);
const bool isStructured = static_cast<bool>(state.range(0));
const bool isMultiBlock = static_cast<bool>(state.range(1));
const RenderingMode renderAlgo = static_cast<RenderingMode>(state.range(2));
vtkm::cont::Timer totalTimer{ device };
vtkm::cont::Timer filterTimer{ device };
vtkm::cont::Timer renderTimer{ device };
vtkm::cont::Timer writeTimer{ device };
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchStreamlines"), isStructured, isMultiBlock, renderAlgo);
int* cycles = &(bench_cycles[hash_val]);
if (!benchmark_repetitions)
*cycles = 0;
for (auto _ : state)
{
(void)_;
totalTimer.Start();
// Disable the benchmark timers for the updating/creation of the datasets
state.PauseTiming(); // Stop timers.
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(*cycles, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
vtkm::filter::flow::Streamline streamline;
streamline.SetStepSize(0.1f);
streamline.SetNumberOfSteps(1000);
streamline.SetActiveField(PointVectorsName);
state.ResumeTiming(); // And resume timers.
filterTimer.Start();
std::vector<vtkm::cont::DataSet> dataSets;
@ -493,11 +545,13 @@ void BenchStreamlines(::benchmark::State& state)
renderTimer.Stop();
writeTimer.Start();
WriteToDisk(*canvas, renderAlgo, "streamlines", isStructured, isMultiBlock, cycle);
WriteToDisk(*canvas, renderAlgo, "streamlines", isStructured, isMultiBlock, *cycles);
writeTimer.Stop();
totalTimer.Stop();
(*cycles)++;
state.SetIterationTime(totalTimer.GetElapsedTime());
state.counters.insert(
{ { "InputGenTime", static_cast<uint32_t>(inputGenTimer.GetElapsedTime() * 1000) },
@ -509,21 +563,24 @@ void BenchStreamlines(::benchmark::State& state)
void BenchStreamlinesGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "Cycle", "IsStructured", "IsMultiBlock", "RenderingMode" });
bm->ArgNames({ "IsStructured", "IsMultiBlock", "RenderingMode" });
std::vector<uint32_t> isStructureds{ false, true };
std::vector<uint32_t> isMultiBlocks{ false, true };
std::vector<RenderingMode> renderingModes{ RenderingMode::Mesh };
for (uint32_t cycle = 1; cycle <= DEFAULT_NUM_CYCLES; ++cycle)
for (auto& isStructured : isStructureds)
{
for (auto& isStructured : isStructureds)
for (auto& isMultiBlock : isMultiBlocks)
{
for (auto& isMultiBlock : isMultiBlocks)
for (auto& mode : renderingModes)
{
for (auto& mode : renderingModes)
{
bm->Args({ cycle, isStructured, isMultiBlock, static_cast<int>(mode) });
}
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchStreamlines"), isStructured, isMultiBlock, mode);
auto search = bench_cycles.find(hash_val);
// If we can't find the hash, or we're not doing repetitions, reset to 0
if (search == bench_cycles.end())
bench_cycles[hash_val] = 0;
bm->Args({ isStructured, isMultiBlock, static_cast<int>(mode) });
}
}
}
@ -567,15 +624,9 @@ void BenchSlice(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const uint32_t cycle = static_cast<uint32_t>(state.range(0));
const bool isStructured = static_cast<bool>(state.range(1));
const bool isMultiBlock = static_cast<bool>(state.range(2));
const RenderingMode renderAlgo = static_cast<RenderingMode>(state.range(3));
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(cycle, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
const bool isStructured = static_cast<bool>(state.range(0));
const bool isMultiBlock = static_cast<bool>(state.range(1));
const RenderingMode renderAlgo = static_cast<RenderingMode>(state.range(2));
vtkm::filter::contour::Slice filter;
@ -584,10 +635,26 @@ void BenchSlice(::benchmark::State& state)
vtkm::cont::Timer renderTimer{ device };
vtkm::cont::Timer writeTimer{ device };
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchSlice"), isStructured, isMultiBlock, renderAlgo);
int* cycles = &(bench_cycles[hash_val]);
if (!benchmark_repetitions)
*cycles = 0;
for (auto _ : state)
{
(void)_;
totalTimer.Start();
// Disable the benchmark timers for the updating/creation of the datasets
state.PauseTiming(); // Stop timers.
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(*cycles, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
state.ResumeTiming(); // And resume timers.
filterTimer.Start();
std::vector<vtkm::cont::DataSet> dataSets;
if (isMultiBlock)
@ -617,11 +684,13 @@ void BenchSlice(::benchmark::State& state)
renderTimer.Stop();
writeTimer.Start();
WriteToDisk(*canvas, renderAlgo, "slice", isStructured, isMultiBlock, cycle);
WriteToDisk(*canvas, renderAlgo, "slice", isStructured, isMultiBlock, *cycles);
writeTimer.Stop();
totalTimer.Stop();
(*cycles)++;
state.SetIterationTime(totalTimer.GetElapsedTime());
state.counters.insert(
{ { "InputGenTime", static_cast<uint32_t>(inputGenTimer.GetElapsedTime() * 1000) },
@ -633,21 +702,24 @@ void BenchSlice(::benchmark::State& state)
void BenchSliceGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "Cycle", "IsStructured", "IsMultiBlock", "RenderingMode" });
bm->ArgNames({ "IsStructured", "IsMultiBlock", "RenderingMode" });
std::vector<uint32_t> isStructureds{ false, true };
std::vector<uint32_t> isMultiBlocks{ false, true };
std::vector<RenderingMode> renderingModes{ RenderingMode::RayTrace };
for (uint32_t cycle = 1; cycle <= DEFAULT_NUM_CYCLES; ++cycle)
for (auto& isStructured : isStructureds)
{
for (auto& isStructured : isStructureds)
for (auto& isMultiBlock : isMultiBlocks)
{
for (auto& isMultiBlock : isMultiBlocks)
for (auto& mode : renderingModes)
{
for (auto& mode : renderingModes)
{
bm->Args({ cycle, isStructured, isMultiBlock, static_cast<int>(mode) });
}
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchSlice"), isStructured, isMultiBlock, mode);
auto search = bench_cycles.find(hash_val);
// If we can't find the hash, or we're not doing repetitions, reset to 0
if (search == bench_cycles.end())
bench_cycles[hash_val] = 0;
bm->Args({ isStructured, isMultiBlock, static_cast<int>(mode) });
}
}
}
@ -659,26 +731,35 @@ void BenchMeshRendering(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const uint32_t cycle = static_cast<uint32_t>(state.range(0));
const bool isStructured = static_cast<bool>(state.range(1));
const bool isMultiBlock = static_cast<bool>(state.range(2));
const bool isStructured = static_cast<bool>(state.range(0));
const bool isMultiBlock = static_cast<bool>(state.range(1));
vtkm::cont::Timer inputGenTimer{ device };
vtkm::cont::Timer renderTimer{ device };
vtkm::cont::Timer writeTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(cycle, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
vtkm::cont::Timer totalTimer{ device };
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchMeshRendering"), isStructured, isMultiBlock);
int* cycles = &(bench_cycles[hash_val]);
if (!benchmark_repetitions)
*cycles = 0;
for (auto _ : state)
{
(void)_;
totalTimer.Start();
// Disable the benchmark timers for the updating/creation of the datasets
state.PauseTiming(); // Stop timers.
inputGenTimer.Start();
BuildInputDataSet(*cycles, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
state.ResumeTiming(); // And resume timers.
std::vector<vtkm::cont::DataSet> dataSets =
isMultiBlock ? ExtractDataSets(PartitionedInputDataSet) : ExtractDataSets(InputDataSet);
@ -687,11 +768,13 @@ void BenchMeshRendering(::benchmark::State& state)
renderTimer.Stop();
writeTimer.Start();
WriteToDisk(*canvas, RenderingMode::Mesh, "mesh", isStructured, isMultiBlock, cycle);
WriteToDisk(*canvas, RenderingMode::Mesh, "mesh", isStructured, isMultiBlock, *cycles);
writeTimer.Stop();
totalTimer.Stop();
(*cycles)++;
state.SetIterationTime(totalTimer.GetElapsedTime());
state.counters.insert(
{ { "InputGenTime", static_cast<uint32_t>(inputGenTimer.GetElapsedTime() * 1000) },
@ -703,18 +786,21 @@ void BenchMeshRendering(::benchmark::State& state)
void BenchMeshRenderingGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "Cycle", "IsStructured", "IsMultiBlock" });
bm->ArgNames({ "IsStructured", "IsMultiBlock" });
std::vector<uint32_t> isStructureds{ false, true };
std::vector<uint32_t> isMultiBlocks{ false, true };
for (uint32_t cycle = 1; cycle <= DEFAULT_NUM_CYCLES; ++cycle)
for (auto& isStructured : isStructureds)
{
for (auto& isStructured : isStructureds)
for (auto& isMultiBlock : isMultiBlocks)
{
for (auto& isMultiBlock : isMultiBlocks)
{
bm->Args({ cycle, isStructured, isMultiBlock });
}
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchMeshRendering"), isStructured, isMultiBlock);
auto search = bench_cycles.find(hash_val);
// If we can't find the hash, or we're not doing repetitions, reset to 0
if (search == bench_cycles.end())
bench_cycles[hash_val] = 0;
bm->Args({ isStructured, isMultiBlock });
}
}
}
@ -725,24 +811,33 @@ void BenchVolumeRendering(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const uint32_t cycle = static_cast<uint32_t>(state.range(0));
const bool isStructured = true;
const bool isMultiBlock = static_cast<bool>(state.range(1));
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(cycle, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
const bool isMultiBlock = static_cast<bool>(state.range(0));
vtkm::cont::Timer totalTimer{ device };
vtkm::cont::Timer renderTimer{ device };
vtkm::cont::Timer writeTimer{ device };
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchVolumeRendering"), isMultiBlock);
int* cycles = &(bench_cycles[hash_val]);
if (!benchmark_repetitions)
*cycles = 0;
for (auto _ : state)
{
(void)_;
totalTimer.Start();
// Disable the benchmark timers for the updating/creation of the datasets
state.PauseTiming(); // Stop timers.
vtkm::cont::Timer inputGenTimer{ device };
inputGenTimer.Start();
BuildInputDataSet(*cycles, isStructured, isMultiBlock, DataSetDim);
inputGenTimer.Stop();
state.ResumeTiming(); // And resume timers.
renderTimer.Start();
std::vector<vtkm::cont::DataSet> dataSets =
isMultiBlock ? ExtractDataSets(PartitionedInputDataSet) : ExtractDataSets(InputDataSet);
@ -750,11 +845,13 @@ void BenchVolumeRendering(::benchmark::State& state)
renderTimer.Stop();
writeTimer.Start();
WriteToDisk(*canvas, RenderingMode::Volume, "volume", isStructured, isMultiBlock, cycle);
WriteToDisk(*canvas, RenderingMode::Volume, "volume", isStructured, isMultiBlock, *cycles);
writeTimer.Stop();
totalTimer.Stop();
(*cycles)++;
state.SetIterationTime(totalTimer.GetElapsedTime());
state.counters.insert(
{ { "InputGenTime", static_cast<uint32_t>(inputGenTimer.GetElapsedTime() * 1000) },
@ -766,15 +863,18 @@ void BenchVolumeRendering(::benchmark::State& state)
void BenchVolumeRenderingGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "Cycle", "IsMultiBlock" });
bm->ArgNames({ "IsMultiBlock" });
std::vector<uint32_t> isMultiBlocks{ false };
for (uint32_t cycle = 1; cycle <= DEFAULT_NUM_CYCLES; ++cycle)
for (auto& isMultiBlock : isMultiBlocks)
{
for (auto& isMultiBlock : isMultiBlocks)
{
bm->Args({ cycle, isMultiBlock });
}
size_t hash_val = 0;
hash_combine(hash_val, std::string("BenchVolumeRendering"), isMultiBlock);
auto search = bench_cycles.find(hash_val);
// If we can't find the hash, or we're not doing repetitions, reset to 0
if (search == bench_cycles.end())
bench_cycles[hash_val] = 0;
bm->Args({ isMultiBlock });
}
}
@ -884,55 +984,6 @@ void ParseBenchmarkOptions(int& argc, char** argv)
std::cerr << "Using data set dimensions = " << DataSetDim << std::endl;
std::cerr << "Using image size = " << ImageSize << "x" << ImageSize << std::endl;
// Now go back through the arg list and remove anything that is not in the list of
// unknown options or non-option arguments.
int destArg = 1;
// This is copy/pasted from vtkm::cont::Initialize(), should probably be abstracted eventually:
for (int srcArg = 1; srcArg < argc; ++srcArg)
{
std::string thisArg{ argv[srcArg] };
bool copyArg = false;
// Special case: "--" gets removed by optionparser but should be passed.
if (thisArg == "--")
{
copyArg = true;
}
for (const option::Option* opt = options[UNKNOWN]; !copyArg && opt != nullptr;
opt = opt->next())
{
if (thisArg == opt->name)
{
copyArg = true;
}
if ((opt->arg != nullptr) && (thisArg == opt->arg))
{
copyArg = true;
}
// Special case: optionparser sometimes removes a single "-" from an option
if (thisArg.substr(1) == opt->name)
{
copyArg = true;
}
}
for (int nonOpt = 0; !copyArg && nonOpt < commandLineParse.nonOptionsCount(); ++nonOpt)
{
if (thisArg == commandLineParse.nonOption(nonOpt))
{
copyArg = true;
}
}
if (copyArg)
{
if (destArg != srcArg)
{
argv[destArg] = argv[srcArg];
}
++destArg;
}
}
argc = destArg;
}
} // end anon namespace
@ -954,5 +1005,29 @@ int main(int argc, char* argv[])
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
bool benchmark_min_time = false;
bool benchmark_report_aggregates_only = false;
for (auto i = 0; i < argc; i++)
if (!strncmp(args[i], "--benchmark_repetitions", 23))
benchmark_repetitions = true;
else if (!strncmp(args[i], "--benchmark_min_time", 20))
benchmark_min_time = true;
else if (!strncmp(args[i], "--benchmark_report_aggregates_only", 34))
benchmark_report_aggregates_only = true;
// If repetitions are explicitly set without also specifying a minimum_time,
// force the minimum time to be fairly small so that in all likelihood, benchmarks
// will only run 1 iteration for each test
//
// And, for good measure, only output the accumulated statistics
if (benchmark_repetitions)
{
if (!benchmark_min_time)
args[argc++] = strdup("--benchmark_min_time=0.00000001");
if (!benchmark_report_aggregates_only)
args[argc++] = strdup("--benchmark_report_aggregates_only=true");
}
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -10,17 +10,14 @@
#include "Benchmarker.h"
#include <vtkm/Particle.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/ErrorInternal.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/internal/OptionParser.h>
#include <vtkm/filter/ParticleAdvection.h>
#include <vtkm/worklet/particleadvection/EulerIntegrator.h>
#include <vtkm/worklet/particleadvection/RK4Integrator.h>
#include <vtkm/filter/flow/ParticleAdvection.h>
namespace
{
@ -50,7 +47,7 @@ void BenchParticleAdvection(::benchmark::State& state)
vtkm::Particle(vtkm::Vec3f(.2f, 2.0f, .2f), 1),
vtkm::Particle(vtkm::Vec3f(.2f, 3.0f, .2f), 2) });
vtkm::filter::ParticleAdvection particleAdvection;
vtkm::filter::flow::ParticleAdvection particleAdvection;
particleAdvection.SetStepSize(vtkm::FloatDefault(1) / state.range(0));
particleAdvection.SetNumberOfSteps(static_cast<vtkm::Id>(state.range(0)));

@ -48,14 +48,6 @@ set(benchmarks
BenchmarkTopologyAlgorithms
)
#Taking too long to compile with HIPCC
if(HIP IN_LIST Kokkos_DEVICES)
list(REMOVE_ITEM benchmarks
BenchmarkDeviceAdapter
BenchmarkODEIntegrators
)
endif()
set(VTKm_BENCHS_RANGE_LOWER_BOUNDARY 4096 CACHE STRING "Smallest sample for input size bench for BenchmarkDeviceAdapter")
set(VTKm_BENCHS_RANGE_UPPER_BOUNDARY 134217728 CACHE STRING "Biggest sample for input size bench for BenchmarkDeviceAdapter")
mark_as_advanced(VTKm_BENCHS_RANGE_LOWER_BOUNDARY VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
@ -64,10 +56,8 @@ foreach (benchmark ${benchmarks})
add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter vtkm_io)
endforeach ()
if(NOT HIP IN_LIST Kokkos_DEVICES)
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY})
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY})
endif()
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY})
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY})
if(TARGET vtkm_rendering)
add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering vtkm_source)

@ -0,0 +1,20 @@
This document describes how to use the command-line options for Google Benchmarks (GBench) to control the behavior of BenchmarkInSitu.
Generally, "BenchmarkInSitu --help" will provide the list of standard benchmarks along with the associated ones from GBench.
As a refresher, GBench iterates a test defined in the application (in this case Contour, Streamlines, ...) a number of times until two criteria are met: a statistically stable set of samples has been generated, and the test has run for a specified minimum amount of time (by default, 0.5 seconds).
There are three ways to run the InSitu benchmark that control the number of iterations run by GBench. These are independent of the "standard" arguments passed to the benchmark (we'll define the standard arguments as --vtkm-device, --size, --image-size, plus others not defined by GBench).
1. BenchmarkInSitu <standard arguments>
- Under this scenario, the iterations are controlled completely by GBench. Generally, each test will be run between 1 and N iterations depending on how long each test runs.
2. BenchmarkInSitu <standard arguments> --benchmark_min_time=<min_time>
- This will ensure that the test will run for at least <min_time> seconds. You will set this option if you don't care about the actual number of iterations, but only that each test runs for at least a specified time.
3. BenchmarkInSitu <standard arguments> --benchmark_repetitions=<reps>
- The purpose of this option is to *exactly* control the number of iterations performed by GBench. Internally, this does two things:
- Sets the minimum time to a very small value ("--benchmark_min_time=0.00000001")
- Sets the output to only report aggregate statistics for each test, e.g., mean, median, standard deviation (--benchmark_report_aggregates_only=true)
Both of these arguments can be overridden by providing different values on the command-line. With the current setting, all test runs have resulted in only <reps> repetitions being executed.

@ -27,6 +27,5 @@ Libs: -L${libdir} \
-lvtkm_worklet-@VTKm_VERSION@ \
-lvtkm_source-@VTKm_VERSION@ \
-lvtkm_io-@VTKm_VERSION@ \
-lvtkm_lodepng-@VTKm_VERSION@ \
-lvtkm_cont-@VTKm_VERSION@ \
-lvtkmdiympi_nompi

@ -34,6 +34,5 @@ VTKm_LIB_FLAGS = -L $(VTKm_DIR)/lib \
-lvtkm_worklet-$(VTKM_VERSION) \
-lvtkm_source-$(VTKM_VERSION) \
-lvtkm_io-$(VTKM_VERSION) \
-lvtkm_lodepng-$(VTKM_VERSION) \
-lvtkm_cont-$(VTKM_VERSION) \
-lvtkmdiympi_nompi

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e56a995e539990ac31477dd6001bd80307e8a6337da8d27836b5cbc5eea31b8c
size 1790

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3bfe6fda1f13176da2a934296a29c7a022e9c7803029d824aff35fa1c1a5460
size 405

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a569aff0c6872611ef75a4dcb597e1320cb966b80f840a0dbd76a29b2760dbb
size 50335052

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:671345bdb045aeadc8e9fa1060de51e53286c74929c6c8c60a529b318f02bbfc
size 3795

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6372aec0eb79ee7c99de0e1546ff946dcfac2ea0eb7fb575fddc80decf0a438b
size 572

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a272a8d800bca5bc879d52cb4999562378c5ac108d953634b1dbad7ad07d8c8a
size 1031

docs/HotFixGuide.md Normal file

@ -0,0 +1,78 @@
# HotFix Guide
## HotFix general instructions
The following instructions cover the general case for applying hotfixes to
release branches; simplified instructions for more specific cases can be
found in the sub-sections below.
1. Find the oldest relevant release branch BASE to which this hotfix applies.
- Relevant release branches include: release, and release-specific
maintained branches.
2. Create a hotfix branch from BASE.
- If the hotfix branch already exists, rebase it onto BASE with `git rebase --onto BASE`.
3. Open a merge-request targeting:
- master, if the hotfix applies to master.
- Otherwise, release, if it applies to the latest release.
- Otherwise, the most recent release-specific branch to which this hotfix
applies.
- Lastly, if none of the above, BASE.
4. (If needed) If the hotfix is a backport (cherry-pick) of existing merge-requests,
add a cross-reference to each of those merge-requests, in the format `!1234`,
inside the description of the newly created merge-request.
- Cherry-pick each of the relevant commits of the existing merge-requests using
`git cherry-pick -x XXYYZZ`.
5. At the bottom of the merge-request description, add a `Backport: branch_name`
directive for each of the branches that exist between BASE (inclusive) and
the branch targeted by the merge-request (exclusive).
In the case of merge conflicts in any of the backports, refer to the [Kitware backport guide][1].
## HotFix for latest release and master branch only
For hotfixes that apply to both the release and master branches, create a branch based
off of the tip of the release branch and create a merge-request targeting master.
If the hotfix branch already exists based off of a commit in the master branch,
you can change the base of the hotfix branch from master to the latest
release with:
```
# Assuming that both your local master and release branch are updated; and
# assuming that you are currently in your topic branch
git rebase --onto release master
```
Next, you can bring this commit to __release__ by adding the following line to
the bottom of the MR description: `Backport: release`. This directive will later
instruct the Kitware robot to bring the changes of the hotfix branch into the
release branch by creating a merge commit whose second parent is the tip of the
hotfix branch.
Lastly, the master branch history will be automatically connected with
release after the merge-request is merged, as explained
[here](#How-master-and-release-branch-are-connected).
## HotFix for release branch only
For hotfixes that apply only to the release branch, whose changes are not needed
in master, create a branch based off of the __release__ branch and create a
merge-request targeting the __release__ branch. Proceed as with a regular MR.
### How master and release branch are connected
Every merge commit in the release branch will be automatically connected to the
master branch by our GitLab robot, which creates a merge-request using the
`-s ours` strategy. __Note__ that the `-s ours` strategy does not actually bring any
change to the target branch; it solely creates an empty merge commit in master
connecting it to the release branch.
## Other cases
There are other scenarios that you might encounter while attempting
to bring a hotfix to release; for those cases please refer to the
[Kitware backport guide][1], on which this document is based.
[1]: https://gitlab.kitware.com/utils/git-workflow/-/wikis/Backport-topics

@ -1,55 +0,0 @@
Release HotFix
===============
# HotFix from master branch
## On a single MR
You have created a branch from master branch and you have a MR request targeting
the __master__ branch.
You can bring this commit to __release__ by adding the following line to
the bottom of the MR description.
```
Backport: release
```
This will cherry-pick this commit and push it to __release__ after typing `Do:
merge` in a comment.
You must also make sure that there will not be any merge conflict with the
__release__ branch, thus you need to create an additional commit using the following
command:
```
git merge --no-ff origin/release
```
This will ensure that backport will be able to push your commit to __release__.
## On multiple MRs
1. Create one merge request sourcing your HotFix branch and targeting __master__
and merge.
2. Create one merge request sourcing __master__ and targeting __release__ and merge.
# HotFix from release branch
You have created a branch from the __release__ branch and you have a MR request
targeting __release__, you can proceed as in a regular MR.
Every merge in release will be automatically brought to master by the robot
using `-s ours` strategy.
__VERY IMPORTANT__: the `-s ours` strategy does not actually bring any change to the
target branch; thus, if needed, you might want to bring the changes
from the HotFix to __master__ by creating another MR which cherry-picks
the merge commit in `release` for the given HotFix.
Use the difference to first parent for the cherry-pick commit:
```
git cherry-pick -m1 -x <HASH OF COMMIT>
```

@ -3,7 +3,7 @@ Release Process
## Prologue
This document is divided into two parts:
- An overview of the branching and release scheme used in VTK-m.
- Concise instructions to get started with the release process.
@ -18,10 +18,30 @@ While all of the development is performed in the master branch, once in a while
when we want to do a release, we tag a commit in master and we push it to the
release branch.
Also there are times when we want to get _Hotfix_ that affects previous releases
Also, there are times when we want to get a _hotfix_ that affects previous releases
into the release branch; in this case we can also push the merge-request commit
with the fix into the release branch.
## Release-specific branches
Sometimes we need to keep maintaining an older release which does not sit at the
tip of the release branch. For this purpose we use release-specific branches
with names of the form `release-@MAJOR_VER@.@MINOR_VER@`.
To create a new release-specific branch, you need someone with push access to
create a release-specific branch pointing at the latest commit of the minor
release of interest; that is, for release-1.7 it would be v1.7.1 as opposed to
v1.7.0.
There can be cases where, between releases X.Y and X.Y+1, there are hotfix
commits that do not correspond to a patch tag release. In this particular case,
create the release-specific branch pointing to the last commit before X.Y+1.
To add a hotfix to a release-specific branch, follow the instructions described
in [HotFixes](./ReleaseHotFix.md) noting that you need to adjust the branch
names from release to `release-@MAJOR_VER@.@MINOR_VER@`.
A not-so-simple example of what the branching scheme looks like can be found
here:
@ -29,28 +49,28 @@ here:
# → git log --graph --decorate --oneline --all
* 2e9230d (master) Merge branch 'update'
|\
|\
| * 59279dd (HEAD -> release, tag: v1.0.1) v1.0.1 2nd release of VTKm
| * b60611b Add release notes for v1.0.1
| * 9d26451 Merge branch 'master' into update
| |\
| |/
|/|
| |\
| |/
|/|
* | 75137e5 Unrelated commit
* | e982be0 Merge branch 'release'
|\|
|\|
| * f2c4eb6 Merge branch 'hotfix' into release
| |\
| |\
| | * c1c2655 Hotfix
| |/
| |/
* | e53df9e Unrelated commit
* | ec6b481 Unrelated commit
|/
|/
* 0742a47 (tag: v1.0.0) v1.0.0 1st release of VTKm
* 4fe993c Add release notes for v1.0.0
```
This will make the release branch to only contain tags and _HotFix_ merges as
This will make the release branch contain only tags and _hotfix_ merges, as
shown here:
```git

docs/ReleaseRoadmap.md Normal file

@ -0,0 +1,23 @@
# Minor Release Roadmap
| Version | Date | Delay (days) | Life-cycle (*planned) | End of Support |
| --------- | ------------ | ------- | ----------- | ---------------- |
| 1.7.0 | 2021-12-01 | +8 | Long Term | 2022-12-01 |
| 1.8.0 | 2022-06-01 | +14 | Long Term | 2023-06-01 |
| 1.9.0 | 2022-09-01 | | Short Term* | TBD |
| 2.0.0 | 2022-12-01 | | Long Term* | TBD |
| 2.1.0 | 2023-03-01 | | Short Term* | TBD |
| 2.2.0 | 2023-06-01 | | Long Term* | TBD |
## Legend
- Version: Only counts major and minor versions; patch releases are made
unscheduled, as needed.
- Date: Scheduled date for the Final Release of the corresponding release.
- Delay: Days of delay between scheduled date and release date.
- Life-cycle: The duration of support:
- Long Term, usually maintained for one calendar year; it might use a
release-specific support branch if it is not the latest release.
- Short Term, usually maintained until the next minor release, usually 3-6
months.

@ -0,0 +1,5 @@
# Fix bug with copying invalid variants
There was a bug where, if you attempted to copy a `Variant` that was not
valid (i.e., did not hold an object), a seg fault could happen. This has
been changed to set the target variant to also be invalid.
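
For illustration only (not part of the original change), here is a minimal
sketch of the scenario that used to crash. It assumes `vtkm::cont::Variant`
is available from `vtkm/cont/Variant.h`; adjust the include to your source
tree if it differs.

```cpp
#include <vtkm/Types.h>
#include <vtkm/cont/Variant.h>

void CopyInvalidVariant()
{
  // A default-constructed Variant does not hold an object (it is not valid).
  vtkm::cont::Variant<vtkm::Id, vtkm::FloatDefault> source;
  vtkm::cont::Variant<vtkm::Id, vtkm::FloatDefault> target(vtkm::Id{ 42 });

  // Previously this assignment could seg fault when the source was invalid;
  // now it simply makes the target invalid as well.
  target = source;
}
```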

@ -0,0 +1,14 @@
# Add test for arrays and data that are cleaned up after finalize
It is the case that arrays might be deallocated from a device after the
device is closed. This can happen, for example, when an `ArrayHandle` is
declared globally. It gets constructed before VTK-m is initialized. This
is OK as long as you do not otherwise use it until VTK-m is initialized.
However, if you use that `ArrayHandle` to move data to a device and that
data is left on the device when the device is closed, then the
`ArrayHandle` will be left holding a reference to invalid device memory
once the device is shut down. This can cause problems when the
`ArrayHandle` destructs itself and attempts to release this memory.
The VTK-m devices should gracefully handle deallocations that happen
after device shutdown.
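
As a rough illustration (not taken from the test itself), the situation being
exercised looks roughly like the following; the worklet or filter use is
elided and only the object lifetimes matter.

```cpp
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/Initialize.h>

// Constructed before main() and before vtkm::cont::Initialize; destroyed
// after main() returns, i.e. possibly after the devices have shut down.
vtkm::cont::ArrayHandle<vtkm::FloatDefault> gArray;

int main(int argc, char* argv[])
{
  vtkm::cont::Initialize(argc, argv);

  gArray.Allocate(100);
  // ... run a worklet or filter that copies gArray to a device ...

  return 0;
  // gArray destructs after device shutdown; the device adapters should
  // release (or ignore) the stale device allocation gracefully.
}
```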

@ -0,0 +1,9 @@
# Old Filter Base Classes are Deprecated
In recent versions of VTK-m, a new structure for filter classes was
introduced. All of the existing filters have been moved over to this new
structure, and the old filter class structure has been deprecated.
This is in preparation for changes in VTK-m 2.0, where the old filter
classes will be removed and the new filter classes will have the `New` in
their name removed (so that they become simply `Filter` and `FilterField`).

@ -0,0 +1,15 @@
# Fix type comparison on OSX
`UnknownArrayHandle` compares `std::type_index` objects to check whether a
requested type is the same as that held in the array handle. However, it is
possible that different translation units can create different but
equivalent `std::type_info`/`std::type_index` objects. In this case, the
`==` operator might return false for two equivalent types. This can happen
on OSX.
To get around this problem, `UnknownArrayHandle` now does a more extensive
check of the `std::type_info` objects. It first uses the `==` operator to
compare them (as before), which usually works but can possibly return
`false` when the correct result is `true`. To check for this case, it then
compares the names of the two types and returns `true` iff the two names
are the same.
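
A small sketch of the kind of query this affects (the helper function below
is hypothetical): the check goes through `UnknownArrayHandle`'s type
comparison, which now also falls back to comparing type names so equivalent
types from different translation units match on OSX.

```cpp
#include <vtkm/cont/UnknownArrayHandle.h>

// Hypothetical helper: returns true if the unknown array stores the default
// floating-point value type, even when this query is compiled in a different
// translation unit (or shared library) than the one that created the array.
bool HoldsDefaultFloat(const vtkm::cont::UnknownArrayHandle& unknown)
{
  return unknown.IsValueType<vtkm::FloatDefault>();
}
```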

@ -0,0 +1,30 @@
# Allow ArrayHandle to have a runtime selectable number of buffers
Previously, the number of buffers held by an `ArrayHandle` had to be
determined statically at compile time by the storage. Most of the time this
is fine. However, there are some exceptions where the number of buffers
need to be selected at runtime. For example, the `ArrayHandleRecombineVec`
does not specify the number of components it uses, and it needed a hack
where it stored buffers in the metadata of another buffer, which is bad.
This change allows the number of buffers to vary at runtime (at least at
construction). The buffers were already managed in a `std::vector`. It now
no longer forces the vector to be a specific size. `GetNumberOfBuffers` was
removed from the `Storage`. Instead, if the number of buffers was not
specified at construction, an allocation of size 0 is done to create
default buffers.
The biggest change is to the interface of the storage object methods, which
now take `std::vector` instead of pointers to `Buffer` objects. This adds a
little hassle in having to copy subsets of this `vector` when a storage
object has multiple sub-arrays. But it does simplify some of the
templating.
Other changes to the `Storage` structure include requiring all objects to
include a `CreateBuffers` method that accepts no arguments. This method
will be used by `ArrayHandle` in its default constructor. Previously,
`ArrayHandle` would create the `vector` of `Buffer` objects itself, but it
now must call this method in the `Storage` to do this. (It also has a nice
side effect of allowing the `Storage` to initialize the buffer objects if
necessary.) Another change was to remove the `GetNumberOfBuffers` method
(which no longer has meaning).

@ -0,0 +1,17 @@
# Divided the mesh quality filter
The original implementation of the `MeshQuality` filter created one large
kernel with a switch statement that jumped to the code of the metric
actually desired. This is problematic for a couple of reasons. First, it
takes the compiler a long time to optimize for all the inlined cases of a
large kernel. Second, it creates a larger than necessary function that has
to be loaded onto the GPU to execute.
The code was modified to move the switch statement outside of the GPU
kernel. Instead, the routine for each metric is compiled into its own
kernel. For convenience, each routine is wrapped into its own independent
filter (e.g., `MeshQualityArea`, `MeshQualityVolume`). The uber
`MeshQuality` filter still exists, and its use is still encouraged even if
you only need a particular metric. However, internally the switch statement
now occurs on the host to select the appropriate specific filter that loads
a more targeted kernel.
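
As a sketch of the usage implication, a single metric can now be requested
directly through its metric-specific filter. The namespace and header below
are assumptions based on the filter reorganization in this release and may
differ in your version of VTK-m.

```cpp
#include <vtkm/cont/DataSet.h>
#include <vtkm/filter/mesh_info/MeshQualityArea.h>

vtkm::cont::DataSet ComputeCellAreas(const vtkm::cont::DataSet& input)
{
  // Only the area routine is instantiated and loaded onto the device,
  // instead of one large kernel containing every metric.
  vtkm::filter::mesh_info::MeshQualityArea areaFilter;
  return areaFilter.Execute(input);
}
```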

@ -0,0 +1,10 @@
# Added DEVICE_SOURCES to vtkm_unit_tests
The `vtkm_unit_tests` function in the CMake build now allows you to specify
which files need to be compiled with a device compiler using the
`DEVICE_SOURCES` argument. Previously, the only way to specify that unit
tests needed to be compiled with a device compiler was to use the
`ALL_BACKENDS` argument, which would automatically compile everything with
the device compiler as well as test the code on all backends.
`ALL_BACKENDS` is still supported, but it no longer changes the sources to
be compiled with the device compiler.

@ -0,0 +1,11 @@
# Do not require `VecTraits` for `UnknownArrayHandle` components
When an `UnknownArrayHandle` is constructed from an `ArrayHandle`, it uses
the `VecTraits` of the component type to construct its internal functions.
This meant that you could not put an `ArrayHandle` with a component type
that did not have `VecTraits` into an `UnknownArrayHandle`.
`UnknownArrayHandle` now no longer needs the components of its arrays to
have `VecTraits`. If the component type of the array does not have
`VecTraits`, it treats the components as if they are a scalar type.
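
A minimal sketch of what now works; the component type below is made up and
has no `VecTraits` specialization.

```cpp
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/UnknownArrayHandle.h>

// Hypothetical component type with no vtkm::VecTraits specialization.
struct CustomComponent
{
  float Payload;
};

void StoreCustomArray()
{
  vtkm::cont::ArrayHandle<CustomComponent> array;
  array.Allocate(10);

  // Previously this required VecTraits<CustomComponent>; now the components
  // are simply treated as a scalar type.
  vtkm::cont::UnknownArrayHandle unknown(array);
  (void)unknown;
}
```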

@ -0,0 +1,5 @@
# Add Variant::IsType
The `Variant` class was missing a way to check the type. You could do it
indirectly using `variant.GetIndex() == variant.GetIndexOf<T>()`, but
having this convenience function is more clear.
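
A short sketch contrasting the two checks; the header path is assumed to be
`vtkm/cont/Variant.h` and may differ in your source tree.

```cpp
#include <vtkm/Types.h>
#include <vtkm/cont/Variant.h>

void CheckHeldType()
{
  vtkm::cont::Variant<vtkm::Id, vtkm::FloatDefault> variant{ vtkm::FloatDefault{ 3.5f } };

  // Indirect check that was needed before:
  bool viaIndex = (variant.GetIndex() == variant.GetIndexOf<vtkm::FloatDefault>());

  // New convenience method:
  bool viaIsType = variant.IsType<vtkm::FloatDefault>();

  (void)viaIndex;
  (void)viaIsType;
}
```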

@ -0,0 +1,7 @@
# Fix bug with voxels in legacy vtk files
The legacy VTK file reader for unstructured grids had a bug when reading
cells of type voxel. VTK-m does not support the voxel cell type in
unstructured grids (i.e. explicit cell sets), so it has to convert them to
hexahedron cells. A bug in the reader was mangling the cell array index
during this conversion.

@ -15,7 +15,3 @@ find_package(VTKm REQUIRED QUIET)
add_executable(Clipping Clipping.cxx)
target_link_libraries(Clipping PRIVATE vtkm_filter vtkm_io)
vtkm_add_target_information(Clipping
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES Clipping.cxx)

@ -18,9 +18,3 @@ target_link_libraries(ContourTreeMesh2D vtkm_filter)
add_executable(ContourTreeMesh3D ContourTreeMesh3D.cxx)
target_link_libraries(ContourTreeMesh3D vtkm_filter)
vtkm_add_target_information(ContourTreeMesh2D ContourTreeMesh3D
DROP_UNUSED_SYMBOLS
MODIFY_CUDA_FLAGS
DEVICE_SOURCES
ContourTreeMesh2D.cxx ContourTreeMesh3D.cxx)

@ -58,7 +58,7 @@
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/filter/ContourTreeUniform.h>
#include <vtkm/filter/scalar_topology/ContourTreeUniform.h>
#include <fstream>
#include <vector>
@ -105,7 +105,7 @@ int main(int argc, char* argv[])
inDataSet.AddPointField("values", values);
// Convert 2D mesh of values into contour tree, pairs of vertex ids
vtkm::filter::ContourTreeMesh2D filter;
vtkm::filter::scalar_topology::ContourTreeMesh2D filter;
filter.SetActiveField("values");
// Output data set is pairs of saddle and peak vertex IDs
vtkm::cont::DataSet output = filter.Execute(inDataSet);

@ -58,7 +58,7 @@
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/filter/ContourTreeUniform.h>
#include <vtkm/filter/scalar_topology/ContourTreeUniform.h>
#include <fstream>
#include <vector>
@ -106,7 +106,7 @@ int main(int argc, char* argv[])
inDataSet.AddPointField("values", values);
// Convert 3D mesh of values into contour tree, pairs of vertex ids
vtkm::filter::ContourTreeMesh3D filter;
vtkm::filter::scalar_topology::ContourTreeMesh3D filter;
filter.SetActiveField("values");
// Output data set is pairs of saddle and peak vertex IDs
vtkm::cont::DataSet output = filter.Execute(inDataSet);

@ -71,11 +71,12 @@
#include <vtkm/cont/Timer.h>
#include <vtkm/io/BOVDataSetReader.h>
#include <vtkm/filter/ContourTreeUniformAugmented.h>
#include <vtkm/worklet/contourtree_augmented/PrintVectors.h>
#include <vtkm/worklet/contourtree_augmented/ProcessContourTree.h>
#include <vtkm/worklet/contourtree_augmented/Types.h>
#include <vtkm/worklet/contourtree_augmented/processcontourtree/Branch.h>
#include <vtkm/filter/MapFieldPermutation.h>
#include <vtkm/filter/scalar_topology/ContourTreeUniformAugmented.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/PrintVectors.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/ProcessContourTree.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/Types.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/processcontourtree/Branch.h>
// clang-format off
VTKM_THIRDPARTY_PRE_INCLUDE
@ -153,6 +154,122 @@ private:
std::vector<std::string> mCLOptions;
};
inline vtkm::Id3 ComputeNumberOfBlocksPerAxis(vtkm::Id3 globalSize, vtkm::Id numberOfBlocks)
{
vtkm::Id currNumberOfBlocks = numberOfBlocks;
vtkm::Id3 blocksPerAxis{ 1, 1, 1 };
while (currNumberOfBlocks > 1)
{
vtkm::IdComponent splitAxis = 0;
for (vtkm::IdComponent d = 1; d < 3; ++d)
{
if (globalSize[d] > globalSize[splitAxis])
{
splitAxis = d;
}
}
if (currNumberOfBlocks % 2 == 0)
{
blocksPerAxis[splitAxis] *= 2;
globalSize[splitAxis] /= 2;
currNumberOfBlocks /= 2;
}
else
{
blocksPerAxis[splitAxis] *= currNumberOfBlocks;
break;
}
}
return blocksPerAxis;
}
inline std::tuple<vtkm::Id3, vtkm::Id3, vtkm::Id3> ComputeBlockExtents(vtkm::Id3 globalSize,
vtkm::Id3 blocksPerAxis,
vtkm::Id blockNo)
{
// DEBUG: std::cout << "ComputeBlockExtents("<<globalSize <<", " << blocksPerAxis << ", " << blockNo << ")" << std::endl;
// DEBUG: std::cout << "Block " << blockNo;
vtkm::Id3 blockIndex, blockOrigin, blockSize;
for (vtkm::IdComponent d = 0; d < 3; ++d)
{
blockIndex[d] = blockNo % blocksPerAxis[d];
blockNo /= blocksPerAxis[d];
float dx = float(globalSize[d] - 1) / float(blocksPerAxis[d]);
blockOrigin[d] = vtkm::Id(blockIndex[d] * dx);
vtkm::Id maxIdx =
blockIndex[d] < blocksPerAxis[d] - 1 ? vtkm::Id((blockIndex[d] + 1) * dx) : globalSize[d] - 1;
blockSize[d] = maxIdx - blockOrigin[d] + 1;
// DEBUG: std::cout << " " << blockIndex[d] << dx << " " << blockOrigin[d] << " " << maxIdx << " " << blockSize[d] << "; ";
}
// DEBUG: std::cout << " -> " << blockIndex << " " << blockOrigin << " " << blockSize << std::endl;
return std::make_tuple(blockIndex, blockOrigin, blockSize);
}
inline vtkm::cont::DataSet CreateSubDataSet(const vtkm::cont::DataSet& ds,
vtkm::Id3 blockOrigin,
vtkm::Id3 blockSize,
const std::string& fieldName)
{
vtkm::Id3 globalSize;
ds.GetCellSet().CastAndCallForTypes<VTKM_DEFAULT_CELL_SET_LIST_STRUCTURED>(
vtkm::worklet::contourtree_augmented::GetPointDimensions(), globalSize);
const vtkm::Id nOutValues = blockSize[0] * blockSize[1] * blockSize[2];
const auto inDataArrayHandle = ds.GetPointField(fieldName).GetData();
vtkm::cont::ArrayHandle<vtkm::Id> copyIdsArray;
copyIdsArray.Allocate(nOutValues);
auto copyIdsPortal = copyIdsArray.WritePortal();
vtkm::Id3 outArrIdx;
for (outArrIdx[2] = 0; outArrIdx[2] < blockSize[2]; ++outArrIdx[2])
for (outArrIdx[1] = 0; outArrIdx[1] < blockSize[1]; ++outArrIdx[1])
for (outArrIdx[0] = 0; outArrIdx[0] < blockSize[0]; ++outArrIdx[0])
{
vtkm::Id3 inArrIdx = outArrIdx + blockOrigin;
vtkm::Id inIdx = (inArrIdx[2] * globalSize[1] + inArrIdx[1]) * globalSize[0] + inArrIdx[0];
vtkm::Id outIdx =
(outArrIdx[2] * blockSize[1] + outArrIdx[1]) * blockSize[0] + outArrIdx[0];
VTKM_ASSERT(inIdx >= 0 && inIdx < inDataArrayHandle.GetNumberOfValues());
VTKM_ASSERT(outIdx >= 0 && outIdx < nOutValues);
copyIdsPortal.Set(outIdx, inIdx);
}
// DEBUG: std::cout << copyIdsPortal.GetNumberOfValues() << std::endl;
vtkm::cont::Field permutedField;
bool success =
vtkm::filter::MapFieldPermutation(ds.GetPointField(fieldName), copyIdsArray, permutedField);
if (!success)
throw vtkm::cont::ErrorBadType("Field copy failed (probably due to invalid type)");
vtkm::cont::DataSetBuilderUniform dsb;
if (globalSize[2] <= 1) // 2D Data Set
{
vtkm::Id2 dimensions{ blockSize[0], blockSize[1] };
vtkm::cont::DataSet dataSet = dsb.Create(dimensions);
vtkm::cont::CellSetStructured<2> cellSet;
cellSet.SetPointDimensions(dimensions);
cellSet.SetGlobalPointDimensions(vtkm::Id2{ globalSize[0], globalSize[1] });
cellSet.SetGlobalPointIndexStart(vtkm::Id2{ blockOrigin[0], blockOrigin[1] });
dataSet.SetCellSet(cellSet);
dataSet.AddField(permutedField);
return dataSet;
}
else
{
vtkm::cont::DataSet dataSet = dsb.Create(blockSize);
vtkm::cont::CellSetStructured<3> cellSet;
cellSet.SetPointDimensions(blockSize);
cellSet.SetGlobalPointDimensions(globalSize);
cellSet.SetGlobalPointIndexStart(blockOrigin);
dataSet.SetCellSet(cellSet);
dataSet.AddField(permutedField);
return dataSet;
}
}
// Compute and render an isosurface for a uniform grid example
@ -171,7 +288,6 @@ int main(int argc, char* argv[])
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &size);
int numBlocks = size;
int blocksPerRank = 1;
#endif
// initialize vtkm-m (e.g., logging via -v and device via the -d option)
@ -354,7 +470,7 @@ int main(int argc, char* argv[])
// From https://www.unix.com/302983597-post2.html
char cstr_filename[32];
snprintf(cstr_filename, sizeof(cstr_filename), "cout_%d.log", rank);
int out = open(cstr_filename, O_RDWR | O_CREAT | O_APPEND, 0600);
int out = open(cstr_filename, O_RDWR | O_CREAT | O_TRUNC | O_APPEND, 0600);
if (-1 == out)
{
perror("opening cout.log");
@ -362,7 +478,7 @@ int main(int argc, char* argv[])
}
snprintf(cstr_filename, sizeof(cstr_filename), "cerr_%d.log", rank);
int err = open(cstr_filename, O_RDWR | O_CREAT | O_APPEND, 0600);
int err = open(cstr_filename, O_RDWR | O_CREAT | O_TRUNC | O_APPEND, 0600);
if (-1 == err)
{
perror("opening cerr.log");
@ -396,43 +512,15 @@ int main(int argc, char* argv[])
std::vector<vtkm::Id> dims;
if (filename.compare(filename.length() - 3, 3, "bov") == 0)
{
std::cout << "Reading BOV file" << std::endl;
vtkm::io::BOVDataSetReader reader(filename);
inDataSet = reader.ReadDataSet();
nDims = 3;
currTime = totalTime.GetElapsedTime();
dataReadTime = currTime - prevTime;
prevTime = currTime;
#ifdef WITH_MPI
// Copy the data into the values array so we can construct a multiblock dataset
// TODO All we should need to do to implement BOV support is to copy the values
// in the values vector and copy the dimensions in the dims vector
vtkm::Id3 meshSize;
vtkm::worklet::contourtree_augmented::GetPointDimensions temp;
temp(inDataSet.GetCellSet(), meshSize);
dims[0] = meshSize[0];
dims[1] = meshSize[1];
dims[2] = meshSize[2];
// TODO/FIXME: The following is commented out since it creates a a warning that
// AsVirtual() will no longer be supported. Since this implementation is
// incomplete anyway, it currently makes more sense to comment it out than
// to fix the warning.
// auto tempField = inDataSet.GetField("values").GetData();
// values.resize(static_cast<std::size_t>(tempField.GetNumberOfValues()));
// auto tempFieldHandle = tempField.AsVirtual<ValueType>().ReadPortal();
// for (vtkm::Id i = 0; i < tempField.GetNumberOfValues(); i++)
// {
// values[static_cast<std::size_t>(i)] = static_cast<ValueType>(tempFieldHandle.Get(i));
// }
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"BOV reader not yet support in MPI mode by this example");
MPI_Finalize();
return EXIT_FAILURE;
#endif
}
else // Read ASCII data input
{
std::cout << "Reading ASCII file" << std::endl;
std::ifstream inFile(filename);
if (inFile.bad())
return 0;
@ -485,7 +573,6 @@ int main(int argc, char* argv[])
// swap dims order
std::swap(dims[0], dims[1]);
#ifndef WITH_MPI // We only need the inDataSet if we are not using MPI; otherwise we'll construct a multi-block dataset
// build the input dataset
vtkm::cont::DataSetBuilderUniform dsb;
// 2D data
@ -506,7 +593,6 @@ int main(int argc, char* argv[])
inDataSet = dsb.Create(vdims);
}
inDataSet.AddPointField("values", values);
#endif
} // END ASCII Read
// Print the mesh metadata
@ -538,104 +624,49 @@ int main(int argc, char* argv[])
#ifndef WITH_MPI // construct regular, single-block VTK-M input dataset
vtkm::cont::DataSet useDataSet = inDataSet; // Single block dataset
#else // Create a multi-block dataset for multi-block DIY-parallel processing
vtkm::cont::PartitionedDataSet useDataSet; // Partitioned variant of the input dataset
vtkm::Id3 blocksPerDim =
nDims == 3 ? vtkm::Id3(1, 1, numBlocks) : vtkm::Id3(1, numBlocks, 1); // Decompose the data into
// Determine split
vtkm::Id3 globalSize = nDims == 3 ? vtkm::Id3(static_cast<vtkm::Id>(dims[0]),
static_cast<vtkm::Id>(dims[1]),
static_cast<vtkm::Id>(dims[2]))
: vtkm::Id3(static_cast<vtkm::Id>(dims[0]),
static_cast<vtkm::Id>(dims[1]),
static_cast<vtkm::Id>(0));
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockIndices;
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockOrigins;
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockSizes;
localBlockIndices.Allocate(blocksPerRank);
localBlockOrigins.Allocate(blocksPerRank);
localBlockSizes.Allocate(blocksPerRank);
auto localBlockIndicesPortal = localBlockIndices.WritePortal();
auto localBlockOriginsPortal = localBlockOrigins.WritePortal();
auto localBlockSizesPortal = localBlockSizes.WritePortal();
static_cast<vtkm::Id>(1));
vtkm::Id3 blocksPerDim = ComputeNumberOfBlocksPerAxis(globalSize, numBlocks);
vtkm::Id blocksPerRank = numBlocks / size;
vtkm::Id numRanksWithExtraBlock = numBlocks % size;
vtkm::Id blocksOnThisRank, startBlockNo;
if (rank < numRanksWithExtraBlock)
{
vtkm::Id lastDimSize =
(nDims == 2) ? static_cast<vtkm::Id>(dims[1]) : static_cast<vtkm::Id>(dims[2]);
if (size > (lastDimSize / 2.))
{
VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
rank == 0,
"Number of ranks too large for data. Use " << lastDimSize / 2
<< "or fewer ranks");
MPI_Finalize();
return EXIT_FAILURE;
}
vtkm::Id standardBlockSize = (vtkm::Id)(lastDimSize / numBlocks);
vtkm::Id blockSize = standardBlockSize;
vtkm::Id blockSliceSize =
nDims == 2 ? static_cast<vtkm::Id>(dims[0]) : static_cast<vtkm::Id>((dims[0] * dims[1]));
vtkm::Id blockNumValues = blockSize * blockSliceSize;
blocksOnThisRank = blocksPerRank + 1;
startBlockNo = (blocksPerRank + 1) * rank;
}
else
{
blocksOnThisRank = blocksPerRank;
startBlockNo = numRanksWithExtraBlock * (blocksPerRank + 1) +
(rank - numRanksWithExtraBlock) * blocksPerRank;
}
vtkm::Id startBlock = blocksPerRank * rank;
vtkm::Id endBlock = startBlock + blocksPerRank;
for (vtkm::Id blockIndex = startBlock; blockIndex < endBlock; ++blockIndex)
{
vtkm::Id localBlockIndex = blockIndex - startBlock;
vtkm::Id blockStart = blockIndex * blockNumValues;
vtkm::Id blockEnd = blockStart + blockNumValues;
if (blockIndex < (numBlocks - 1)) // add overlap between regions
{
blockEnd += blockSliceSize;
}
else
{
blockEnd = lastDimSize * blockSliceSize;
}
vtkm::Id currBlockSize = (vtkm::Id)((blockEnd - blockStart) / blockSliceSize);
if (blocksOnThisRank != 1)
{
std::cerr << "Currently only one block per rank supported!";
MPI_Finalize();
return EXIT_FAILURE;
}
vtkm::cont::DataSetBuilderUniform dsb;
vtkm::cont::DataSet ds;
// Created partitioned (split) data set
vtkm::cont::PartitionedDataSet useDataSet;
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockIndices;
localBlockIndices.Allocate(blocksPerRank);
auto localBlockIndicesPortal = localBlockIndices.WritePortal();
// 2D data
if (nDims == 2)
{
vtkm::Id2 vdims;
vdims[0] = static_cast<vtkm::Id>(dims[0]);
vdims[1] = static_cast<vtkm::Id>(currBlockSize);
vtkm::Vec<ValueType, 2> origin(0, blockIndex * blockSize);
vtkm::Vec<ValueType, 2> spacing(1, 1);
ds = dsb.Create(vdims, origin, spacing);
localBlockIndicesPortal.Set(localBlockIndex, vtkm::Id3(blockIndex, 0, 0));
localBlockOriginsPortal.Set(localBlockIndex,
vtkm::Id3((blockStart / blockSliceSize), 0, 0));
localBlockSizesPortal.Set(localBlockIndex,
vtkm::Id3(currBlockSize, static_cast<vtkm::Id>(dims[0]), 0));
}
// 3D data
else
{
vtkm::Id3 vdims;
vdims[0] = static_cast<vtkm::Id>(dims[1]);
vdims[1] = static_cast<vtkm::Id>(dims[0]);
vdims[2] = static_cast<vtkm::Id>(currBlockSize);
vtkm::Vec<ValueType, 3> origin(0, 0, (blockIndex * blockSize));
vtkm::Vec<ValueType, 3> spacing(1, 1, 1);
ds = dsb.Create(vdims, origin, spacing);
localBlockIndicesPortal.Set(localBlockIndex, vtkm::Id3(0, 0, blockIndex));
localBlockOriginsPortal.Set(localBlockIndex,
vtkm::Id3(0, 0, (blockStart / blockSliceSize)));
localBlockSizesPortal.Set(
localBlockIndex,
vtkm::Id3(static_cast<vtkm::Id>(dims[0]), static_cast<vtkm::Id>(dims[1]), currBlockSize));
}
std::vector<vtkm::Float32> subValues((values.begin() + blockStart),
(values.begin() + blockEnd));
ds.AddPointField("values", subValues);
useDataSet.AppendPartition(ds);
}
for (vtkm::Id blockNo = 0; blockNo < blocksOnThisRank; ++blockNo)
{
vtkm::Id3 blockOrigin, blockSize, blockIndex;
std::tie(blockIndex, blockOrigin, blockSize) =
ComputeBlockExtents(globalSize, blocksPerDim, startBlockNo + blockNo);
useDataSet.AppendPartition(CreateSubDataSet(inDataSet, blockOrigin, blockSize, "values"));
localBlockIndicesPortal.Set(blockNo, blockIndex);
}
#endif // WITH_MPI construct input dataset
@ -644,11 +675,11 @@ int main(int argc, char* argv[])
prevTime = currTime;
// Convert the mesh of values into contour tree, pairs of vertex ids
vtkm::filter::ContourTreeAugmented filter(useMarchingCubes, computeRegularStructure);
vtkm::filter::scalar_topology::ContourTreeAugmented filter(useMarchingCubes,
computeRegularStructure);
#ifdef WITH_MPI
filter.SetSpatialDecomposition(
blocksPerDim, globalSize, localBlockIndices, localBlockOrigins, localBlockSizes);
filter.SetBlockIndices(blocksPerDim, localBlockIndices);
#endif
filter.SetActiveField("values");

@ -0,0 +1,77 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
// Copyright (c) 2018, The Regents of the University of California, through
// Lawrence Berkeley National Laboratory (subject to receipt of any required approvals
// from the U.S. Dept. of Energy). All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// (1) Redistributions of source code must retain the above copyright notice, this
// list of conditions and the following disclaimer.
//
// (2) Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// (3) Neither the name of the University of California, Lawrence Berkeley National
// Laboratory, U.S. Dept. of Energy nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
// OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
//
//=============================================================================
//
// COMMENTS:
//
// Input is assumed to be a sequence of lines of the form:
// I Global ID of branch root
// II Value of supernode
// III Global ID of supernode
//
// All lines are assumed to have been sorted already. Because of how the
// Unix sort utility operates (textual sort), the most we can assume is that all
// supernodes corresponding to a given branch root are sorted together.
//
// We therefore do simple stream processing, identifying new branches by
// the changes in root ID.
//
//=======================================================================================
#include <vtkm/filter/scalar_topology/worklet/contourtree_distributed/BranchCompiler.h>
int main()
{ // main()
vtkm::worklet::contourtree_distributed::BranchCompiler compiler;
compiler.Parse(std::cin);
compiler.Print(std::cout);
return 0;
} // main()

@ -81,10 +81,16 @@ if (VTKm_ENABLE_MPI)
target_link_libraries(TreeCompiler vtkm_filter)
vtkm_add_target_information(TreeCompiler DROP_UNUSED_SYMBOLS)
add_executable(BranchCompiler BranchCompilerApp.cxx)
target_link_libraries(BranchCompiler vtkm_filter)
vtkm_add_target_information(BranchCompiler DROP_UNUSED_SYMBOLS)
configure_file(split_data_2d.py split_data_2d.py COPYONLY)
configure_file(split_data_3d.py split_data_3d.py COPYONLY)
configure_file(hact_test.sh hact_test.sh COPYONLY)
configure_file(hact_test_volume.sh hact_test_volume.sh COPYONLY)
configure_file(hact_test_branch_decomposition.sh hact_test_branch_decomposition.sh COPYONLY)
configure_file(testrun_branch_decomposition.sh testrun_branch_decomposition.sh COPYONLY)
configure_file(testrun.sh testrun.sh COPYONLY)
configure_file(testrun_volume.sh testrun_volume.sh COPYONLY)
endif()

@ -71,12 +71,14 @@
#include <vtkm/cont/Timer.h>
#include <vtkm/io/BOVDataSetReader.h>
#include <vtkm/filter/ContourTreeUniformDistributed.h>
#include <vtkm/worklet/contourtree_augmented/PrintVectors.h>
#include <vtkm/worklet/contourtree_augmented/ProcessContourTree.h>
#include <vtkm/worklet/contourtree_augmented/Types.h>
#include <vtkm/worklet/contourtree_distributed/HierarchicalContourTree.h>
#include <vtkm/worklet/contourtree_distributed/TreeCompiler.h>
#include <vtkm/filter/scalar_topology/ContourTreeUniformDistributed.h>
#include <vtkm/filter/scalar_topology/DistributedBranchDecompositionFilter.h>
#include <vtkm/filter/scalar_topology/worklet/branch_decomposition/HierarchicalVolumetricBranchDecomposer.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/PrintVectors.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/ProcessContourTree.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_augmented/Types.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_distributed/HierarchicalContourTree.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_distributed/TreeCompiler.h>
// clang-format off
VTKM_THIRDPARTY_PRE_INCLUDE
@ -203,6 +205,19 @@ int main(int argc, char* argv[])
augmentHierarchicalTree = true;
}
bool computeHierarchicalVolumetricBranchDecomposition = false;
if (parser.hasOption("--computeVolumeBranchDecomposition"))
{
computeHierarchicalVolumetricBranchDecomposition = true;
if (!augmentHierarchicalTree)
{
VTKM_LOG_S(vtkm::cont::LogLevel::Warn,
"Warning: --computeVolumeBranchDecomposition only "
"allowed augmentation. Enabling --augmentHierarchicalTree option.");
augmentHierarchicalTree = true;
}
}
bool useBoundaryExtremaOnly = true;
if (parser.hasOption("--useFullBoundary"))
{
@ -261,7 +276,7 @@ int main(int argc, char* argv[])
{
if (rank == 0)
{
std::cout << "ContourTreeAugmented <options> <fileName>" << std::endl;
std::cout << "ContourTreeDistributed <options> <fileName>" << std::endl;
std::cout << std::endl;
std::cout << "<fileName> Name of the input data file." << std::endl;
std::cout << "The file is expected to be ASCII with either: " << std::endl;
@ -284,6 +299,9 @@ int main(int argc, char* argv[])
<< std::endl;
std::cout << " and when using only boundary extrema." << std::endl;
std::cout << "--augmentHierarchicalTree Augment the hierarchical tree." << std::endl;
std::cout << "--computeVolumeBranchDecomposition Compute the volume branch decomposition. "
<< std::endl;
std::cout << " Requries --augmentHierarchicalTree to be set." << std::endl;
std::cout << "--preSplitFiles Input data is already pre-split into blocks." << std::endl;
std::cout << "--saveDot Save DOT files of the distributed contour tree " << std::endl
<< " computation (Default=False). " << std::endl;
@ -311,6 +329,9 @@ int main(int argc, char* argv[])
<< " mc=" << useMarchingCubes << std::endl
<< " useFullBoundary=" << !useBoundaryExtremaOnly << std::endl
<< " saveDot=" << saveDotFiles << std::endl
<< " augmentHierarchicalTree=" << augmentHierarchicalTree << std::endl
<< " computeVolumetricBranchDecomposition="
<< computeHierarchicalVolumetricBranchDecomposition << std::endl
<< " saveOutputData=" << saveOutputData << std::endl
<< " forwardSummary=" << forwardSummary << std::endl
<< " nblocks=" << numBlocks << std::endl);
@ -380,14 +401,8 @@ int main(int argc, char* argv[])
vtkm::Id3 globalSize;
vtkm::Id3 blocksPerDim;
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockIndices;
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockOrigins;
vtkm::cont::ArrayHandle<vtkm::Id3> localBlockSizes;
localBlockIndices.Allocate(blocksPerRank);
localBlockOrigins.Allocate(blocksPerRank);
localBlockSizes.Allocate(blocksPerRank);
auto localBlockIndicesPortal = localBlockIndices.WritePortal();
auto localBlockOriginsPortal = localBlockOrigins.WritePortal();
auto localBlockSizesPortal = localBlockSizes.WritePortal();
// Read the pre-split data files
if (preSplitFiles)
@ -574,6 +589,11 @@ int main(int argc, char* argv[])
static_cast<ValueType>(offset[1]) };
const vtkm::Vec<ValueType, 2> v_spacing{ 1, 1 };
ds = dsb.Create(v_dims, v_origin, v_spacing);
vtkm::cont::CellSetStructured<2> cs;
cs.SetPointDimensions(v_dims);
cs.SetGlobalPointDimensions(vtkm::Id2{ globalSize[0], globalSize[1] });
cs.SetGlobalPointIndexStart(vtkm::Id2{ offset[0], offset[1] });
ds.SetCellSet(cs);
}
else
{
@ -586,6 +606,11 @@ int main(int argc, char* argv[])
static_cast<ValueType>(offset[2]) };
vtkm::Vec<ValueType, 3> v_spacing(1, 1, 1);
ds = dsb.Create(v_dims, v_origin, v_spacing);
vtkm::cont::CellSetStructured<3> cs;
cs.SetPointDimensions(v_dims);
cs.SetGlobalPointDimensions(globalSize);
cs.SetGlobalPointIndexStart(vtkm::Id3{ offset[0], offset[1], offset[2] });
ds.SetCellSet(cs);
}
ds.AddPointField("values", values);
// and add to partition
@ -596,14 +621,6 @@ int main(int argc, char* argv[])
vtkm::Id3{ static_cast<vtkm::Id>(blockIndex[0]),
static_cast<vtkm::Id>(blockIndex[1]),
static_cast<vtkm::Id>(nDims == 3 ? blockIndex[2] : 0) });
localBlockOriginsPortal.Set(blockNo,
vtkm::Id3{ static_cast<vtkm::Id>(offset[0]),
static_cast<vtkm::Id>(offset[1]),
static_cast<vtkm::Id>(nDims == 3 ? offset[2] : 0) });
localBlockSizesPortal.Set(blockNo,
vtkm::Id3{ static_cast<vtkm::Id>(dims[0]),
static_cast<vtkm::Id>(dims[1]),
static_cast<vtkm::Id>(nDims == 3 ? dims[2] : 0) });
if (blockNo == 0)
{
@ -732,7 +749,6 @@ int main(int argc, char* argv[])
: vtkm::Id3(static_cast<vtkm::Id>(dims[0]),
static_cast<vtkm::Id>(dims[1]),
static_cast<vtkm::Id>(1));
std::cout << blocksPerDim << " " << globalSize << std::endl;
{
vtkm::Id lastDimSize =
(nDims == 2) ? static_cast<vtkm::Id>(dims[1]) : static_cast<vtkm::Id>(dims[2]);
@ -780,12 +796,12 @@ int main(int argc, char* argv[])
vtkm::Vec<ValueType, 2> origin(0, blockIndex * blockSize);
vtkm::Vec<ValueType, 2> spacing(1, 1);
ds = dsb.Create(vdims, origin, spacing);
vtkm::cont::CellSetStructured<2> cs;
cs.SetPointDimensions(vdims);
cs.SetGlobalPointDimensions(vtkm::Id2{ globalSize[0], globalSize[1] });
cs.SetGlobalPointIndexStart(vtkm::Id2{ 0, (blockStart / blockSliceSize) });
ds.SetCellSet(cs);
localBlockIndicesPortal.Set(localBlockIndex, vtkm::Id3(0, blockIndex, 0));
localBlockOriginsPortal.Set(localBlockIndex,
vtkm::Id3(0, (blockStart / blockSliceSize), 0));
localBlockSizesPortal.Set(localBlockIndex,
vtkm::Id3(static_cast<vtkm::Id>(dims[0]), currBlockSize, 0));
}
// 3D data
else
@ -797,14 +813,12 @@ int main(int argc, char* argv[])
vtkm::Vec<ValueType, 3> origin(0, 0, (blockIndex * blockSize));
vtkm::Vec<ValueType, 3> spacing(1, 1, 1);
ds = dsb.Create(vdims, origin, spacing);
vtkm::cont::CellSetStructured<3> cs;
cs.SetPointDimensions(vdims);
cs.SetGlobalPointDimensions(globalSize);
cs.SetGlobalPointIndexStart(vtkm::Id3(0, 0, blockStart / blockSliceSize));
ds.SetCellSet(cs);
localBlockIndicesPortal.Set(localBlockIndex, vtkm::Id3(0, 0, blockIndex));
localBlockOriginsPortal.Set(localBlockIndex,
vtkm::Id3(0, 0, (blockStart / blockSliceSize)));
localBlockSizesPortal.Set(localBlockIndex,
vtkm::Id3(static_cast<vtkm::Id>(dims[0]),
static_cast<vtkm::Id>(dims[1]),
currBlockSize));
}
std::vector<vtkm::Float32> subValues((values.begin() + blockStart),
@ -842,17 +856,13 @@ int main(int argc, char* argv[])
prevTime = currTime;
// Convert the mesh of values into contour tree, pairs of vertex ids
vtkm::filter::ContourTreeUniformDistributed filter(blocksPerDim,
globalSize,
localBlockIndices,
localBlockOrigins,
localBlockSizes,
useBoundaryExtremaOnly,
useMarchingCubes,
augmentHierarchicalTree,
saveDotFiles,
timingsLogLevel,
treeLogLevel);
vtkm::filter::scalar_topology::ContourTreeUniformDistributed filter(timingsLogLevel,
treeLogLevel);
filter.SetBlockIndices(blocksPerDim, localBlockIndices);
filter.SetUseBoundaryExtremaOnly(useBoundaryExtremaOnly);
filter.SetUseMarchingCubes(useMarchingCubes);
filter.SetAugmentHierarchicalTree(augmentHierarchicalTree);
filter.SetSaveDotFiles(saveDotFiles);
filter.SetActiveField("values");
// Execute the contour tree analysis
@ -872,35 +882,56 @@ int main(int argc, char* argv[])
{
if (augmentHierarchicalTree)
{
for (vtkm::Id ds_no = 0; ds_no < result.GetNumberOfPartitions(); ++ds_no)
if (computeHierarchicalVolumetricBranchDecomposition)
{
auto ds = result.GetPartition(ds_no);
vtkm::worklet::contourtree_augmented::IdArrayType supernodes;
ds.GetField("Supernodes").GetData().AsArrayHandle(supernodes);
vtkm::worklet::contourtree_augmented::IdArrayType superarcs;
ds.GetField("Superarcs").GetData().AsArrayHandle(superarcs);
vtkm::worklet::contourtree_augmented::IdArrayType regularNodeGlobalIds;
ds.GetField("RegularNodeGlobalIds").GetData().AsArrayHandle(regularNodeGlobalIds);
vtkm::Id totalVolume = globalSize[0] * globalSize[1] * globalSize[2];
vtkm::worklet::contourtree_augmented::IdArrayType intrinsicVolume;
ds.GetField("IntrinsicVolume").GetData().AsArrayHandle(intrinsicVolume);
vtkm::worklet::contourtree_augmented::IdArrayType dependentVolume;
ds.GetField("DependentVolume").GetData().AsArrayHandle(dependentVolume);
vtkm::filter::scalar_topology::DistributedBranchDecompositionFilter bd_filter;
auto bd_result = bd_filter.Execute(result);
std::string dumpVolumesString =
vtkm::worklet::contourtree_distributed::HierarchicalContourTree<ValueType>::DumpVolumes(
supernodes,
superarcs,
regularNodeGlobalIds,
totalVolume,
intrinsicVolume,
dependentVolume);
for (vtkm::Id ds_no = 0; ds_no < result.GetNumberOfPartitions(); ++ds_no)
{
auto ds = bd_result.GetPartition(ds_no);
std::string branchDecompositionFileName = std::string("BranchDecomposition_Rank_") +
std::to_string(static_cast<int>(rank)) + std::string("_Block_") +
std::to_string(static_cast<int>(ds_no)) + std::string(".txt");
std::string volumesFileName = std::string("TreeWithVolumes_Rank_") +
std::to_string(static_cast<int>(rank)) + std::string("_Block_") +
std::to_string(static_cast<int>(ds_no)) + std::string(".txt");
std::ofstream treeStream(volumesFileName.c_str());
treeStream << dumpVolumesString;
std::ofstream treeStream(branchDecompositionFileName.c_str());
treeStream
<< vtkm::filter::scalar_topology::HierarchicalVolumetricBranchDecomposer::PrintBranches(
ds);
}
}
else
{
for (vtkm::Id ds_no = 0; ds_no < result.GetNumberOfPartitions(); ++ds_no)
{
auto ds = result.GetPartition(ds_no);
vtkm::worklet::contourtree_augmented::IdArrayType supernodes;
ds.GetField("Supernodes").GetData().AsArrayHandle(supernodes);
vtkm::worklet::contourtree_augmented::IdArrayType superarcs;
ds.GetField("Superarcs").GetData().AsArrayHandle(superarcs);
vtkm::worklet::contourtree_augmented::IdArrayType regularNodeGlobalIds;
ds.GetField("RegularNodeGlobalIds").GetData().AsArrayHandle(regularNodeGlobalIds);
vtkm::Id totalVolume = globalSize[0] * globalSize[1] * globalSize[2];
vtkm::worklet::contourtree_augmented::IdArrayType intrinsicVolume;
ds.GetField("IntrinsicVolume").GetData().AsArrayHandle(intrinsicVolume);
vtkm::worklet::contourtree_augmented::IdArrayType dependentVolume;
ds.GetField("DependentVolume").GetData().AsArrayHandle(dependentVolume);
std::string dumpVolumesString =
vtkm::worklet::contourtree_distributed::HierarchicalContourTree<ValueType>::DumpVolumes(
supernodes,
superarcs,
regularNodeGlobalIds,
totalVolume,
intrinsicVolume,
dependentVolume);
std::string volumesFileName = std::string("TreeWithVolumes_Rank_") +
std::to_string(static_cast<int>(rank)) + std::string("_Block_") +
std::to_string(static_cast<int>(ds_no)) + std::string(".txt");
std::ofstream treeStream(volumesFileName.c_str());
treeStream << dumpVolumesString;
}
}
}
else

@ -61,7 +61,7 @@
//==============================================================================
#include <stdio.h>
#include <vtkm/worklet/contourtree_distributed/TreeCompiler.h>
#include <vtkm/filter/scalar_topology/worklet/contourtree_distributed/TreeCompiler.h>
// main routine
int main(int argc, char** argv)

@ -0,0 +1,44 @@
#!/bin/sh
GTCT_DIR=${GTCT_DIR:-${HOME}/devel/parallel-peak-pruning/ContourTree/SweepAndMergeSerial/out}
RED=""
GREEN=""
NC=""
if [ -t 1 ]; then
# If stdout is a terminal, color Pass and FAIL green and red, respectively
RED=$(tput setaf 1)
GREEN=$(tput setaf 2)
NC=$(tput sgr0)
fi
echo "Removing previously generated files"
rm *.log *.dat
echo "Copying target file "$1 "into current directory"
filename=${1##*/}
fileroot=${filename%.txt}
cp $1 ${filename}
echo "Splitting data into "$2" x "$2" parts"
./split_data_2d.py ${filename} $2
rm ${filename}
echo "Running HACT"
n_parts=$(($2*$2))
# mpirun -np 4 --oversubscribe ./ContourTree_Distributed --vtkm-device Any --preSplitFiles --saveOutputData --augmentHierarchicalTree --computeVolumeBranchDecomposition --numBlocks=${n_parts} ${fileroot}_part_%d_of_${n_parts}.txt
mpirun -np 2 --oversubscribe ./ContourTree_Distributed --vtkm-device Any --preSplitFiles --saveOutputData --augmentHierarchicalTree --computeVolumeBranchDecomposition --numBlocks=${n_parts} ${fileroot}_part_%d_of_${n_parts}.txt
rm ${fileroot}_part_*_of_${n_parts}.txt
echo "Compiling Outputs"
sort -u BranchDecomposition_Rank_*.txt > outsort${fileroot}_$2x$2.txt
cat outsort${fileroot}_$2x$2.txt | ./BranchCompiler | sort > bcompile${fileroot}_$2x$2.txt
rm BranchDecomposition_Rank_*.txt outsort${fileroot}_$2x$2.txt
echo "Diffing"
echo diff bcompile${fileroot}_$2x$2.txt ${GTCT_DIR}/branch_decomposition_volume_hybrid_${fileroot}.txt
diff bcompile${fileroot}_$2x$2.txt ${GTCT_DIR}/branch_decomposition_volume_hybrid_${fileroot}.txt
if test $? -eq 0; then echo "${GREEN}Pass${NC}"; rm bcompile${fileroot}_$2x$2.txt; else echo "${RED}FAIL${NC}"; fi;
# echo "Generating Dot files"
# ./makedot.sh

@ -0,0 +1,99 @@
#!/bin/sh
mkdir -p out
DATA_DIR=${DATA_DIR:-${HOME}/devel/parallel-peak-pruning/Data/2D}
if [ ! -d $DATA_DIR ]; then
echo "Error: Directory $DATA_DIR does not exist!"
exit 1;
fi;
echo
echo "Starting Timing Runs"
echo
echo "8x9 Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/8x9test.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/8x9test.txt 4
# ./hact_test_branch_decomposition.sh $DATA_DIR/8x9test.txt 8
echo
echo "Vancouver Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vanc.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vanc.txt 4
# ./hact_test_branch_decomposition.sh $DATA_DIR/vanc.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vanc.txt 16
echo
echo "Vancouver SWSW Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSW.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSW.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSW.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSW.txt 16
echo
echo "Vancouver SWNW Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNW.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNW.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNW.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNW.txt 16
echo
echo "Vancouver SWSE Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSE.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSE.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSE.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWSE.txt 16
echo
echo "Vancouver SWNE Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNE.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNE.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNE.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSWNE.txt 16
echo
echo "Vancouver NE Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNE.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNE.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNE.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNE.txt 16
echo
echo "Vancouver NW Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNW.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNW.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNW.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverNW.txt 16
echo
echo "Vancouver SE Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSE.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSE.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSE.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSE.txt 16
echo
echo "Vancouver SW Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSW.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSW.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSW.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/vancouverSW.txt 16
echo
echo "Icefields Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/icefield.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/icefield.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/icefield.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/icefield.txt 16
# ./hact_test_branch_decomposition.sh $DATA_DIR/icefield.txt 32
# ./hact_test_branch_decomposition.sh $DATA_DIR/icefield.txt 64
echo
echo "GTOPO30 Full Tiny Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/gtopo_full_tiny.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/gtopo_full_tiny.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/gtopo_full_tiny.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo_full_tiny.txt 16
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo_full_tiny.txt 32
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo_full_tiny.txt 64
echo
echo "GTOPO30 UK Tile Test Set"
./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 2
./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 4
./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 8
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 16
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 32
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 64
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 128
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 256
# ./hact_test_branch_decomposition.sh $DATA_DIR/gtopo30w020n40.txt 512
echo "Done"

@ -16,7 +16,4 @@ find_package(VTKm REQUIRED QUIET)
if(TARGET vtkm_rendering)
add_executable(Demo Demo.cxx)
target_link_libraries(Demo PRIVATE vtkm_filter vtkm_rendering vtkm_source)
vtkm_add_target_information(Demo
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES Demo.cxx)
endif()

@ -19,13 +19,14 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/CellSetStructured.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/interop/TransferToOpenGL.h>
#include <vtkm/filter/FilterDataSet.h>
#include <vtkm/filter/NewFilterField.h>
#include <vtkm/worklet/WorkletPointNeighborhood.h>
#include <vtkm/cont/Invoker.h>
@ -48,11 +49,6 @@
#include "LoadShaders.h"
struct GameOfLifePolicy : public vtkm::filter::PolicyBase<GameOfLifePolicy>
{
using FieldTypeList = vtkm::List<vtkm::UInt8, vtkm::Vec4ui_8>;
};
struct UpdateLifeState : public vtkm::worklet::WorkletPointNeighborhood
{
using CountingHandle = vtkm::cont::ArrayHandleCounting<vtkm::Id>;
@ -99,44 +95,33 @@ struct UpdateLifeState : public vtkm::worklet::WorkletPointNeighborhood
};
class GameOfLife : public vtkm::filter::FilterDataSet<GameOfLife>
class GameOfLife : public vtkm::filter::NewFilterField
{
public:
template <typename Policy>
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input,
vtkm::filter::PolicyBase<Policy> policy)
VTKM_CONT GameOfLife() { this->SetActiveField("state", vtkm::cont::Field::Association::Points); }
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input) override
{
vtkm::cont::ArrayHandle<vtkm::UInt8> state;
vtkm::cont::ArrayHandle<vtkm::UInt8> prevstate;
vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> colors;
//get the coordinate system we are using for the 2D area
const vtkm::cont::UnknownCellSet cells = input.GetCellSet();
vtkm::cont::CellSetStructured<2> cells;
input.GetCellSet().AsCellSet(cells);
//get the previous state of the game
input.GetField("state", vtkm::cont::Field::Association::Points).GetData().CopyTo(prevstate);
this->GetFieldFromDataSet(input).GetData().AsArrayHandle(prevstate);
//Update the game state
this->Invoke(
UpdateLifeState{}, vtkm::filter::ApplyPolicyCellSet(cells, policy), prevstate, state, colors);
this->Invoke(UpdateLifeState{}, cells, prevstate, state, colors);
//save the results
vtkm::cont::DataSet output;
output.CopyStructure(input);
vtkm::cont::DataSet output =
this->CreateResultFieldPoint(input, this->GetActiveFieldName(), state);
output.AddField(vtkm::cont::make_FieldPoint("colors", colors));
output.AddField(vtkm::cont::make_FieldPoint("state", state));
return output;
}
template <typename DerivedPolicy>
VTKM_CONT bool MapFieldOntoOutput(vtkm::cont::DataSet&,
const vtkm::cont::Field&,
vtkm::filter::PolicyBase<DerivedPolicy>)
{
return false;
}
};
struct UploadData
@ -346,7 +331,7 @@ int main(int argc, char** argv)
glutDisplayFunc([]() {
const vtkm::Float32 c = static_cast<vtkm::Float32>(gTimer.GetElapsedTime());
vtkm::cont::DataSet oData = gFilter->Execute(*gData, GameOfLifePolicy());
vtkm::cont::DataSet oData = gFilter->Execute(*gData);
gRenderer->render(oData);
glutSwapBuffers();
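
A minimal usage sketch (not part of this patch) of driving the converted filter; the 32x32 grid size and the zeroed initial "state" field are illustrative placeholders. With NewFilterField the policy argument disappears and the constructor already selects the active field:

#include <vector>

#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>

int main(int argc, char** argv)
{
  vtkm::cont::Initialize(argc, argv);

  // Build a small 2D uniform grid and attach an initial "state" point field.
  vtkm::cont::DataSet grid = vtkm::cont::DataSetBuilderUniform::Create(vtkm::Id2(32, 32));
  std::vector<vtkm::UInt8> initialState(32 * 32, 0);
  grid.AddPointField("state", initialState);

  GameOfLife filter; // constructor calls SetActiveField("state", Association::Points)
  vtkm::cont::DataSet next = filter.Execute(grid); // no GameOfLifePolicy argument anymore
  (void)next;
  return 0;
}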

@ -10,8 +10,7 @@
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/filter/CreateResult.h>
#include <vtkm/filter/FilterField.h>
#include <vtkm/filter/NewFilterField.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
@ -23,58 +22,56 @@
#include <cstdlib>
#include <iostream>
namespace vtkm
{
namespace worklet
namespace hello_worklet_example
{
struct HelloWorklet : public vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn inVector, FieldOut outMagnitude);
VTKM_EXEC void operator()(const vtkm::Vec3f& inVector, vtkm::FloatDefault& outMagnitude) const
template <typename T>
VTKM_EXEC void operator()(const vtkm::Vec<T, 3>& inVector, T& outMagnitude) const
{
outMagnitude = vtkm::Magnitude(inVector);
}
};
}
} // namespace vtkm::worklet
} // namespace hello_worklet_example
namespace vtkm
{
namespace filter
{
class HelloField : public vtkm::filter::FilterField<HelloField>
class HelloField : public vtkm::filter::NewFilterField
{
public:
// Specify that this filter operates on 3-vectors
using SupportedTypes = vtkm::TypeListFieldVec3;
template <typename FieldType, typename Policy>
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inDataSet,
const FieldType& inField,
const vtkm::filter::FieldMetadata& fieldMetadata,
vtkm::filter::PolicyBase<Policy>)
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inDataSet)
{
VTKM_IS_ARRAY_HANDLE(FieldType);
// Input field
vtkm::cont::Field inField = this->GetFieldFromDataSet(inDataSet);
//construct our output
vtkm::cont::ArrayHandle<vtkm::FloatDefault> outField;
// Holder for output
vtkm::cont::UnknownArrayHandle outArray;
//construct our invoker to launch worklets
vtkm::worklet::HelloWorklet mag;
this->Invoke(mag, inField, outField); //launch mag worklets
hello_worklet_example::HelloWorklet mag;
auto resolveType = [&](const auto& inputArray) {
// use std::decay to remove const ref from the decltype of inputArray.
using T = typename std::decay_t<decltype(inputArray)>::ValueType::ComponentType;
vtkm::cont::ArrayHandle<T> result;
this->Invoke(mag, inputArray, result);
outArray = result;
};
//construct output field information
if (this->GetOutputFieldName().empty())
this->CastAndCallVecField<3>(inField, resolveType);
std::string outFieldName = this->GetOutputFieldName();
if (outFieldName.empty())
{
this->SetOutputFieldName(fieldMetadata.GetName() + "_magnitude");
outFieldName = inField.GetName() + "_magnitude";
}
//return the result, which is the input data with the computed field added to it
return vtkm::filter::CreateResult(
inDataSet, outField, this->GetOutputFieldName(), fieldMetadata);
return this->CreateResultField(inDataSet, outFieldName, inField.GetAssociation(), outArray);
}
};
}
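
A usage sketch (not from this patch) of the rewritten filter; the file name and the "velocity" field name are purely illustrative. When no output field name is set, the filter appends "_magnitude" to the input field's name:

#include <vtkm/cont/Initialize.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>

int main(int argc, char** argv)
{
  vtkm::cont::Initialize(argc, argv);

  // Any data set carrying a 3-component point or cell field works here.
  vtkm::io::VTKDataSetReader reader("data.vtk");
  vtkm::cont::DataSet input = reader.ReadDataSet();

  vtkm::filter::HelloField helloField;
  helloField.SetActiveField("velocity");
  vtkm::cont::DataSet result = helloField.Execute(input); // adds "velocity_magnitude"

  vtkm::io::VTKDataSetWriter writer("magnitude.vtk");
  writer.WriteDataSet(result);
  return 0;
}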

@ -13,9 +13,9 @@ project(Histogram CXX)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
if (VTKm_ENABLE_MPI)
add_executable(Histogram Histogram.cxx HistogramMPI.h HistogramMPI.hxx)
add_executable(Histogram Histogram.cxx HistogramMPI.h HistogramMPI.cxx)
target_link_libraries(Histogram PRIVATE vtkm_filter MPI::MPI_CXX)
vtkm_add_target_information(Histogram
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES Histogram.cxx)
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES HistogramMPI.cxx)
endif()

@ -64,9 +64,8 @@ int main(int argc, char* argv[])
// tell VTK-m the communicator to use.
vtkm::cont::EnvironmentTracker::SetCommunicator(world);
int rank, size;
MPI_Comm_rank(vtkmdiy::mpi::mpi_cast(world.handle()), &rank);
MPI_Comm_size(vtkmdiy::mpi::mpi_cast(world.handle()), &size);
int rank = world.rank();
int size = world.size();
if (argc != 2)
{

@ -8,6 +8,8 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include "HistogramMPI.h"
#include <vtkm/filter/density_estimate/worklet/FieldHistogram.h>
#include <vtkm/cont/Algorithm.h>
@ -102,53 +104,69 @@ public:
} // namespace detail
//-----------------------------------------------------------------------------
inline VTKM_CONT HistogramMPI::HistogramMPI()
: NumberOfBins(10)
, BinDelta(0)
, ComputedRange()
, Range()
VTKM_CONT vtkm::cont::DataSet HistogramMPI::DoExecute(const vtkm::cont::DataSet& input)
{
this->SetOutputFieldName("histogram");
}
const auto& fieldArray = this->GetFieldFromDataSet(input).GetData();
//-----------------------------------------------------------------------------
template <typename T, typename StorageType, typename DerivedPolicy>
inline VTKM_CONT vtkm::cont::DataSet HistogramMPI::DoExecute(
const vtkm::cont::DataSet&,
const vtkm::cont::ArrayHandle<T, StorageType>& field,
const vtkm::filter::FieldMetadata&,
const vtkm::filter::PolicyBase<DerivedPolicy>&)
{
vtkm::cont::ArrayHandle<vtkm::Id> binArray;
T delta;
vtkm::worklet::FieldHistogram worklet;
if (this->ComputedRange.IsNonEmpty())
if (!this->InExecutePartitions)
{
worklet.Run(field,
// Handle initialization that would be done in PreExecute if the data set had partitions.
if (this->Range.IsNonEmpty())
{
this->ComputedRange = this->Range;
}
else
{
auto handle = vtkm::cont::FieldRangeGlobalCompute(
input, this->GetActiveFieldName(), this->GetActiveFieldAssociation());
if (handle.GetNumberOfValues() != 1)
{
throw vtkm::cont::ErrorFilterExecution("expecting scalar field.");
}
this->ComputedRange = handle.ReadPortal().Get(0);
}
}
vtkm::cont::ArrayHandle<vtkm::Id> binArray;
auto resolveType = [&](const auto& concrete) {
using T = typename std::decay_t<decltype(concrete)>::ValueType;
T delta;
vtkm::worklet::FieldHistogram worklet;
worklet.Run(concrete,
this->NumberOfBins,
static_cast<T>(this->ComputedRange.Min),
static_cast<T>(this->ComputedRange.Max),
delta,
binArray);
}
else
{
worklet.Run(field, this->NumberOfBins, this->ComputedRange, delta, binArray);
}
this->BinDelta = static_cast<vtkm::Float64>(delta);
this->BinDelta = static_cast<vtkm::Float64>(delta);
};
fieldArray
.CastAndCallForTypesWithFloatFallback<vtkm::TypeListFieldScalar, VTKM_DEFAULT_STORAGE_LIST>(
resolveType);
vtkm::cont::DataSet output;
vtkm::cont::Field rfield(
this->GetOutputFieldName(), vtkm::cont::Field::Association::WholeMesh, binArray);
output.AddField(rfield);
output.AddField(
{ this->GetOutputFieldName(), vtkm::cont::Field::Association::WholeDataSet, binArray });
// The output is a "summary" of the input, no need to map fields
return output;
}
VTKM_CONT vtkm::cont::PartitionedDataSet HistogramMPI::DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& input)
{
this->PreExecute(input);
auto result = this->NewFilter::DoExecutePartitions(input);
this->PostExecute(input, result);
return result;
}
//-----------------------------------------------------------------------------
template <typename DerivedPolicy>
inline VTKM_CONT void HistogramMPI::PreExecute(const vtkm::cont::PartitionedDataSet& input,
const vtkm::filter::PolicyBase<DerivedPolicy>&)
inline VTKM_CONT void HistogramMPI::PreExecute(const vtkm::cont::PartitionedDataSet& input)
{
if (this->Range.IsNonEmpty())
{
@ -167,10 +185,8 @@ inline VTKM_CONT void HistogramMPI::PreExecute(const vtkm::cont::PartitionedData
}
//-----------------------------------------------------------------------------
template <typename DerivedPolicy>
inline VTKM_CONT void HistogramMPI::PostExecute(const vtkm::cont::PartitionedDataSet&,
vtkm::cont::PartitionedDataSet& result,
const vtkm::filter::PolicyBase<DerivedPolicy>&)
vtkm::cont::PartitionedDataSet& result)
{
// iterate and compute HistogramMPI for each local block.
detail::DistributedHistogram helper(result.GetNumberOfPartitions());
@ -182,7 +198,7 @@ inline VTKM_CONT void HistogramMPI::PostExecute(const vtkm::cont::PartitionedDat
vtkm::cont::DataSet output;
vtkm::cont::Field rfield(this->GetOutputFieldName(),
vtkm::cont::Field::Association::WholeMesh,
vtkm::cont::Field::Association::WholeDataSet,
helper.ReduceAll(this->NumberOfBins));
output.AddField(rfield);
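
For reference (not part of the patch), a small sketch of the dispatch pattern the new DoExecute relies on: CastAndCallForTypesWithFloatFallback tries each scalar type in the list against the UnknownArrayHandle and, as its name suggests, falls back to a FloatDefault copy when the stored type is not listed, so the lambda always sees a concrete ArrayHandle:

#include <type_traits>

#include <vtkm/TypeList.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DefaultTypes.h>
#include <vtkm/cont/UnknownArrayHandle.h>

void ResolveExample()
{
  vtkm::cont::ArrayHandle<vtkm::Float32> concreteInput;
  concreteInput.Allocate(10);
  vtkm::cont::UnknownArrayHandle anyArray = concreteInput;

  anyArray
    .CastAndCallForTypesWithFloatFallback<vtkm::TypeListFieldScalar, VTKM_DEFAULT_STORAGE_LIST>(
      [](const auto& resolved) {
        // `resolved` is a concrete vtkm::cont::ArrayHandle<T> here.
        using T = typename std::decay_t<decltype(resolved)>::ValueType;
        (void)sizeof(T);
      });
}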

@ -10,7 +10,7 @@
#ifndef vtk_m_examples_histogram_HistogramMPI_h
#define vtk_m_examples_histogram_HistogramMPI_h
#include <vtkm/filter/FilterField.h>
#include <vtkm/filter/NewFilterField.h>
namespace example
{
@ -19,7 +19,7 @@ namespace example
///
/// Construct a HistogramMPI with a default of 10 bins.
///
class HistogramMPI : public vtkm::filter::FilterField<HistogramMPI>
class HistogramMPI : public vtkm::filter::NewFilterField
{
public:
//currently the HistogramMPI filter only works on scalar data.
@ -29,7 +29,7 @@ public:
//Construct a HistogramMPI with a default of 10 bins
VTKM_CONT
HistogramMPI();
HistogramMPI() { this->SetOutputFieldName("histogram"); }
VTKM_CONT
void SetNumberOfBins(vtkm::Id count) { this->NumberOfBins = count; }
@ -37,7 +37,7 @@ public:
VTKM_CONT
vtkm::Id GetNumberOfBins() const { return this->NumberOfBins; }
//@{
///@{
/// Get/Set the range to use to generate the HistogramMPI. If range is set to
/// empty, the field's global range (computed using `vtkm::cont::FieldRangeGlobalCompute`)
/// will be used.
@ -46,7 +46,7 @@ public:
VTKM_CONT
const vtkm::Range& GetRange() const { return this->Range; }
//@}
///@}
/// Returns the bin delta of the last computed field.
VTKM_CONT
@ -58,34 +58,27 @@ public:
VTKM_CONT
vtkm::Range GetComputedRange() const { return this->ComputedRange; }
template <typename T, typename StorageType, typename DerivedPolicy>
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input,
const vtkm::cont::ArrayHandle<T, StorageType>& field,
const vtkm::filter::FieldMetadata& fieldMeta,
const vtkm::filter::PolicyBase<DerivedPolicy>& policy);
protected:
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input) override;
VTKM_CONT vtkm::cont::PartitionedDataSet DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& input) override;
//@{
///@{
/// When operating on vtkm::cont::PartitionedDataSet, we
/// want to do processing across ranks as well. Adding pre/post execute
/// hooks for that purpose does the trick.
template <typename DerivedPolicy>
VTKM_CONT void PreExecute(const vtkm::cont::PartitionedDataSet& input,
const vtkm::filter::PolicyBase<DerivedPolicy>& policy);
template <typename DerivedPolicy>
VTKM_CONT void PreExecute(const vtkm::cont::PartitionedDataSet& input);
VTKM_CONT void PostExecute(const vtkm::cont::PartitionedDataSet& input,
vtkm::cont::PartitionedDataSet& output,
const vtkm::filter::PolicyBase<DerivedPolicy>&);
//@}
vtkm::cont::PartitionedDataSet& output);
///@}
private:
vtkm::Id NumberOfBins;
vtkm::Float64 BinDelta;
vtkm::Id NumberOfBins = 10;
vtkm::Float64 BinDelta = 0;
vtkm::Range ComputedRange;
vtkm::Range Range;
bool InExecutePartitions = false;
};
} // namespace example
#include "HistogramMPI.hxx"
#endif // vtk_m_examples_histogram_HistogramMPI_h
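
A usage sketch (not in the patch) of the updated filter, assuming a per-rank PartitionedDataSet and an illustrative field name; SetRange is the counterpart implied by the Get/Set group above, and leaving the range empty makes the filter fall back to vtkm::cont::FieldRangeGlobalCompute:

#include <vtkm/cont/PartitionedDataSet.h>

#include "HistogramMPI.h"

vtkm::cont::PartitionedDataSet ComputeHistogram(const vtkm::cont::PartitionedDataSet& blocks)
{
  example::HistogramMPI histogram;
  histogram.SetNumberOfBins(32);
  histogram.SetActiveField("pointvar"); // illustrative field name
  // histogram.SetRange(vtkm::Range(0.0, 100.0)); // optional; otherwise computed globally
  return histogram.Execute(blocks);
}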

@ -17,7 +17,7 @@
#include <vtkm/cont/DataSetBuilderRectilinear.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/filter/Lagrangian.h>
#include <vtkm/filter/flow/Lagrangian.h>
using namespace std;
@ -79,7 +79,7 @@ int main(int argc, char** argv)
vtkm::cont::InitializeOptions::DefaultAnyDevice | vtkm::cont::InitializeOptions::Strict;
vtkm::cont::Initialize(argc, argv, opts);
vtkm::filter::Lagrangian lagrangianFilter;
vtkm::filter::flow::Lagrangian lagrangianFilter;
lagrangianFilter.SetResetParticles(true);
vtkm::Float32 stepSize = 0.01f;
lagrangianFilter.SetStepSize(stepSize);

@ -27,11 +27,3 @@ find_package(VTKm REQUIRED QUIET)
add_executable(MeshQuality MeshQuality.cxx)
target_link_libraries(MeshQuality PRIVATE vtkm_filter vtkm_io)
if(TARGET vtkm::tbb)
target_compile_definitions(MeshQuality PRIVATE BUILDING_TBB_VERSION)
endif()
vtkm_add_target_information(MeshQuality
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES MeshQuality.cxx)

@ -8,11 +8,251 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#define vtk_m_examples_multibackend_MultiDeviceGradient_cxx
#include "MultiDeviceGradient.h"
#include "MultiDeviceGradient.hxx"
template vtkm::cont::PartitionedDataSet MultiDeviceGradient::PrepareForExecution<
vtkm::filter::PolicyDefault>(const vtkm::cont::PartitionedDataSet&,
const vtkm::filter::PolicyBase<vtkm::filter::PolicyDefault>&);
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/filter/vector_analysis/Gradient.h>
namespace
{
void process_partition_tbb(RuntimeTaskQueue& queue)
{
//Step 1. Set the device adapter for this thread to TBB.
//This makes sure that any vtkm::filters used by our
//task operate only on TBB. The "global" thread tracker
//is actually thread-local, so we can use that.
//
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(vtkm::cont::DeviceAdapterTagTBB{});
while (queue.hasTasks())
{
//Step 2. Get the task to run on TBB
auto task = queue.pop();
//Step 3. Run the task on TBB. We check the validity
//of the task since we could be given an empty task
//when the queue is empty and we are shutting down
if (task != nullptr)
{
task();
}
//Step 4. Notify the queue that we finished processing this task
queue.completedTask();
std::cout << "finished a partition on tbb (" << std::this_thread::get_id() << ")" << std::endl;
}
}
void process_partition_openMP(RuntimeTaskQueue& queue)
{
//Step 1. Set the device adapter for this thread to openMP.
//This makes sure that any vtkm::filters used by our
//task operate only on openMP. The "global" thread tracker
//is actually thread-local, so we can use that.
//
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
while (queue.hasTasks())
{
//Step 2. Get the task to run on openMP
auto task = queue.pop();
//Step 3. Run the task on openMP. We check the validity
//of the task since we could be given an empty task
//when the queue is empty and we are shutting down
if (task != nullptr)
{
task();
}
//Step 4. Notify the queue that we finished processing this task
queue.completedTask();
std::cout << "finished a partition on openMP (" << std::this_thread::get_id() << ")"
<< std::endl;
}
}
void process_partition_cuda(RuntimeTaskQueue& queue, int gpuId)
{
//Step 1. Set the device adapter for this thread to cuda.
//This makes sure that any vtkm::filters used by our
//task operate only on cuda. The "global" thread tracker
//is actually thread-local, so we can use that.
//
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(vtkm::cont::DeviceAdapterTagCuda{});
(void)gpuId;
while (queue.hasTasks())
{
//Step 2. Get the task to run on cuda
auto task = queue.pop();
//Step 3. Run the task on cuda. We check the validity
//of the task since we could be given an empty task
//when the queue is empty and we are shutting down
if (task != nullptr)
{
task();
}
//Step 4. Notify the queue that we finished processing this task
queue.completedTask();
std::cout << "finished a partition on cuda (" << std::this_thread::get_id() << ")" << std::endl;
}
}
} //namespace
//-----------------------------------------------------------------------------
VTKM_CONT MultiDeviceGradient::MultiDeviceGradient()
: ComputePointGradient(false)
, Queue()
, Workers()
{
//Step 1. Determine the number of workers we want
auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
const bool runOnCuda = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagCuda{});
const bool runOnOpenMP = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagOpenMP{});
const bool runOnTbb = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagTBB{});
//Note: the virtual implementation currently has some issues.
//In a multi-threaded environment, either CUDA alone or
//all SMP backends (Serial, TBB, OpenMP) can be used, but not both.
//Once this issue is resolved we can enable CUDA + TBB in
//this example.
//Step 2. Launch workers that will use cuda (if enabled).
//The threads share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
if (runOnCuda)
{
std::cout << "adding cuda workers" << std::endl;
try
{
vtkm::Id gpu_count = 0;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda{})
.GetMaxDevices(gpu_count);
for (int i = 0; i < gpu_count; ++i)
{
//The number of workers per GPU is purely arbitrary currently,
//but in general we want multiple of them so we can overlap compute
//and transfer
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
}
}
catch (const vtkm::cont::ErrorBadDevice& err)
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"Error getting CudaDeviceCount: " << err.GetMessage());
}
}
//Step 3. Launch a worker that will use openMP (if enabled).
//The threads share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
else if (runOnOpenMP)
{
std::cout << "adding a openMP worker" << std::endl;
this->Workers.emplace_back(std::bind(process_partition_openMP, std::ref(this->Queue)));
}
//Step 4. Launch a worker that will use tbb (if enabled).
//The threads share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
else if (runOnTbb)
{
std::cout << "adding a tbb worker" << std::endl;
this->Workers.emplace_back(std::bind(process_partition_tbb, std::ref(this->Queue)));
}
}
//-----------------------------------------------------------------------------
VTKM_CONT MultiDeviceGradient::~MultiDeviceGradient()
{
this->Queue.shutdown();
//shutdown all workers
for (auto&& thread : this->Workers)
{
thread.join();
}
}
//-----------------------------------------------------------------------------
VTKM_CONT vtkm::cont::PartitionedDataSet MultiDeviceGradient::DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& pds)
{
//Step 1. Say that we have no more to submit for this PartitionedDataSet
//This is needed to happen for each execute as we want to support
//the same filter being used for multiple inputs
this->Queue.reset();
//Step 2. Construct the PartitionedDataSet we are going to fill. The size
//signature to PartitionedDataSet just reserves size
vtkm::cont::PartitionedDataSet output;
output.AppendPartitions(
std::vector<vtkm::cont::DataSet>(static_cast<size_t>(pds.GetNumberOfPartitions())));
vtkm::cont::PartitionedDataSet* outPtr = &output;
//Step 3. Construct the filter we want to run on each partition
vtkm::filter::vector_analysis::Gradient gradient;
gradient.SetComputePointGradient(this->GetComputePointGradient());
gradient.SetActiveField(this->GetActiveFieldName());
//Step 3b. Post 1 partition up as work and block until it is
//complete. This is needed as currently constructing the virtual
//Point Coordinates is not thread safe.
auto partition = pds.cbegin();
{
vtkm::cont::DataSet input = *partition;
this->Queue.push( //build a lambda that is the work to do
[=]() {
vtkm::filter::vector_analysis::Gradient perThreadGrad = gradient;
vtkm::cont::DataSet result = perThreadGrad.Execute(input);
outPtr->ReplacePartition(0, result);
});
this->Queue.waitForAllTasksToComplete();
partition++;
}
vtkm::Id index = 1;
for (; partition != pds.cend(); ++partition)
{
vtkm::cont::DataSet input = *partition;
//Step 4. For each input partition construct a lambda
//and add it to the queue for workers to take. This
//will allow us to have multiple work items execute in a
//non-blocking manner
this->Queue.push( //build a lambda that is the work to do
[=]() {
vtkm::filter::vector_analysis::Gradient perThreadGrad = gradient;
vtkm::cont::DataSet result = perThreadGrad.Execute(input);
outPtr->ReplacePartition(index, result);
});
index++;
}
// Step 5. Wait on all workers to finish
this->Queue.waitForAllTasksToComplete();
return output;
}
VTKM_CONT vtkm::cont::DataSet MultiDeviceGradient::DoExecute(const vtkm::cont::DataSet& inData)
{
vtkm::cont::PartitionedDataSet outData = this->Execute(vtkm::cont::PartitionedDataSet(inData));
VTKM_ASSERT(outData.GetNumberOfPartitions() == 1);
return outData.GetPartition(0);
}
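
A usage sketch (not part of the patch); the field name is illustrative, and SetComputePointGradient is assumed from the GetComputePointGradient call in DoExecutePartitions. Constructing the filter spins up the worker threads, and executing on a PartitionedDataSet feeds one task per partition into the shared queue:

#include <vtkm/cont/PartitionedDataSet.h>

#include "MultiDeviceGradient.h"

vtkm::cont::PartitionedDataSet RunGradient(const vtkm::cont::PartitionedDataSet& blocks)
{
  MultiDeviceGradient gradient;        // constructor launches CUDA/OpenMP/TBB workers
  gradient.SetComputePointGradient(true);
  gradient.SetActiveField("pointvar"); // illustrative field name
  return gradient.Execute(blocks);     // DoExecutePartitions pushes one task per partition
}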

@ -10,7 +10,7 @@
#ifndef vtk_m_examples_multibackend_MultiDeviceGradient_h
#define vtk_m_examples_multibackend_MultiDeviceGradient_h
#include <vtkm/filter/FilterField.h>
#include <vtkm/filter/NewFilterField.h>
#include "TaskQueue.h"
@ -22,11 +22,9 @@ using RuntimeTaskQueue = TaskQueue<std::function<void()>>;
///
/// MultiDeviceGradient runs the gradient filter through a pool of worker
/// threads bound to the available device backends (CUDA, OpenMP, or TBB).
class MultiDeviceGradient : public vtkm::filter::FilterField<MultiDeviceGradient>
class MultiDeviceGradient : public vtkm::filter::NewFilterField
{
public:
using SupportedTypes = vtkm::List<vtkm::Float32, vtkm::Float64, vtkm::Vec3f_32, vtkm::Vec3f_64>;
//Construct a MultiDeviceGradient and worker pool
VTKM_CONT
MultiDeviceGradient();
@ -43,10 +41,12 @@ public:
/// Will submit each block to a work queue that the threads will
/// pull work from
template <typename DerivedPolicy>
VTKM_CONT vtkm::cont::PartitionedDataSet PrepareForExecution(
const vtkm::cont::PartitionedDataSet&,
const vtkm::filter::PolicyBase<DerivedPolicy>&);
VTKM_CONT vtkm::cont::PartitionedDataSet DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& inData) override;
// All filters must override this method. Our implementation just wraps the input in
// a single-partition PartitionedDataSet and defers to DoExecutePartitions.
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inData) override;
private:
bool ComputePointGradient;
@ -54,10 +54,4 @@ private:
std::vector<std::thread> Workers;
};
#ifndef vtk_m_examples_multibackend_MultiDeviceGradient_cxx
extern template vtkm::cont::PartitionedDataSet MultiDeviceGradient::PrepareForExecution<
vtkm::filter::PolicyDefault>(const vtkm::cont::PartitionedDataSet&,
const vtkm::filter::PolicyBase<vtkm::filter::PolicyDefault>&);
#endif
#endif

@ -1,252 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceInformation.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/filter/vector_analysis/Gradient.h>
namespace
{
void process_partition_tbb(RuntimeTaskQueue& queue)
{
//Step 1. Set the device adapter to this thread to TBB.
//This makes sure that any vtkm::filters used by our
//task operate only on TBB. The "global" thread tracker
//is actually thread-local, so we can use that.
//
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(vtkm::cont::DeviceAdapterTagTBB{});
while (queue.hasTasks())
{
//Step 2. Get the task to run on TBB
auto task = queue.pop();
//Step 3. Run the task on TBB. We check the validity
//of the task since we could be given an empty task
//when the queue is empty and we are shutting down
if (task != nullptr)
{
task();
}
//Step 4. Notify the queue that we finished processing this task
queue.completedTask();
std::cout << "finished a partition on tbb (" << std::this_thread::get_id() << ")" << std::endl;
}
}
void process_partition_openMP(RuntimeTaskQueue& queue)
{
//Step 1. Set the device adapter to this thread to openMP.
//This makes sure that any vtkm::filters used by our
//task operate only on openMP. The "global" thread tracker
//is actually thread-local, so we can use that.
//
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
while (queue.hasTasks())
{
//Step 2. Get the task to run on openMP
auto task = queue.pop();
//Step 3. Run the task on openMP. We check the validity
//of the task since we could be given an empty task
//when the queue is empty and we are shutting down
if (task != nullptr)
{
task();
}
//Step 4. Notify the queue that we finished processing this task
queue.completedTask();
std::cout << "finished a partition on openMP (" << std::this_thread::get_id() << ")"
<< std::endl;
}
}
void process_partition_cuda(RuntimeTaskQueue& queue, int gpuId)
{
//Step 1. Set the device adapter to this thread to cuda.
//This makes sure that any vtkm::filters used by our
//task operate only on cuda. The "global" thread tracker
//is actually thread-local, so we can use that.
//
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(vtkm::cont::DeviceAdapterTagCuda{});
(void)gpuId;
while (queue.hasTasks())
{
//Step 2. Get the task to run on cuda
auto task = queue.pop();
//Step 3. Run the task on cuda. We check the validity
//of the task since we could be given an empty task
//when the queue is empty and we are shutting down
if (task != nullptr)
{
task();
}
//Step 4. Notify the queue that we finished processing this task
queue.completedTask();
std::cout << "finished a partition on cuda (" << std::this_thread::get_id() << ")" << std::endl;
}
}
} //namespace
//-----------------------------------------------------------------------------
VTKM_CONT MultiDeviceGradient::MultiDeviceGradient()
: ComputePointGradient(false)
, Queue()
, Workers()
{
//Step 1. Determine the number of workers we want
auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
const bool runOnCuda = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagCuda{});
const bool runOnOpenMP = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagOpenMP{});
const bool runOnTbb = tracker.CanRunOn(vtkm::cont::DeviceAdapterTagTBB{});
//Note currently the virtual implementation has some issues
//In a multi-threaded environment only cuda can be used or
//all SMP backends ( Serial, TBB, OpenMP ).
//Once this issue is resolved we can enable CUDA + TBB in
//this example
//Step 2. Launch workers that will use cuda (if enabled).
//The threads share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
if (runOnCuda)
{
std::cout << "adding cuda workers" << std::endl;
try
{
vtkm::Id gpu_count = 0;
vtkm::cont::RuntimeDeviceInformation{}
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda{})
.GetMaxDevices(gpu_count);
for (int i = 0; i < gpu_count; ++i)
{
//The number of workers per GPU is purely arbitrary currently,
//but in general we want multiple of them so we can overlap compute
//and transfer
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
this->Workers.emplace_back(std::bind(process_partition_cuda, std::ref(this->Queue), i));
}
}
catch (const vtkm::cont::ErrorBadDevice& err)
{
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"Error getting CudaDeviceCount: " << err.GetMessage());
}
}
//Step 3. Launch a worker that will use openMP (if enabled).
//The threads share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
else if (runOnOpenMP)
{
std::cout << "adding a openMP worker" << std::endl;
this->Workers.emplace_back(std::bind(process_partition_openMP, std::ref(this->Queue)));
}
//Step 4. Launch a worker that will use tbb (if enabled).
//The threads share a queue object so we need to explicitly pass it
//by reference (the std::ref call)
else if (runOnTbb)
{
std::cout << "adding a tbb worker" << std::endl;
this->Workers.emplace_back(std::bind(process_partition_tbb, std::ref(this->Queue)));
}
}
//-----------------------------------------------------------------------------
VTKM_CONT MultiDeviceGradient::~MultiDeviceGradient()
{
this->Queue.shutdown();
//shutdown all workers
for (auto&& thread : this->Workers)
{
thread.join();
}
}
//-----------------------------------------------------------------------------
template <typename DerivedPolicy>
inline VTKM_CONT vtkm::cont::PartitionedDataSet MultiDeviceGradient::PrepareForExecution(
const vtkm::cont::PartitionedDataSet& pds,
const vtkm::filter::PolicyBase<DerivedPolicy>&)
{
//Step 1. Say that we have no more to submit for this PartitionedDataSet
//This is needed to happen for each execute as we want to support
//the same filter being used for multiple inputs
this->Queue.reset();
//Step 2. Construct the PartitionedDataSet we are going to fill. The size
//signature to PartitionedDataSet just reserves size
vtkm::cont::PartitionedDataSet output;
output.AppendPartitions(
std::vector<vtkm::cont::DataSet>(static_cast<size_t>(pds.GetNumberOfPartitions())));
vtkm::cont::PartitionedDataSet* outPtr = &output;
//Step 3. Construct the filter we want to run on each partition
vtkm::filter::vector_analysis::Gradient gradient;
gradient.SetComputePointGradient(this->GetComputePointGradient());
gradient.SetActiveField(this->GetActiveFieldName());
//Step 3b. Post 1 partition up as work and block until it is
//complete. This is needed as currently constructing the virtual
//Point Coordinates is not thread safe.
auto partition = pds.cbegin();
{
vtkm::cont::DataSet input = *partition;
this->Queue.push( //build a lambda that is the work to do
[=]() {
vtkm::filter::vector_analysis::Gradient perThreadGrad = gradient;
vtkm::cont::DataSet result = perThreadGrad.Execute(input);
outPtr->ReplacePartition(0, result);
});
this->Queue.waitForAllTasksToComplete();
partition++;
}
vtkm::Id index = 1;
for (; partition != pds.cend(); ++partition)
{
vtkm::cont::DataSet input = *partition;
//Step 4. For each input partition construct a lambda
//and add it to the queue for workers to take. This
//will allows us to have multiple works execute in a non
//blocking manner
this->Queue.push( //build a lambda that is the work to do
[=]() {
vtkm::filter::vector_analysis::Gradient perThreadGrad = gradient;
vtkm::cont::DataSet result = perThreadGrad.Execute(input);
outPtr->ReplacePartition(index, result);
});
index++;
}
// Step 5. Wait on all workers to finish
this->Queue.waitForAllTasksToComplete();
return output;
}

@ -15,6 +15,3 @@ find_package(VTKm REQUIRED QUIET)
add_executable(Oscillator Oscillator.cxx)
target_link_libraries(Oscillator PRIVATE vtkm_source)
vtkm_add_target_information(Oscillator
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES Oscillator.cxx)

@ -9,19 +9,13 @@
//============================================================================
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vtkm/Math.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/filter/FilterDataSet.h>
#include <vtkm/cont/TryExecute.h>
#include <vtkm/source/Oscillator.h>
#if !defined(_WIN32) || defined(__CYGWIN__)

@ -14,10 +14,4 @@ project(ParticleAdvection CXX)
find_package(VTKm REQUIRED QUIET)
add_executable(Particle_Advection ParticleAdvection.cxx)
target_link_libraries(Particle_Advection PRIVATE vtkm_filter vtkm_io)
vtkm_add_target_information(Particle_Advection
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES ParticleAdvection.cxx)
if(TARGET vtkm::tbb)
target_compile_definitions(Particle_Advection PRIVATE BUILDING_TBB_VERSION)
endif()
target_link_libraries(Particle_Advection PRIVATE vtkm_filter_flow vtkm_io)

@ -8,9 +8,10 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/Particle.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/filter/Streamline.h>
#include <vtkm/filter/flow/Streamline.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
@ -74,7 +75,7 @@ int main(int argc, char** argv)
auto seedArray = vtkm::cont::make_ArrayHandle(seeds, vtkm::CopyFlag::Off);
//compute streamlines
vtkm::filter::Streamline streamline;
vtkm::filter::flow::Streamline streamline;
streamline.SetStepSize(stepSize);
streamline.SetNumberOfSteps(numSteps);

@ -13,8 +13,8 @@ project(PolyLineArchimedeanHelix CXX)
find_package(VTKm REQUIRED QUIET)
if (VTKm_ENABLE_RENDERING)
add_executable(PolyLineArchimedeanHelix PolyLineArchimedeanHelix.cxx)
vtkm_add_target_information(PolyLineArchimedeanHelix
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES PolyLineArchimedeanHelix.cxx)
target_link_libraries(PolyLineArchimedeanHelix PRIVATE vtkm_filter vtkm_rendering)
vtkm_add_target_information(PolyLineArchimedeanHelix
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES PolyLineArchimedeanHelix.cxx)
endif()

@ -12,7 +12,7 @@ project(RedistributePoints CXX)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
add_executable(RedistributePoints RedistributePoints.cxx RedistributePoints.h)
add_executable(RedistributePoints RedistributePoints.cxx RedistributePoints.h main.cxx)
target_link_libraries(RedistributePoints PRIVATE vtkm_filter vtkm_io)
vtkm_add_target_information(RedistributePoints
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS

@ -8,53 +8,228 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/Initialize.h>
#include "RedistributePoints.h"
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
#include <vtkm/ImplicitFunction.h>
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/AssignerPartitionedDataSet.h>
#include <vtkm/cont/BoundsGlobalCompute.h>
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/Serialization.h>
#include <vtkm/filter/entity_extraction/ExtractPoints.h>
#include <vtkm/thirdparty/diy/diy.h>
#include "RedistributePoints.h"
#include <sstream>
using std::cout;
using std::endl;
int main(int argc, char* argv[])
namespace example
{
// Process vtk-m general args
auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
auto config = vtkm::cont::Initialize(argc, argv, opts);
vtkmdiy::mpi::environment env(argc, argv);
vtkmdiy::mpi::communicator comm;
vtkm::cont::EnvironmentTracker::SetCommunicator(comm);
namespace internal
{
if (argc != 3)
{
cout << "Usage: " << endl
<< "$ " << argv[0] << " [options] <input-vtk-file> <output-file-prefix>" << endl;
cout << config.Usage << endl;
return EXIT_FAILURE;
}
vtkm::cont::DataSet input;
if (comm.rank() == 0)
{
vtkm::io::VTKDataSetReader reader(argv[1]);
input = reader.ReadDataSet();
}
example::RedistributePoints redistributor;
auto output = redistributor.Execute(input);
std::ostringstream str;
str << argv[2] << "-" << comm.rank() << ".vtk";
vtkm::io::VTKDataSetWriter writer(str.str());
writer.WriteDataSet(output);
return EXIT_SUCCESS;
static vtkmdiy::ContinuousBounds convert(const vtkm::Bounds& bds)
{
vtkmdiy::ContinuousBounds result(3);
result.min[0] = static_cast<float>(bds.X.Min);
result.min[1] = static_cast<float>(bds.Y.Min);
result.min[2] = static_cast<float>(bds.Z.Min);
result.max[0] = static_cast<float>(bds.X.Max);
result.max[1] = static_cast<float>(bds.Y.Max);
result.max[2] = static_cast<float>(bds.Z.Max);
return result;
}
template <typename FilterType>
class Redistributor
{
const vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds>& Decomposer;
const FilterType& Filter;
vtkm::cont::DataSet Extract(const vtkm::cont::DataSet& input,
const vtkmdiy::ContinuousBounds& bds) const
{
// extract points
vtkm::Box box(bds.min[0], bds.max[0], bds.min[1], bds.max[1], bds.min[2], bds.max[2]);
vtkm::filter::entity_extraction::ExtractPoints extractor;
extractor.SetCompactPoints(true);
extractor.SetImplicitFunction(box);
return extractor.Execute(input);
}
class ConcatenateFields
{
public:
explicit ConcatenateFields(vtkm::Id totalSize)
: TotalSize(totalSize)
, CurrentIdx(0)
{
}
void Append(const vtkm::cont::Field& field)
{
VTKM_ASSERT(this->CurrentIdx + field.GetNumberOfValues() <= this->TotalSize);
if (this->Field.GetNumberOfValues() == 0)
{
// Copy metadata
this->Field = field;
// Reset array
this->Field.SetData(field.GetData().NewInstanceBasic());
// Preallocate array
this->Field.GetData().Allocate(this->TotalSize);
}
else
{
VTKM_ASSERT(this->Field.GetName() == field.GetName() &&
this->Field.GetAssociation() == field.GetAssociation());
}
field.GetData().CastAndCallForTypes<VTKM_DEFAULT_TYPE_LIST, VTKM_DEFAULT_STORAGE_LIST>(
Appender{}, this->Field, this->CurrentIdx);
this->CurrentIdx += field.GetNumberOfValues();
}
const vtkm::cont::Field& GetResult() const { return this->Field; }
private:
struct Appender
{
template <typename T, typename S>
void operator()(const vtkm::cont::ArrayHandle<T, S>& data,
vtkm::cont::Field& field,
vtkm::Id currentIdx) const
{
vtkm::cont::ArrayHandle<T> farray =
field.GetData().template AsArrayHandle<vtkm::cont::ArrayHandle<T>>();
vtkm::cont::Algorithm::CopySubRange(data, 0, data.GetNumberOfValues(), farray, currentIdx);
}
};
vtkm::Id TotalSize;
vtkm::Id CurrentIdx;
vtkm::cont::Field Field;
};
public:
Redistributor(const vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds>& decomposer,
const FilterType& filter)
: Decomposer(decomposer)
, Filter(filter)
{
}
void operator()(vtkm::cont::DataSet* block, const vtkmdiy::ReduceProxy& rp) const
{
if (rp.in_link().size() == 0)
{
if (block->GetNumberOfCoordinateSystems() > 0)
{
for (int cc = 0; cc < rp.out_link().size(); ++cc)
{
auto target = rp.out_link().target(cc);
// let's get the bounding box for the target block.
vtkmdiy::ContinuousBounds bds(3);
this->Decomposer.fill_bounds(bds, target.gid);
auto extractedDS = this->Extract(*block, bds);
// TODO: Need a better way to serialize DataSet. See issue #725.
rp.enqueue(target, vtkm::cont::SerializableDataSet<>(extractedDS));
}
// clear our dataset.
*block = vtkm::cont::DataSet();
}
}
else
{
vtkm::Id numValues = 0;
std::vector<vtkm::cont::DataSet> receives;
for (int cc = 0; cc < rp.in_link().size(); ++cc)
{
auto target = rp.in_link().target(cc);
if (rp.incoming(target.gid).size() > 0)
{
// TODO: Need a better way to serialize DataSet. See issue #725.
vtkm::cont::SerializableDataSet<> sds;
rp.dequeue(target.gid, sds);
receives.push_back(sds.DataSet);
numValues += receives.back().GetCoordinateSystem(0).GetNumberOfPoints();
}
}
*block = vtkm::cont::DataSet();
if (receives.size() == 1)
{
*block = receives[0];
}
else if (receives.size() > 1)
{
ConcatenateFields concatCoords(numValues);
for (const auto& ds : receives)
{
concatCoords.Append(ds.GetCoordinateSystem(0));
}
block->AddCoordinateSystem(vtkm::cont::CoordinateSystem(
concatCoords.GetResult().GetName(), concatCoords.GetResult().GetData()));
for (vtkm::IdComponent i = 0; i < receives[0].GetNumberOfFields(); ++i)
{
ConcatenateFields concatField(numValues);
for (const auto& ds : receives)
{
concatField.Append(ds.GetField(i));
}
block->AddField(concatField.GetResult());
}
}
}
}
};
} // namespace example::internal
VTKM_CONT vtkm::cont::PartitionedDataSet RedistributePoints::DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& input)
{
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
// let's first get the global bounds of the domain
vtkm::Bounds gbounds = vtkm::cont::BoundsGlobalCompute(input);
vtkm::cont::AssignerPartitionedDataSet assigner(input.GetNumberOfPartitions());
vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds> decomposer(
/*dim*/ 3, internal::convert(gbounds), assigner.nblocks());
vtkmdiy::Master master(
comm,
/*threads*/ 1,
/*limit*/ -1,
[]() -> void* { return new vtkm::cont::DataSet(); },
[](void* ptr) { delete static_cast<vtkm::cont::DataSet*>(ptr); });
decomposer.decompose(comm.rank(), assigner, master);
assert(static_cast<vtkm::Id>(master.size()) == input.GetNumberOfPartitions());
// let's populate local blocks
master.foreach ([&input](vtkm::cont::DataSet* ds, const vtkmdiy::Master::ProxyWithLink& proxy) {
auto lid = proxy.master()->lid(proxy.gid());
*ds = input.GetPartition(lid);
});
internal::Redistributor<RedistributePoints> redistributor(decomposer, *this);
vtkmdiy::all_to_all(master, assigner, redistributor, /*k=*/2);
vtkm::cont::PartitionedDataSet result;
master.foreach ([&result](vtkm::cont::DataSet* ds, const vtkmdiy::Master::ProxyWithLink&) {
result.AppendPartition(*ds);
});
return result;
}
vtkm::cont::DataSet RedistributePoints::DoExecute(const vtkm::cont::DataSet&)
{
throw vtkm::cont::ErrorBadType("RedistributePoints requires PartitionedDataSet.");
}
} // namespace example
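
A sketch (not from the patch) of invoking the partition path directly; wrapping the per-rank block in a PartitionedDataSet routes execution through DoExecutePartitions and the DIY all-to-all above, while the plain-DataSet DoExecute deliberately throws:

#include <vtkm/cont/PartitionedDataSet.h>

#include "RedistributePoints.h"

vtkm::cont::PartitionedDataSet Redistribute(const vtkm::cont::DataSet& localBlock)
{
  example::RedistributePoints redistributor;
  // One partition per rank; DIY exchanges points so each rank keeps its spatial region.
  return redistributor.Execute(vtkm::cont::PartitionedDataSet(localBlock));
}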

@ -7,238 +7,28 @@
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef example_RedistributePoints_h
#define example_RedistributePoints_h
#include <vtkm/ImplicitFunction.h>
#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/AssignerPartitionedDataSet.h>
#include <vtkm/cont/BoundsGlobalCompute.h>
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/Serialization.h>
#include <vtkm/filter/Filter.h>
#include <vtkm/filter/entity_extraction/ExtractPoints.h>
#include <vtkm/thirdparty/diy/diy.h>
#include <vtkm/filter/NewFilter.h>
namespace example
{
namespace internal
{
static vtkmdiy::ContinuousBounds convert(const vtkm::Bounds& bds)
{
vtkmdiy::ContinuousBounds result(3);
result.min[0] = static_cast<float>(bds.X.Min);
result.min[1] = static_cast<float>(bds.Y.Min);
result.min[2] = static_cast<float>(bds.Z.Min);
result.max[0] = static_cast<float>(bds.X.Max);
result.max[1] = static_cast<float>(bds.Y.Max);
result.max[2] = static_cast<float>(bds.Z.Max);
return result;
}
template <typename FilterType>
class Redistributor
{
const vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds>& Decomposer;
const FilterType& Filter;
vtkm::cont::DataSet Extract(const vtkm::cont::DataSet& input,
const vtkmdiy::ContinuousBounds& bds) const
{
// extract points
vtkm::Box box(bds.min[0], bds.max[0], bds.min[1], bds.max[1], bds.min[2], bds.max[2]);
vtkm::filter::entity_extraction::ExtractPoints extractor;
extractor.SetCompactPoints(true);
extractor.SetImplicitFunction(box);
return extractor.Execute(input);
}
class ConcatenateFields
{
public:
explicit ConcatenateFields(vtkm::Id totalSize)
: TotalSize(totalSize)
, CurrentIdx(0)
{
}
void Append(const vtkm::cont::Field& field)
{
VTKM_ASSERT(this->CurrentIdx + field.GetNumberOfValues() <= this->TotalSize);
if (this->Field.GetNumberOfValues() == 0)
{
// Copy metadata
this->Field = field;
// Reset array
this->Field.SetData(field.GetData().NewInstanceBasic());
// Preallocate array
this->Field.GetData().Allocate(this->TotalSize);
}
else
{
VTKM_ASSERT(this->Field.GetName() == field.GetName() &&
this->Field.GetAssociation() == field.GetAssociation());
}
field.GetData().CastAndCallForTypes<VTKM_DEFAULT_TYPE_LIST, VTKM_DEFAULT_STORAGE_LIST>(
Appender{}, this->Field, this->CurrentIdx);
this->CurrentIdx += field.GetNumberOfValues();
}
const vtkm::cont::Field& GetResult() const { return this->Field; }
private:
struct Appender
{
template <typename T, typename S>
void operator()(const vtkm::cont::ArrayHandle<T, S>& data,
vtkm::cont::Field& field,
vtkm::Id currentIdx) const
{
vtkm::cont::ArrayHandle<T> farray =
field.GetData().template AsArrayHandle<vtkm::cont::ArrayHandle<T>>();
vtkm::cont::Algorithm::CopySubRange(data, 0, data.GetNumberOfValues(), farray, currentIdx);
}
};
vtkm::Id TotalSize;
vtkm::Id CurrentIdx;
vtkm::cont::Field Field;
};
public:
Redistributor(const vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds>& decomposer,
const FilterType& filter)
: Decomposer(decomposer)
, Filter(filter)
{
}
void operator()(vtkm::cont::DataSet* block, const vtkmdiy::ReduceProxy& rp) const
{
if (rp.in_link().size() == 0)
{
if (block->GetNumberOfCoordinateSystems() > 0)
{
for (int cc = 0; cc < rp.out_link().size(); ++cc)
{
auto target = rp.out_link().target(cc);
// let's get the bounding box for the target block.
vtkmdiy::ContinuousBounds bds(3);
this->Decomposer.fill_bounds(bds, target.gid);
auto extractedDS = this->Extract(*block, bds);
rp.enqueue(target, vtkm::filter::MakeSerializableDataSet(extractedDS, this->Filter));
}
// clear our dataset.
*block = vtkm::cont::DataSet();
}
}
else
{
vtkm::Id numValues = 0;
std::vector<vtkm::cont::DataSet> receives;
for (int cc = 0; cc < rp.in_link().size(); ++cc)
{
auto target = rp.in_link().target(cc);
if (rp.incoming(target.gid).size() > 0)
{
auto sds = vtkm::filter::MakeSerializableDataSet(this->Filter);
rp.dequeue(target.gid, sds);
receives.push_back(sds.DataSet);
numValues += receives.back().GetCoordinateSystem(0).GetNumberOfPoints();
}
}
*block = vtkm::cont::DataSet();
if (receives.size() == 1)
{
*block = receives[0];
}
else if (receives.size() > 1)
{
ConcatenateFields concatCoords(numValues);
for (const auto& ds : receives)
{
concatCoords.Append(ds.GetCoordinateSystem(0));
}
block->AddCoordinateSystem(vtkm::cont::CoordinateSystem(
concatCoords.GetResult().GetName(), concatCoords.GetResult().GetData()));
for (vtkm::IdComponent i = 0; i < receives[0].GetNumberOfFields(); ++i)
{
ConcatenateFields concatField(numValues);
for (const auto& ds : receives)
{
concatField.Append(ds.GetField(i));
}
block->AddField(concatField.GetResult());
}
}
}
}
};
} // namespace example::internal
class RedistributePoints : public vtkm::filter::Filter<RedistributePoints>
class RedistributePoints : public vtkm::filter::NewFilter
{
public:
VTKM_CONT
RedistributePoints() {}
VTKM_CONT RedistributePoints() {}
VTKM_CONT
~RedistributePoints() {}
VTKM_CONT ~RedistributePoints() {}
template <typename DerivedPolicy>
VTKM_CONT vtkm::cont::PartitionedDataSet PrepareForExecution(
const vtkm::cont::PartitionedDataSet& input,
const vtkm::filter::PolicyBase<DerivedPolicy>& policy);
protected:
VTKM_CONT vtkm::cont::PartitionedDataSet DoExecutePartitions(
const vtkm::cont::PartitionedDataSet& input) override;
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& input) override;
};
template <typename DerivedPolicy>
inline VTKM_CONT vtkm::cont::PartitionedDataSet RedistributePoints::PrepareForExecution(
const vtkm::cont::PartitionedDataSet& input,
const vtkm::filter::PolicyBase<DerivedPolicy>&)
{
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
// let's first get the global bounds of the domain
vtkm::Bounds gbounds = vtkm::cont::BoundsGlobalCompute(input);
vtkm::cont::AssignerPartitionedDataSet assigner(input.GetNumberOfPartitions());
vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds> decomposer(
/*dim*/ 3, internal::convert(gbounds), assigner.nblocks());
vtkmdiy::Master master(
comm,
/*threads*/ 1,
/*limit*/ -1,
[]() -> void* { return new vtkm::cont::DataSet(); },
[](void* ptr) { delete static_cast<vtkm::cont::DataSet*>(ptr); });
decomposer.decompose(comm.rank(), assigner, master);
assert(static_cast<vtkm::Id>(master.size()) == input.GetNumberOfPartitions());
// let's populate local blocks
master.foreach ([&input](vtkm::cont::DataSet* ds, const vtkmdiy::Master::ProxyWithLink& proxy) {
auto lid = proxy.master()->lid(proxy.gid());
*ds = input.GetPartition(lid);
});
internal::Redistributor<RedistributePoints> redistributor(decomposer, *this);
vtkmdiy::all_to_all(master, assigner, redistributor, /*k=*/2);
vtkm::cont::PartitionedDataSet result;
master.foreach ([&result](vtkm::cont::DataSet* ds, const vtkmdiy::Master::ProxyWithLink&) {
result.AppendPartition(*ds);
});
return result;
}
} // namespace example
#endif //example_RedistributePoints_h

@ -0,0 +1,60 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
#include <vtkm/thirdparty/diy/diy.h>
#include "RedistributePoints.h"
#include <sstream>
using std::cout;
using std::endl;
int main(int argc, char* argv[])
{
// Process vtk-m general args
auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
auto config = vtkm::cont::Initialize(argc, argv, opts);
vtkmdiy::mpi::environment env(argc, argv);
vtkmdiy::mpi::communicator comm;
vtkm::cont::EnvironmentTracker::SetCommunicator(comm);
if (argc != 3)
{
cout << "Usage: " << endl
<< "$ " << argv[0] << " [options] <input-vtk-file> <output-file-prefix>" << endl;
cout << config.Usage << endl;
return EXIT_FAILURE;
}
vtkm::cont::DataSet input;
if (comm.rank() == 0)
{
vtkm::io::VTKDataSetReader reader(argv[1]);
input = reader.ReadDataSet();
}
example::RedistributePoints redistributor;
auto output = redistributor.Execute(input);
std::ostringstream str;
str << argv[2] << "-" << comm.rank() << ".vtk";
vtkm::io::VTKDataSetWriter writer(str.str());
writer.WriteDataSet(output);
return EXIT_SUCCESS;
}

@ -16,12 +16,5 @@ find_package(VTKm REQUIRED QUIET)
if (VTKm_ENABLE_MPI)
add_executable(StreamlineMPI StreamlineMPI.cxx)
target_compile_definitions(StreamlineMPI PRIVATE "MPI_ENABLED")
target_link_libraries(StreamlineMPI PRIVATE vtkm_filter vtkm_io MPI::MPI_CXX)
vtkm_add_target_information(StreamlineMPI
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES StreamlineMPI.cxx)
target_link_libraries(StreamlineMPI PRIVATE vtkm_filter_flow vtkm_io MPI::MPI_CXX)
endif()
#if(TARGET vtkm::tbb)
# target_compile_definitions(streamline_mpi PRIVATE BUILDING_TBB_VERSION)
#endif()

@ -8,13 +8,14 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/Particle.h>
#include <vtkm/cont/AssignerPartitionedDataSet.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/Field.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/PartitionedDataSet.h>
#include <vtkm/filter/Streamline.h>
#include <vtkm/filter/flow/ParticleAdvection.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
#include <vtkm/io/reader/VTKDataSetReader.h>
@ -23,12 +24,6 @@
#include <vtkm/thirdparty/diy/diy.h>
#include <vtkm/thirdparty/diy/mpi-cast.h>
#include <vtkm/filter/ParticleAdvection.h>
#include <vtkm/filter/particleadvection/BoundsMap.h>
#include <vtkm/filter/particleadvection/ParticleMessenger.h>
void LoadData(std::string& fname, std::vector<vtkm::cont::DataSet>& dataSets, int rank, int nRanks)
{
std::string buff;
@ -99,7 +94,7 @@ int main(int argc, char** argv)
std::vector<vtkm::cont::DataSet> dataSets;
LoadData(dataFile, dataSets, rank, size);
vtkm::filter::ParticleAdvection pa;
vtkm::filter::flow::ParticleAdvection pa;
vtkm::cont::ArrayHandle<vtkm::Particle> seedArray;
seedArray = vtkm::cont::make_ArrayHandle({ vtkm::Particle(vtkm::Vec3f(.1f, .1f, .9f), 0),

@ -12,11 +12,14 @@
#include <string>
#include <vector>
#include <vtkm/Particle.h>
//#include <vtkm/cont/Algorithm.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Timer.h>
//#include <vtkm/cont/Timer.h>
#include <vtkm/filter/Pathline.h>
#include <vtkm/filter/flow/Pathline.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
@ -91,7 +94,7 @@ int main(int argc, char** argv)
// Instantiate the filter by providing necessary parameters.
// Necessary parameters are :
vtkm::filter::Pathline pathlineFilter;
vtkm::filter::flow::Pathline pathlineFilter;
pathlineFilter.SetActiveField(fieldName);
// 1. The current and next time slice. The current time slice is passed
// through the parameter to the Execute method.

@ -18,9 +18,3 @@ target_link_libraries(Tetrahedralize PRIVATE vtkm_filter vtkm_io)
add_executable(Triangulate Triangulate.cxx)
target_link_libraries(Triangulate PRIVATE vtkm_filter vtkm_io)
vtkm_add_target_information(Tetrahedralize Triangulate
DROP_UNUSED_SYMBOLS
MODIFY_CUDA_FLAGS
DEVICE_SOURCES
Tetrahedralize.cxx Triangulate.cxx)

@ -36,11 +36,16 @@ target_link_libraries(two_filters vtkm_filter vtkm_io)
add_executable(mag_grad mag_grad.cxx)
target_link_libraries(mag_grad vtkm_filter vtkm_io)
# Because mag_grad.cxx creates a worklet with code that
# runs on a GPU, it needs additional information.
vtkm_add_target_information(mag_grad
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES mag_grad.cxx)
if (VTKm_ENABLE_RENDERING)
add_executable(rendering rendering.cxx)
target_link_libraries(rendering vtkm_filter vtkm_io vtkm_rendering)
endif()
endif ()
add_executable(error_handling error_handling.cxx)
target_link_libraries(error_handling vtkm_filter vtkm_io)
@ -50,46 +55,15 @@ target_link_libraries(logging vtkm_filter vtkm_io)
add_executable(point_to_cell point_to_cell.cxx)
target_link_libraries(point_to_cell vtkm_cont vtkm_filter vtkm_io)
vtkm_add_target_information(point_to_cell
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES point_to_cell.cxx)
add_executable(extract_edges extract_edges.cxx)
target_link_libraries(extract_edges vtkm_cont vtkm_filter vtkm_io)
set(tutorial_targets
io
contour
contour_two_fields
two_filters
mag_grad
error_handling
logging
point_to_cell
extract_edges
)
set(tutorial_sources
io.cxx
contour.cxx
contour_two_fields.cxx
two_filters.cxx
mag_grad.cxx
error_handling.cxx
logging.cxx
point_to_cell.cxx
extract_edges.cxx
)
if (VTKm_ENABLE_RENDERING)
list(APPEND tutorial_sources rendering.cxx)
list(APPEND tutorial_targets rendering)
endif()
vtkm_add_target_information(${tutorial_targets}
DROP_UNUSED_SYMBOLS
MODIFY_CUDA_FLAGS
DEVICE_SOURCES
${tutorial_sources})
vtkm_add_target_information(extract_edges
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES extract_edges.cxx)
# Copy the data file to be adjacent to the binaries
file(GENERATE OUTPUT "$<TARGET_FILE_DIR:mag_grad>/data/kitchen.vtk" INPUT "${CMAKE_CURRENT_SOURCE_DIR}/data/kitchen.vtk")

@ -20,7 +20,9 @@
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
#include <vtkm/filter/FilterDataSet.h>
#include <vtkm/filter/MapFieldMergeAverage.h>
#include <vtkm/filter/MapFieldPermutation.h>
#include <vtkm/filter/NewFilter.h>
#include <vtkm/filter/contour/Contour.h>
#include <vtkm/worklet/WorkletMapTopology.h>
@ -117,95 +119,23 @@ struct EdgeIndicesWorklet : vtkm::worklet::WorkletReduceByKey
namespace
{
class ExtractEdges : public vtkm::filter::FilterDataSet<ExtractEdges>
{
public:
template <typename Policy>
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inData,
vtkm::filter::PolicyBase<Policy> policy);
template <typename T, typename StorageType, typename Policy>
VTKM_CONT bool DoMapField(vtkm::cont::DataSet& result,
const vtkm::cont::ArrayHandle<T, StorageType>& input,
const vtkm::filter::FieldMetadata& fieldMeta,
const vtkm::filter::PolicyBase<Policy>& policy);
private:
vtkm::worklet::ScatterCounting::OutputToInputMapType OutputToInputCellMap;
vtkm::worklet::Keys<vtkm::Id2> CellToEdgeKeys;
};
template <typename Policy>
inline VTKM_CONT vtkm::cont::DataSet ExtractEdges::DoExecute(
const vtkm::cont::DataSet& inData,
vtkm::filter::PolicyBase<Policy> policy)
{
auto inCellSet = vtkm::filter::ApplyPolicyCellSet(inData.GetCellSet(), policy, *this);
// First, count the edges in each cell.
vtkm::cont::ArrayHandle<vtkm::IdComponent> edgeCounts;
this->Invoke(CountEdgesWorklet{}, inCellSet, edgeCounts);
// Second, using these counts build a scatter that repeats a cell's visit
// for each edge in the cell.
vtkm::worklet::ScatterCounting scatter(edgeCounts);
this->OutputToInputCellMap = scatter.GetOutputToInputMap(inCellSet.GetNumberOfCells());
vtkm::worklet::ScatterCounting::VisitArrayType outputToInputEdgeMap =
scatter.GetVisitArray(inCellSet.GetNumberOfCells());
// Third, for each edge, extract a canonical id.
vtkm::cont::ArrayHandle<vtkm::Id2> canonicalIds;
this->Invoke(EdgeIdsWorklet{}, scatter, inCellSet, canonicalIds);
// Fourth, construct a Keys object to combine all like edge ids.
this->CellToEdgeKeys = vtkm::worklet::Keys<vtkm::Id2>(canonicalIds);
// Fifth, use a reduce-by-key to extract indices for each unique edge.
vtkm::cont::ArrayHandle<vtkm::Id> connectivityArray;
this->Invoke(EdgeIndicesWorklet{},
this->CellToEdgeKeys,
inCellSet,
this->OutputToInputCellMap,
outputToInputEdgeMap,
vtkm::cont::make_ArrayHandleGroupVec<2>(connectivityArray));
// Sixth, use the created connectivity array to build a cell set.
vtkm::cont::CellSetSingleType<> outCellSet;
outCellSet.Fill(inCellSet.GetNumberOfPoints(), vtkm::CELL_SHAPE_LINE, 2, connectivityArray);
vtkm::cont::DataSet outData;
outData.SetCellSet(outCellSet);
for (vtkm::IdComponent coordSystemIndex = 0;
coordSystemIndex < inData.GetNumberOfCoordinateSystems();
++coordSystemIndex)
{
outData.AddCoordinateSystem(inData.GetCoordinateSystem(coordSystemIndex));
}
return outData;
}
template <typename T, typename StorageType, typename Policy>
inline VTKM_CONT bool ExtractEdges::DoMapField(
VTKM_CONT bool DoMapField(
vtkm::cont::DataSet& result,
const vtkm::cont::ArrayHandle<T, StorageType>& inputArray,
const vtkm::filter::FieldMetadata& fieldMeta,
const vtkm::filter::PolicyBase<Policy>&)
const vtkm::cont::Field& inputField,
const vtkm::worklet::ScatterCounting::OutputToInputMapType& OutputToInputCellMap,
const vtkm::worklet::Keys<vtkm::Id2>& CellToEdgeKeys)
{
vtkm::cont::Field outputField;
if (fieldMeta.IsPointField())
if (inputField.IsPointField())
{
outputField = fieldMeta.AsField(inputArray); // pass through
outputField = inputField; // pass through
}
else if (fieldMeta.IsCellField())
else if (inputField.IsCellField())
{
auto outputCellArray = vtkm::worklet::AverageByKey::Run(
this->CellToEdgeKeys,
vtkm::cont::make_ArrayHandlePermutation(this->OutputToInputCellMap, inputArray));
outputField = fieldMeta.AsField(outputCellArray);
vtkm::cont::Field permuted;
vtkm::filter::MapFieldPermutation(inputField, OutputToInputCellMap, permuted);
vtkm::filter::MapFieldMergeAverage(permuted, CellToEdgeKeys, outputField);
}
else
{
@ -217,6 +147,55 @@ inline VTKM_CONT bool ExtractEdges::DoMapField(
return true;
}
class ExtractEdges : public vtkm::filter::NewFilter
{
public:
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inData) override;
};
VTKM_CONT vtkm::cont::DataSet ExtractEdges::DoExecute(const vtkm::cont::DataSet& inData)
{
auto inCellSet = inData.GetCellSet();
// First, count the edges in each cell.
vtkm::cont::ArrayHandle<vtkm::IdComponent> edgeCounts;
this->Invoke(CountEdgesWorklet{}, inCellSet, edgeCounts);
// Second, using these counts build a scatter that repeats a cell's visit
// for each edge in the cell.
vtkm::worklet::ScatterCounting scatter(edgeCounts);
vtkm::worklet::ScatterCounting::OutputToInputMapType OutputToInputCellMap;
OutputToInputCellMap = scatter.GetOutputToInputMap(inCellSet.GetNumberOfCells());
vtkm::worklet::ScatterCounting::VisitArrayType outputToInputEdgeMap =
scatter.GetVisitArray(inCellSet.GetNumberOfCells());
// Third, for each edge, extract a canonical id.
vtkm::cont::ArrayHandle<vtkm::Id2> canonicalIds;
this->Invoke(EdgeIdsWorklet{}, scatter, inCellSet, canonicalIds);
// Fourth, construct a Keys object to combine all like edge ids.
vtkm::worklet::Keys<vtkm::Id2> CellToEdgeKeys;
CellToEdgeKeys = vtkm::worklet::Keys<vtkm::Id2>(canonicalIds);
// Fifth, use a reduce-by-key to extract indices for each unique edge.
vtkm::cont::ArrayHandle<vtkm::Id> connectivityArray;
this->Invoke(EdgeIndicesWorklet{},
CellToEdgeKeys,
inCellSet,
OutputToInputCellMap,
outputToInputEdgeMap,
vtkm::cont::make_ArrayHandleGroupVec<2>(connectivityArray));
// Sixth, use the created connectivity array to build a cell set.
vtkm::cont::CellSetSingleType<> outCellSet;
outCellSet.Fill(inCellSet.GetNumberOfPoints(), vtkm::CELL_SHAPE_LINE, 2, connectivityArray);
auto mapper = [&](auto& outDataSet, const auto& f) {
DoMapField(outDataSet, f, OutputToInputCellMap, CellToEdgeKeys);
};
return this->CreateResult(inData, outCellSet, inData.GetCoordinateSystems(), mapper);
}
}
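For reference, a minimal sketch of how the rewritten filter might be driven, assuming the usual NewFilter::Execute entry point (the file name and helper function are illustrative):

#include <string>
#include <vtkm/cont/DataSet.h>
#include <vtkm/io/VTKDataSetReader.h>

vtkm::cont::DataSet RunExtractEdges(const std::string& fileName)
{
  // Read an unstructured data set and extract its unique edges as line cells.
  vtkm::io::VTKDataSetReader reader(fileName);
  vtkm::cont::DataSet input = reader.ReadDataSet();
  ExtractEdges edgeFilter;          // the NewFilter subclass defined above
  return edgeFilter.Execute(input); // Execute forwards to DoExecute and maps the input fields
}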
int main(int argc, char** argv)

@ -28,29 +28,30 @@ struct ComputeMagnitude : vtkm::worklet::WorkletMapField
}
};
#include <vtkm/filter/FilterField.h>
#include <vtkm/filter/NewFilterField.h>
class FieldMagnitude : public vtkm::filter::FilterField<FieldMagnitude>
class FieldMagnitude : public vtkm::filter::NewFilterField
{
public:
using SupportedTypes = vtkm::List<vtkm::Vec3f>;
template <typename ArrayHandleType, typename Policy>
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inDataSet,
const ArrayHandleType& inField,
const vtkm::filter::FieldMetadata& fieldMetadata,
vtkm::filter::PolicyBase<Policy>)
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inDataSet) override
{
const auto& inField = this->GetFieldFromDataSet(inDataSet);
vtkm::cont::ArrayHandle<vtkm::FloatDefault> outField;
this->Invoke(ComputeMagnitude{}, inField, outField);
auto resolveType = [&](const auto& concrete) {
this->Invoke(ComputeMagnitude{}, concrete, outField);
};
this->CastAndCallVecField<3>(inField, resolveType);
std::string outFieldName = this->GetOutputFieldName();
if (outFieldName == "")
{
outFieldName = fieldMetadata.GetName() + "_magnitude";
outFieldName = inField.GetName() + "_magnitude";
}
return vtkm::filter::CreateResult(inDataSet, outField, outFieldName, fieldMetadata);
return this->CreateResultFieldCell(inDataSet, outFieldName, outField);
}
};
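A minimal usage sketch, assuming the standard NewFilterField entry points (the field name is illustrative):

FieldMagnitude magnitude;
magnitude.SetActiveField("velocity");                         // hypothetical vector field
vtkm::cont::DataSet output = magnitude.Execute(inputDataSet);
// Unless SetOutputFieldName was called, the result gains a field named "velocity_magnitude".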

@ -46,46 +46,39 @@ struct ConvertPointFieldToCells : vtkm::worklet::WorkletVisitCellsWithPoints
} // namespace worklet
} // namespace vtkm
#include <vtkm/filter/FilterField.h>
#include <vtkm/filter/NewFilterField.h>
namespace vtkm
{
namespace filter
{
struct ConvertPointFieldToCells : vtkm::filter::FilterField<ConvertPointFieldToCells>
struct ConvertPointFieldToCells : vtkm::filter::NewFilterField
{
template <typename ArrayHandleType, typename Policy>
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inDataSet,
const ArrayHandleType& inField,
const vtkm::filter::FieldMetadata& fieldMetadata,
vtkm::filter::PolicyBase<Policy>);
VTKM_CONT vtkm::cont::DataSet DoExecute(const vtkm::cont::DataSet& inDataSet) override;
};
template <typename ArrayHandleType, typename Policy>
VTKM_CONT cont::DataSet ConvertPointFieldToCells::DoExecute(
const vtkm::cont::DataSet& inDataSet,
const ArrayHandleType& inField,
const vtkm::filter::FieldMetadata& fieldMetadata,
vtkm::filter::PolicyBase<Policy> policy)
VTKM_CONT cont::DataSet ConvertPointFieldToCells::DoExecute(const vtkm::cont::DataSet& inDataSet)
{
VTKM_IS_ARRAY_HANDLE(ArrayHandleType);
const auto& inField = this->GetFieldFromDataSet(inDataSet);
using ValueType = typename ArrayHandleType::ValueType;
vtkm::cont::UnknownArrayHandle outArray;
auto resolveType = [&](const auto& concrete) {
using ValueType = typename std::decay_t<decltype(concrete)>::ValueType;
vtkm::cont::ArrayHandle<ValueType> outField;
this->Invoke(vtkm::worklet::ConvertPointFieldToCells{},
vtkm::filter::ApplyPolicyCellSet(inDataSet.GetCellSet(), policy, *this),
inField,
outField);
vtkm::cont::ArrayHandle<ValueType> outField;
this->Invoke(
vtkm::worklet::ConvertPointFieldToCells{}, inDataSet.GetCellSet(), concrete, outField);
outArray = outField;
};
this->CastAndCallScalarField(inField, resolveType);
std::string outFieldName = this->GetOutputFieldName();
if (outFieldName == "")
{
outFieldName = fieldMetadata.GetName();
outFieldName = inField.GetName();
}
return vtkm::filter::CreateResultFieldCell(inDataSet, outField, outFieldName);
return this->CreateResultFieldCell(inDataSet, outFieldName, outArray);
}
} // namespace filter
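A short usage sketch under the same assumptions (the field name is illustrative):

vtkm::filter::ConvertPointFieldToCells pointToCell;
pointToCell.SetActiveField("pressure");
vtkm::cont::DataSet output = pointToCell.Execute(inputDataSet);
// The output carries a cell field with the same name as the selected point field.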

@ -12,7 +12,6 @@
#include <vtkm/cont/Initialize.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
#include <vtkm/rendering/Actor.h>
#include <vtkm/rendering/CanvasRayTracer.h>
#include <vtkm/rendering/MapperRayTracer.h>

@ -1 +1 @@
1.8.0
1.8.9999

@ -14,6 +14,8 @@
#include <vtkm/internal/Meta.h>
#include <functional>
namespace vtkm
{

@ -55,6 +55,16 @@ public:
VTKM_EXEC_CONT void ClearInGhostCell() { this->reset(this->IN_GHOST_CELL_BIT); }
VTKM_EXEC_CONT bool CheckInGhostCell() const { return this->test(this->IN_GHOST_CELL_BIT); }
VTKM_EXEC_CONT void SetZeroVelocity() { this->set(this->ZERO_VELOCITY); }
VTKM_EXEC_CONT void ClearZeroVelocity() { this->reset(this->ZERO_VELOCITY); }
VTKM_EXEC_CONT bool CheckZeroVelocity() const { return this->test(this->ZERO_VELOCITY); }
VTKM_EXEC_CONT bool CanContinue() const
{
return this->CheckOk() && !this->CheckTerminate() && !this->CheckSpatialBounds() &&
!this->CheckTemporalBounds() && !this->CheckInGhostCell() && !this->CheckZeroVelocity();
}
private:
static constexpr vtkm::Id SUCCESS_BIT = 0;
static constexpr vtkm::Id TERMINATE_BIT = 1;
@ -62,6 +72,7 @@ private:
static constexpr vtkm::Id TEMPORAL_BOUNDS_BIT = 3;
static constexpr vtkm::Id TOOK_ANY_STEPS_BIT = 4;
static constexpr vtkm::Id IN_GHOST_CELL_BIT = 5;
static constexpr vtkm::Id ZERO_VELOCITY = 6;
};
inline VTKM_CONT std::ostream& operator<<(std::ostream& s, const vtkm::ParticleStatus& status)
@ -71,6 +82,7 @@ inline VTKM_CONT std::ostream& operator<<(std::ostream& s, const vtkm::ParticleS
s << " spat= " << status.CheckSpatialBounds();
s << " temp= " << status.CheckTemporalBounds();
s << " ghst= " << status.CheckInGhostCell();
s << " zvel= " << status.CheckZeroVelocity();
s << "]";
return s;
}
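// A sketch of how the new bit is intended to be used (TakeStep is an illustrative
// integrator):
//
//   while (particle.Status.CanContinue())
//   {
//     TakeStep(particle); // sets ZERO_VELOCITY when the sampled velocity is null
//   }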
@ -142,6 +154,17 @@ public:
vtkm::Id NumSteps = 0;
vtkm::ParticleStatus Status;
vtkm::FloatDefault Time = 0;
static size_t Sizeof()
{
constexpr std::size_t sz = sizeof(vtkm::Vec3f) // Pos
+ sizeof(vtkm::Id) // ID
+ sizeof(vtkm::Id) // NumSteps
+ sizeof(vtkm::UInt8) // Status
+ sizeof(vtkm::FloatDefault); // Time
return sz;
}
};
class ChargedParticle
@ -153,9 +176,9 @@ public:
VTKM_EXEC_CONT
ChargedParticle(const vtkm::Vec3f& position,
const vtkm::Id& id,
const vtkm::FloatDefault& mass,
const vtkm::FloatDefault& charge,
const vtkm::FloatDefault& weighting,
const vtkm::Float64& mass,
const vtkm::Float64& charge,
const vtkm::Float64& weighting,
const vtkm::Vec3f& momentum,
const vtkm::Id& numSteps = 0,
const vtkm::ParticleStatus& status = vtkm::ParticleStatus(),
@ -173,16 +196,16 @@ public:
}
VTKM_EXEC_CONT
vtkm::FloatDefault Gamma(vtkm::Vec3f momentum, bool reciprocal = false) const
vtkm::Float64 Gamma(vtkm::Vec3f momentum, bool reciprocal = false) const
{
constexpr vtkm::FloatDefault c2 = SPEED_OF_LIGHT * SPEED_OF_LIGHT;
const auto fMom2 = vtkm::MagnitudeSquared(momentum);
const auto m2 = this->Mass * this->Mass;
const auto m2_c2_reci = 1.0 / (m2 * c2);
const vtkm::Float64 fMom2 = vtkm::MagnitudeSquared(momentum);
const vtkm::Float64 m2 = this->Mass * this->Mass;
const vtkm::Float64 m2_c2_reci = 1.0 / (m2 * c2);
if (reciprocal)
return static_cast<vtkm::FloatDefault>(vtkm::RSqrt(1.0 + fMom2 * m2_c2_reci));
return vtkm::RSqrt(1.0 + fMom2 * m2_c2_reci);
else
return static_cast<vtkm::FloatDefault>(vtkm::Sqrt(1.0 + fMom2 * m2_c2_reci));
return vtkm::Sqrt(1.0 + fMom2 * m2_c2_reci);
}
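// A sketch of the math evaluated above (momentum form of the Lorentz factor):
//   gamma(p)   = sqrt(1 + |p|^2 / (m^2 c^2))
//   1/gamma(p) = rsqrt(1 + |p|^2 / (m^2 c^2))
// m2_c2_reci holds 1 / (m^2 c^2), so the Sqrt and RSqrt branches return the factor
// and its reciprocal directly as vtkm::Float64.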
VTKM_EXEC_CONT
@ -197,11 +220,11 @@ public:
vtkm::Vec3f eField = vectors[0];
vtkm::Vec3f bField = vectors[1];
const vtkm::FloatDefault QoM = this->Charge / this->Mass;
const vtkm::Float64 QoM = this->Charge / this->Mass;
const vtkm::Vec3f mom_minus = this->Momentum + (0.5 * this->Charge * eField * length);
// Get reciprocal of Gamma
vtkm::Vec3f gamma_reci = this->Gamma(mom_minus, true);
vtkm::Vec3f gamma_reci = static_cast<vtkm::FloatDefault>(this->Gamma(mom_minus, true));
const vtkm::Vec3f t = 0.5 * QoM * length * bField * gamma_reci;
const vtkm::Vec3f s = 2.0f * t * (1.0 / (1.0 + vtkm::Magnitude(t)));
const vtkm::Vec3f mom_prime = mom_minus + vtkm::Cross(mom_minus, t);
@ -228,6 +251,14 @@ public:
return this->Pos + translation;
}
inline VTKM_CONT friend std::ostream& operator<<(std::ostream& out,
const vtkm::ChargedParticle& p)
{
out << "v(" << p.Time << ") = " << p.Pos << ", ID: " << p.ID << ", NumSteps: " << p.NumSteps
<< ", Status: " << p.Status;
return out;
}
vtkm::Vec3f Pos;
vtkm::Id ID = -1;
vtkm::Id NumSteps = 0;
@ -235,14 +266,30 @@ public:
vtkm::FloatDefault Time = 0;
private:
vtkm::FloatDefault Mass;
vtkm::FloatDefault Charge;
vtkm::FloatDefault Weighting;
vtkm::Float64 Mass;
vtkm::Float64 Charge;
vtkm::Float64 Weighting;
vtkm::Vec3f Momentum;
constexpr static vtkm::FloatDefault SPEED_OF_LIGHT =
static_cast<vtkm::FloatDefault>(2.99792458e8);
friend struct mangled_diy_namespace::Serialization<vtkm::ChargedParticle>;
public:
static size_t Sizeof()
{
constexpr std::size_t sz = sizeof(vtkm::Vec3f) // Pos
+ sizeof(vtkm::Id) // ID
+ sizeof(vtkm::Id) // NumSteps
+ sizeof(vtkm::UInt8) // Status
+ sizeof(vtkm::FloatDefault) // Time
+ sizeof(vtkm::Float64) //Mass
+ sizeof(vtkm::Float64) //Charge
+ sizeof(vtkm::Float64) //Weighting
+ sizeof(vtkm::Vec3f); //Momentum
return sz;
}
};
} //namespace vtkm

@ -498,6 +498,7 @@ struct VTKM_NEVER_EXPORT VecTraits<vtkm::VecCConst<T>>
namespace internal
{
/// Used for overriding VecTraits for basic scalar types.
///
template <typename ScalarType>
@ -539,6 +540,44 @@ struct VTKM_NEVER_EXPORT VecTraitsBasic
dest[0] = src;
}
};
namespace detail
{
template <typename T, typename = vtkm::HasVecTraits<T>>
struct VTKM_NEVER_EXPORT SafeVecTraitsImpl;
template <typename T>
struct VTKM_NEVER_EXPORT SafeVecTraitsImpl<T, std::true_type> : vtkm::VecTraits<T>
{
};
template <typename T>
struct VTKM_NEVER_EXPORT SafeVecTraitsImpl<T, std::false_type> : vtkm::internal::VecTraitsBasic<T>
{
};
} // namespace detail
/// \brief A version of VecTraits that will be available for any type.
///
/// The `VecTraits` template is only defined for types that have a specific specialization
/// for it. That means if you use `VecTraits` in a template, that template will likely
/// fail to build for types for which `VecTraits` is not defined.
///
/// To use `VecTraits` in a class that should support all types, not just those with
/// defined `VecTraits`, you can use this "safe" version. `SafeVecTraits` is the same as
/// `VecTraits` if the latter is defined. If `VecTraits` is not defined for the type,
/// `SafeVecTraits` treats the type as a simple scalar value.
///
/// This template ensures that it will work reasonably well for all types. Be careful,
/// though: if `VecTraits` is later defined for the type, the behavior of this template
/// is likely to change.
///
template <typename T>
struct VTKM_NEVER_EXPORT SafeVecTraits : detail::SafeVecTraitsImpl<T>
{
};
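A minimal sketch of the intended use (the helper function is illustrative):

template <typename T>
typename vtkm::internal::SafeVecTraits<T>::ComponentType FirstComponent(const T& value)
{
  // Works for vtkm::Vec types and also for types with no VecTraits specialization,
  // which are treated as single-component scalars.
  return vtkm::internal::SafeVecTraits<T>::GetComponent(value, 0);
}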
} // namespace internal
/// \brief VecTraits for Pair types
@ -554,7 +593,7 @@ struct VTKM_NEVER_EXPORT VecTraits<vtkm::Pair<T, U>>
{
};
} // anonymous namespace
} // namespace vtkm
#define VTKM_BASIC_TYPE_VECTOR(type) \
namespace vtkm \

@ -37,7 +37,7 @@ namespace internal
// is defined rather than where it is resolved. This causes problems when extracting
// components of, say, an ArrayHandleMultiplexer holding an ArrayHandleSOA.
template <typename T, typename S>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType>
vtkm::cont::ArrayHandleStride<typename vtkm::internal::SafeVecTraits<T>::BaseComponentType>
ArrayExtractComponentFallback(const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy)
@ -53,7 +53,7 @@ ArrayExtractComponentFallback(const vtkm::cont::ArrayHandle<T, S>& src,
<< vtkm::cont::TypeToString<vtkm::cont::ArrayHandle<T, S>>()
<< " requires an inefficient memory copy.");
using BaseComponentType = typename vtkm::VecTraits<T>::BaseComponentType;
using BaseComponentType = typename vtkm::internal::SafeVecTraits<T>::BaseComponentType;
vtkm::Id numValues = src.GetNumberOfValues();
vtkm::cont::ArrayHandleBasic<BaseComponentType> dest;
dest.Allocate(numValues);
@ -78,10 +78,10 @@ template <typename S>
struct ArrayExtractComponentImpl : ArrayExtractComponentImplInefficient
{
template <typename T>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> operator()(
const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
vtkm::cont::ArrayHandleStride<typename vtkm::internal::SafeVecTraits<T>::BaseComponentType>
operator()(const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
{
// This is the slow "default" implementation. ArrayHandle implementations should provide
// more efficient overloads where applicable.
@ -93,13 +93,15 @@ template <>
struct ArrayExtractComponentImpl<vtkm::cont::StorageTagStride>
{
template <typename T>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> operator()(
const vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagStride>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
vtkm::cont::ArrayHandleStride<typename vtkm::internal::SafeVecTraits<T>::BaseComponentType>
operator()(const vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagStride>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
{
return this->DoExtract(
src, componentIndex, allowCopy, typename vtkm::VecTraits<T>::HasMultipleComponents{});
return this->DoExtract(src,
componentIndex,
allowCopy,
typename vtkm::internal::SafeVecTraits<T>::HasMultipleComponents{});
}
private:
@ -110,7 +112,7 @@ private:
vtkm::VecTraitsTagSingleComponent) const
{
VTKM_ASSERT(componentIndex == 0);
using VTraits = vtkm::VecTraits<T>;
using VTraits = vtkm::internal::SafeVecTraits<T>;
using TBase = typename VTraits::BaseComponentType;
VTKM_STATIC_ASSERT(VTraits::NUM_COMPONENTS == 1);
@ -133,7 +135,7 @@ private:
vtkm::CopyFlag allowCopy,
vtkm::VecTraitsTagMultipleComponents) const
{
using VTraits = vtkm::VecTraits<VecType>;
using VTraits = vtkm::internal::SafeVecTraits<VecType>;
using T = typename VTraits::ComponentType;
constexpr vtkm::IdComponent N = VTraits::NUM_COMPONENTS;
@ -252,10 +254,10 @@ using ArrayExtractComponentIsInefficient = typename std::is_base_of<
/// `vtkm::cont::internal::ArrayExtractComponentImpl`.
///
template <typename T, typename S>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> ArrayExtractComponent(
const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy = vtkm::CopyFlag::On)
vtkm::cont::ArrayHandleStride<typename vtkm::internal::SafeVecTraits<T>::BaseComponentType>
ArrayExtractComponent(const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy = vtkm::CopyFlag::On)
{
return internal::ArrayExtractComponentImpl<S>{}(src, componentIndex, allowCopy);
}
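A brief usage sketch, assuming an already populated vector array:

vtkm::cont::ArrayHandle<vtkm::Vec3f> vectors;
// ... fill 'vectors' ...
// Extract the Y components as a strided view; a copy is only made when the storage
// cannot expose its components directly and allowCopy permits it.
auto yComponents = vtkm::cont::ArrayExtractComponent(vectors, 1, vtkm::CopyFlag::On);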

@ -314,7 +314,7 @@ public:
/// Constructs an empty ArrayHandle.
///
VTKM_CONT ArrayHandle()
: Buffers(static_cast<std::size_t>(StorageType::GetNumberOfBuffers()))
: Buffers(StorageType::CreateBuffers())
{
}
@ -349,17 +349,10 @@ public:
VTKM_CONT ArrayHandle(const std::vector<vtkm::cont::internal::Buffer>& buffers)
: Buffers(buffers)
{
VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Buffers.size()) == this->GetNumberOfBuffers());
}
VTKM_CONT ArrayHandle(std::vector<vtkm::cont::internal::Buffer>&& buffers) noexcept
: Buffers(std::move(buffers))
{
VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Buffers.size()) == this->GetNumberOfBuffers());
}
VTKM_CONT ArrayHandle(const vtkm::cont::internal::Buffer* buffers)
: Buffers(buffers, buffers + StorageType::GetNumberOfBuffers())
{
}
///@}
@ -420,9 +413,10 @@ public:
return true; // different valuetype and/or storage
}
VTKM_CONT static constexpr vtkm::IdComponent GetNumberOfBuffers()
VTKM_DEPRECATED(1.9, "Use the size of the std::vector returned from GetBuffers.")
VTKM_CONT constexpr vtkm::IdComponent GetNumberOfBuffers()
{
return StorageType::GetNumberOfBuffers();
return static_cast<vtkm::IdComponent>(this->GetBuffers().size());
}
/// Get the storage.
@ -776,9 +770,15 @@ public:
}
}
/// Returns the internal `Buffer` structures that hold the data.
/// \brief Returns the internal `Buffer` structures that hold the data.
///
VTKM_CONT vtkm::cont::internal::Buffer* GetBuffers() const { return this->Buffers.data(); }
/// Note that great care should be taken when modifying buffers outside of the ArrayHandle.
///
VTKM_CONT const std::vector<vtkm::cont::internal::Buffer>& GetBuffers() const
{
return this->Buffers;
}
VTKM_CONT std::vector<vtkm::cont::internal::Buffer>& GetBuffers() { return this->Buffers; }
private:
mutable std::vector<vtkm::cont::internal::Buffer> Buffers;
@ -789,11 +789,13 @@ protected:
this->Buffers[static_cast<std::size_t>(index)] = buffer;
}
// BufferContainer must be an iterable container of Buffer objects.
template <typename BufferContainer>
VTKM_CONT void SetBuffers(const BufferContainer& buffers)
VTKM_CONT void SetBuffers(const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
std::copy(buffers.begin(), buffers.end(), this->Iterators->Buffers.begin());
this->Buffers = buffers;
}
VTKM_CONT void SetBuffers(std::vector<vtkm::cont::internal::Buffer>&& buffers)
{
this->Buffers = std::move(buffers);
}
};
@ -831,9 +833,9 @@ VTKM_NEVER_EXPORT VTKM_CONT inline void printSummary_ArrayHandle_Value(
std::ostream& out,
vtkm::VecTraitsTagMultipleComponents)
{
using Traits = vtkm::VecTraits<T>;
using Traits = vtkm::internal::SafeVecTraits<T>;
using ComponentType = typename Traits::ComponentType;
using IsVecOfVec = typename vtkm::VecTraits<ComponentType>::HasMultipleComponents;
using IsVecOfVec = typename vtkm::internal::SafeVecTraits<ComponentType>::HasMultipleComponents;
vtkm::IdComponent numComponents = Traits::GetNumberOfComponents(value);
out << "(";
printSummary_ArrayHandle_Value(Traits::GetComponent(value, 0), out, IsVecOfVec());
@ -853,10 +855,10 @@ VTKM_NEVER_EXPORT VTKM_CONT inline void printSummary_ArrayHandle_Value(
{
out << "{";
printSummary_ArrayHandle_Value(
value.first, out, typename vtkm::VecTraits<T1>::HasMultipleComponents());
value.first, out, typename vtkm::internal::SafeVecTraits<T1>::HasMultipleComponents());
out << ",";
printSummary_ArrayHandle_Value(
value.second, out, typename vtkm::VecTraits<T2>::HasMultipleComponents());
value.second, out, typename vtkm::internal::SafeVecTraits<T2>::HasMultipleComponents());
out << "}";
}
@ -872,7 +874,7 @@ VTKM_NEVER_EXPORT VTKM_CONT inline void printSummary_ArrayHandle(
{
using ArrayType = vtkm::cont::ArrayHandle<T, StorageT>;
using PortalType = typename ArrayType::ReadPortalType;
using IsVec = typename vtkm::VecTraits<T>::HasMultipleComponents;
using IsVec = typename vtkm::internal::SafeVecTraits<T>::HasMultipleComponents;
vtkm::Id sz = array.GetNumberOfValues();
@ -915,6 +917,25 @@ namespace internal
namespace detail
{
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>&);
template <typename T, typename S, typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const vtkm::cont::ArrayHandle<T, S>& array,
const Args&... args);
template <typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const vtkm::cont::internal::Buffer& buffer,
const Args&... args);
template <typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const std::vector<vtkm::cont::internal::Buffer>& addbuffs,
const Args&... args);
template <typename Arg0, typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const Arg0& arg0,
const Args&... args);
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>&)
{
// Nothing left to add.
@ -925,9 +946,7 @@ VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer
const vtkm::cont::ArrayHandle<T, S>& array,
const Args&... args)
{
vtkm::cont::internal::Buffer* arrayBuffers = array.GetBuffers();
buffers.insert(buffers.end(), arrayBuffers, arrayBuffers + array.GetNumberOfBuffers());
CreateBuffersImpl(buffers, args...);
CreateBuffersImpl(buffers, array.GetBuffers(), args...);
}
template <typename... Args>
@ -948,11 +967,6 @@ VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer
CreateBuffersImpl(buffers, args...);
}
template <typename Arg0, typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const Arg0& arg0,
const Args&... args);
template <typename T, typename S, typename... Args>
VTKM_CONT inline void CreateBuffersResolveArrays(std::vector<vtkm::cont::internal::Buffer>& buffers,
std::true_type,
@ -1001,7 +1015,7 @@ VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer
/// - `ArrayHandle`: The buffers from the `ArrayHandle` are added to the list.
/// - `Buffer`: A copy of the buffer is added to the list.
/// - `std::vector<Buffer>`: A copy of all buffers in this vector are added to the list.
/// - Anything else: A buffer with the given object attached as metadata is
/// - Anything else: A buffer with the given object attached as metadata is added to the list.
///
template <typename... Args>
VTKM_CONT inline std::vector<vtkm::cont::internal::Buffer> CreateBuffers(const Args&... args)
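A short sketch of how the variadic overload is typically called by a storage implementation (the metadata struct and arrays are illustrative):

struct MyMetaData { vtkm::Id PartitionPoint; }; // hypothetical metadata type

vtkm::cont::ArrayHandle<vtkm::Float32> arrayA;
vtkm::cont::ArrayHandle<vtkm::Float32> arrayB;
MyMetaData info{ 0 };
std::vector<vtkm::cont::internal::Buffer> buffers =
  vtkm::cont::internal::CreateBuffers(info, arrayA, arrayB);
// buffers[0] carries 'info' as metadata; the remaining entries are copies of the Buffer
// objects owned by arrayA and arrayB, in that order.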

@ -35,10 +35,13 @@ public:
using ReadPortalType = vtkm::internal::ArrayPortalBasicRead<T>;
using WritePortalType = vtkm::internal::ArrayPortalBasicWrite<T>;
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers()
{
return std::vector<vtkm::cont::internal::Buffer>(1);
}
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
@ -46,36 +49,43 @@ public:
vtkm::internal::NumberOfValuesToNumberOfBytes<T>(numValues), preserve, token);
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return static_cast<vtkm::Id>(buffers->GetNumberOfBytes() /
VTKM_ASSERT(buffers.size() == 1);
return static_cast<vtkm::Id>(buffers[0].GetNumberOfBytes() /
static_cast<vtkm::BufferSizeType>(sizeof(T)));
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer* buffers,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>& buffers,
const T& fillValue,
vtkm::Id startIndex,
vtkm::Id endIndex,
vtkm::cont::Token& token)
{
VTKM_ASSERT(buffers.size() == 1);
constexpr vtkm::BufferSizeType fillValueSize =
static_cast<vtkm::BufferSizeType>(sizeof(fillValue));
buffers[0].Fill(
&fillValue, fillValueSize, startIndex * fillValueSize, endIndex * fillValueSize, token);
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
VTKM_ASSERT(buffers.size() == 1);
return ReadPortalType(reinterpret_cast<const T*>(buffers[0].ReadPointerDevice(device, token)),
GetNumberOfValues(buffers));
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
VTKM_ASSERT(buffers.size() == 1);
return WritePortalType(reinterpret_cast<T*>(buffers[0].WritePointerDevice(device, token)),
GetNumberOfValues(buffers));
}
@ -345,7 +355,8 @@ struct Serialization<vtkm::cont::ArrayHandleBasic<T>>
vtkm::cont::internal::Buffer buffer;
vtkmdiy::load(bb, buffer);
obj = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>(&buffer);
obj = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>(
vtkm::cont::internal::CreateBuffers(buffer));
}
};

@ -82,10 +82,13 @@ public:
using ReadPortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstType>;
using WritePortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalType>;
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers()
{
return std::vector<vtkm::cont::internal::Buffer>(1);
}
VTKM_CONT static void ResizeBuffers(vtkm::Id numberOfBits,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
@ -102,20 +105,23 @@ public:
buffers[0].GetMetaData<vtkm::cont::internal::BitFieldMetaData>().NumberOfBits = numberOfBits;
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
VTKM_ASSERT(buffers.size() == 1);
vtkm::Id numberOfBits =
buffers[0].GetMetaData<vtkm::cont::internal::BitFieldMetaData>().NumberOfBits;
VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);
return numberOfBits;
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer* buffers,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>& buffers,
bool fillValue,
vtkm::Id startBit,
vtkm::Id endBit,
vtkm::cont::Token& token)
{
VTKM_ASSERT(buffers.size() == 1);
constexpr vtkm::BufferSizeType wordTypeSize =
static_cast<vtkm::BufferSizeType>(sizeof(WordType));
constexpr vtkm::BufferSizeType wordNumBits = wordTypeSize * CHAR_BIT;
@ -141,10 +147,12 @@ public:
}
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
VTKM_ASSERT(buffers.size() == 1);
vtkm::Id numberOfBits = GetNumberOfValues(buffers);
VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);
@ -152,10 +160,12 @@ public:
BitPortalConstType(buffers[0].ReadPointerDevice(device, token), numberOfBits));
}
VTKM_CONT static WritePortalType CreateWritePortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
VTKM_ASSERT(buffers.size() == 1);
vtkm::Id numberOfBits = GetNumberOfValues(buffers);
VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);

@ -17,6 +17,8 @@
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/cont/Token.h>
#include <array>
namespace vtkm
{
namespace internal
@ -199,26 +201,27 @@ struct ArrayHandleCartesianProductTraits
template <typename T, typename ST1, typename ST2, typename ST3>
class Storage<vtkm::Vec<T, 3>, vtkm::cont::StorageTagCartesianProduct<ST1, ST2, ST3>>
{
struct Info
{
std::array<std::size_t, 4> BufferOffset;
};
using Storage1 = vtkm::cont::internal::Storage<T, ST1>;
using Storage2 = vtkm::cont::internal::Storage<T, ST2>;
using Storage3 = vtkm::cont::internal::Storage<T, ST3>;
template <typename Buffs>
VTKM_CONT constexpr static Buffs* Buffers1(Buffs* buffers)
{
return buffers;
}
using Array1 = vtkm::cont::ArrayHandle<T, ST1>;
using Array2 = vtkm::cont::ArrayHandle<T, ST2>;
using Array3 = vtkm::cont::ArrayHandle<T, ST3>;
template <typename Buffs>
VTKM_CONT constexpr static Buffs* Buffers2(Buffs* buffers)
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> GetBuffers(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
std::size_t subArray)
{
return buffers + Storage1::GetNumberOfBuffers();
}
template <typename Buffs>
VTKM_CONT constexpr static Buffs* Buffers3(Buffs* buffers)
{
return buffers + Storage1::GetNumberOfBuffers() + Storage2::GetNumberOfBuffers();
Info info = buffers[0].GetMetaData<Info>();
return std::vector<vtkm::cont::internal::Buffer>(buffers.begin() +
info.BufferOffset[subArray - 1],
buffers.begin() + info.BufferOffset[subArray]);
}
public:
@ -235,20 +238,15 @@ public:
typename Storage2::WritePortalType,
typename Storage3::WritePortalType>;
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers()
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return Storage1::GetNumberOfBuffers() + Storage2::GetNumberOfBuffers() +
Storage3::GetNumberOfBuffers();
return (Storage1::GetNumberOfValues(GetBuffers(buffers, 1)) *
Storage2::GetNumberOfValues(GetBuffers(buffers, 2)) *
Storage3::GetNumberOfValues(GetBuffers(buffers, 3)));
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
return (Storage1::GetNumberOfValues(Buffers1(buffers)) *
Storage2::GetNumberOfValues(Buffers2(buffers)) *
Storage3::GetNumberOfValues(Buffers3(buffers)));
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer* buffers,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>& buffers,
const vtkm::Vec<T, 3>& fillValue,
vtkm::Id startIndex,
vtkm::Id endIndex,
@ -259,46 +257,63 @@ public:
throw vtkm::cont::ErrorBadValue(
"Fill for ArrayHandleCartesianProduct can only be used to fill entire array.");
}
Storage1::Fill(
Buffers1(buffers), fillValue[0], 0, Storage1::GetNumberOfValues(Buffers1(buffers)), token);
Storage2::Fill(
Buffers2(buffers), fillValue[1], 0, Storage2::GetNumberOfValues(Buffers2(buffers)), token);
Storage3::Fill(
Buffers3(buffers), fillValue[2], 0, Storage3::GetNumberOfValues(Buffers3(buffers)), token);
auto subBuffers = GetBuffers(buffers, 1);
Storage1::Fill(subBuffers, fillValue[0], 0, Storage1::GetNumberOfValues(subBuffers), token);
subBuffers = GetBuffers(buffers, 2);
Storage2::Fill(subBuffers, fillValue[1], 0, Storage2::GetNumberOfValues(subBuffers), token);
subBuffers = GetBuffers(buffers, 3);
Storage3::Fill(subBuffers, fillValue[2], 0, Storage3::GetNumberOfValues(subBuffers), token);
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return ReadPortalType(Storage1::CreateReadPortal(Buffers1(buffers), device, token),
Storage2::CreateReadPortal(Buffers2(buffers), device, token),
Storage3::CreateReadPortal(Buffers3(buffers), device, token));
return ReadPortalType(Storage1::CreateReadPortal(GetBuffers(buffers, 1), device, token),
Storage2::CreateReadPortal(GetBuffers(buffers, 2), device, token),
Storage3::CreateReadPortal(GetBuffers(buffers, 3), device, token));
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return WritePortalType(Storage1::CreateWritePortal(Buffers1(buffers), device, token),
Storage2::CreateWritePortal(Buffers2(buffers), device, token),
Storage3::CreateWritePortal(Buffers3(buffers), device, token));
return WritePortalType(Storage1::CreateWritePortal(GetBuffers(buffers, 1), device, token),
Storage2::CreateWritePortal(GetBuffers(buffers, 2), device, token),
Storage3::CreateWritePortal(GetBuffers(buffers, 3), device, token));
}
VTKM_CONT static vtkm::cont::ArrayHandle<T, ST1> GetArrayHandle1(
const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static Array1 GetArrayHandle1(const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return vtkm::cont::ArrayHandle<T, ST1>(Buffers1(buffers));
return Array1(GetBuffers(buffers, 1));
}
VTKM_CONT static vtkm::cont::ArrayHandle<T, ST2> GetArrayHandle2(
const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static Array2 GetArrayHandle2(const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return vtkm::cont::ArrayHandle<T, ST2>(Buffers2(buffers));
return Array2(GetBuffers(buffers, 2));
}
VTKM_CONT static vtkm::cont::ArrayHandle<T, ST3> GetArrayHandle3(
const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static Array3 GetArrayHandle3(const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return vtkm::cont::ArrayHandle<T, ST3>(Buffers3(buffers));
return Array3(GetBuffers(buffers, 3));
}
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers(
const Array1& array1 = Array1{},
const Array2& array2 = Array2{},
const Array3& array3 = Array3{})
{
const std::vector<vtkm::cont::internal::Buffer>& buffers1 = array1.GetBuffers();
const std::vector<vtkm::cont::internal::Buffer>& buffers2 = array2.GetBuffers();
const std::vector<vtkm::cont::internal::Buffer>& buffers3 = array3.GetBuffers();
Info info;
info.BufferOffset[0] = 1;
info.BufferOffset[1] = info.BufferOffset[0] + buffers1.size();
info.BufferOffset[2] = info.BufferOffset[1] + buffers2.size();
info.BufferOffset[3] = info.BufferOffset[2] + buffers3.size();
return vtkm::cont::internal::CreateBuffers(info, buffers1, buffers2, buffers3);
}
};
} // namespace internal
@ -335,7 +350,7 @@ public:
ArrayHandleCartesianProduct(const FirstHandleType& firstArray,
const SecondHandleType& secondArray,
const ThirdHandleType& thirdArray)
: Superclass(vtkm::cont::internal::CreateBuffers(firstArray, secondArray, thirdArray))
: Superclass(StorageType::CreateBuffers(firstArray, secondArray, thirdArray))
{
}
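A usage sketch (the axis arrays are illustrative):

vtkm::cont::ArrayHandle<vtkm::FloatDefault> xs, ys, zs;
// ... fill the three axis arrays ...
auto points = vtkm::cont::make_ArrayHandleCartesianProduct(xs, ys, zs);
// points yields vtkm::Vec3f values and reports
// xs.GetNumberOfValues() * ys.GetNumberOfValues() * zs.GetNumberOfValues() entries;
// internally, buffer 0 stores the Info offsets that GetBuffers uses to recover each axis.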

@ -20,6 +20,7 @@
#include <vtkmstd/integer_sequence.h>
#include <numeric>
#include <type_traits>
namespace vtkm
@ -169,31 +170,6 @@ struct VerifyArrayHandle
"must be a list of ArrayHandle types.");
};
template <std::size_t I>
struct BufferIndexImpl
{
template <typename... Ts>
static constexpr vtkm::IdComponent Value(vtkm::IdComponent n, Ts... remaining)
{
return n + BufferIndexImpl<I - 1>::Value(remaining...);
}
};
template <>
struct BufferIndexImpl<0>
{
template <typename... Ts>
static constexpr vtkm::IdComponent Value(Ts...)
{
return 0;
}
};
template <std::size_t I, typename... StorageTypes>
constexpr vtkm::IdComponent BufferIndex()
{
return BufferIndexImpl<I>::Value(StorageTypes::GetNumberOfBuffers()...);
}
} // end namespace compvec
} // namespace internal
@ -226,21 +202,31 @@ class Storage<vtkm::Vec<T, static_cast<vtkm::IdComponent>(sizeof...(StorageTags)
{
using ValueType = vtkm::Vec<T, static_cast<vtkm::IdComponent>(sizeof...(StorageTags))>;
struct Info
{
std::array<std::size_t, sizeof...(StorageTags) + 1> BufferOffset;
};
template <typename S>
using StorageFor = vtkm::cont::internal::Storage<T, S>;
using StorageTuple = vtkm::Tuple<StorageFor<StorageTags>...>;
template <std::size_t I>
VTKM_CONT static constexpr vtkm::IdComponent BufferIndex()
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> GetBuffers(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
std::size_t subArray)
{
return compvec::BufferIndex<I, StorageFor<StorageTags>...>();
Info info = buffers[0].GetMetaData<Info>();
return std::vector<vtkm::cont::internal::Buffer>(buffers.begin() + info.BufferOffset[subArray],
buffers.begin() +
info.BufferOffset[subArray + 1]);
}
template <std::size_t I, typename Buff>
VTKM_CONT static Buff* Buffers(Buff* buffers)
template <std::size_t I>
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> Buffers(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return buffers + BufferIndex<I>();
return GetBuffers(buffers, I);
}
using IndexList = vtkmstd::make_index_sequence<sizeof...(StorageTags)>;
@ -255,19 +241,21 @@ private:
template <std::size_t... Is>
static void ResizeBuffersImpl(vtkmstd::index_sequence<Is...>,
vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
std::vector<std::vector<vtkm::cont::internal::Buffer>> bufferPartitions = { Buffers<Is>(
buffers)... };
auto init_list = { (vtkm::tuple_element_t<Is, StorageTuple>::ResizeBuffers(
numValues, Buffers<Is>(buffers), preserve, token),
numValues, bufferPartitions[Is], preserve, token),
false)... };
(void)init_list;
}
template <std::size_t... Is>
static void FillImpl(vtkmstd::index_sequence<Is...>,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
const ValueType& fillValue,
vtkm::Id startIndex,
vtkm::Id endIndex,
@ -284,45 +272,43 @@ private:
}
template <std::size_t... Is>
static ReadPortalType CreateReadPortalImpl(vtkmstd::index_sequence<Is...>,
const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
static ReadPortalType CreateReadPortalImpl(
vtkmstd::index_sequence<Is...>,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return ReadPortalType(vtkm::tuple_element_t<Is, StorageTuple>::CreateReadPortal(
Buffers<Is>(buffers), device, token)...);
}
template <std::size_t... Is>
static WritePortalType CreateWritePortalImpl(vtkmstd::index_sequence<Is...>,
vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
static WritePortalType CreateWritePortalImpl(
vtkmstd::index_sequence<Is...>,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return WritePortalType(vtkm::tuple_element_t<Is, StorageTuple>::CreateWritePortal(
Buffers<Is>(buffers), device, token)...);
}
public:
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers()
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return BufferIndex<sizeof...(StorageTags)>();
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
return vtkm::TupleElement<0, StorageTuple>::GetNumberOfValues(buffers);
return vtkm::TupleElement<0, StorageTuple>::GetNumberOfValues(Buffers<0>(buffers));
}
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
ResizeBuffersImpl(IndexList{}, numValues, buffers, preserve, token);
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer* buffers,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>& buffers,
const ValueType& fillValue,
vtkm::Id startIndex,
vtkm::Id endIndex,
@ -331,65 +317,51 @@ public:
FillImpl(IndexList{}, buffers, fillValue, startIndex, endIndex, token);
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return CreateReadPortalImpl(IndexList{}, buffers, device, token);
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return CreateWritePortalImpl(IndexList{}, buffers, device, token);
}
private:
template <typename ArrayType>
VTKM_CONT static bool CopyBuffers(const ArrayType& array,
vtkm::cont::internal::Buffer* destBuffers)
{
vtkm::IdComponent numBuffers = array.GetNumberOfBuffers();
const vtkm::cont::internal::Buffer* srcBuffers = array.GetBuffers();
for (vtkm::IdComponent buffIndex = 0; buffIndex < numBuffers; ++buffIndex)
{
destBuffers[buffIndex] = srcBuffers[buffIndex];
}
return false; // Return value does not matter. Hopefully just thrown away by compiler.
}
template <std::size_t... Is, typename... ArrayTs>
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffersImpl(
vtkmstd::index_sequence<Is...>,
const ArrayTs... arrays)
{
std::vector<vtkm::cont::internal::Buffer> buffers(
static_cast<std::size_t>(GetNumberOfBuffers()));
auto init_list = { CopyBuffers(arrays, Buffers<Is>(&buffers.front()))... };
(void)init_list;
return buffers;
}
public:
template <typename... ArrayTs>
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers(const ArrayTs... arrays)
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers(
const vtkm::cont::ArrayHandle<T, StorageTags>&... arrays)
{
return CreateBuffersImpl(IndexList{}, arrays...);
auto numBuffers = { std::size_t{ 1 }, arrays.GetBuffers().size()... };
Info info;
std::partial_sum(numBuffers.begin(), numBuffers.end(), info.BufferOffset.begin());
return vtkm::cont::internal::CreateBuffers(info, arrays...);
}
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers()
{
return CreateBuffers(vtkm::cont::ArrayHandle<T, StorageTags>{}...);
}
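// A worked example of the offset bookkeeping above (buffer counts are illustrative):
// with three component arrays owning 1, 2, and 1 buffers respectively,
//   numBuffers   = { 1, 1, 2, 1 }   (the leading 1 is the metadata buffer holding Info)
//   BufferOffset = { 1, 2, 4, 5 }   (std::partial_sum of numBuffers)
// so sub-array I occupies buffers [BufferOffset[I], BufferOffset[I + 1]) of the flat list.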
private:
using ArrayTupleType = vtkm::Tuple<vtkm::cont::ArrayHandle<T, StorageTags>...>;
template <std::size_t... Is>
VTKM_CONT static ArrayTupleType GetArrayTupleImpl(vtkmstd::index_sequence<Is...>,
const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static ArrayTupleType GetArrayTupleImpl(
vtkmstd::index_sequence<Is...>,
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return ArrayTupleType(vtkm::cont::ArrayHandle<T, StorageTags>(Buffers<Is>(buffers))...);
}
public:
VTKM_CONT static ArrayTupleType GetArrayTuple(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static ArrayTupleType GetArrayTuple(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return GetArrayTupleImpl(IndexList{}, buffers);
}
@ -400,13 +372,13 @@ template <typename T, typename StorageTag>
struct Storage<T, vtkm::cont::StorageTagCompositeVec<StorageTag>> : Storage<T, StorageTag>
{
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers(
const vtkm::cont::ArrayHandle<T, StorageTag>& array)
const vtkm::cont::ArrayHandle<T, StorageTag>& array = vtkm::cont::ArrayHandle<T, StorageTag>{})
{
return vtkm::cont::internal::CreateBuffers(array);
}
VTKM_CONT static vtkm::Tuple<vtkm::cont::ArrayHandle<T, StorageTag>> GetArrayTuple(
const vtkm::cont::internal::Buffer* buffers)
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return vtkm::cont::ArrayHandle<T, StorageTag>(buffers);
}

@ -169,16 +169,26 @@ class Storage<T, StorageTagConcatenate<ST1, ST2>>
using ArrayHandleType1 = typename detail::ConcatinateTypeArg<T, ST1>::ArrayHandle;
using ArrayHandleType2 = typename detail::ConcatinateTypeArg<T, ST2>::ArrayHandle;
template <typename Buff>
VTKM_CONT static Buff* Buffers1(Buff* buffers)
struct Info
{
return buffers;
std::size_t NumBuffers1;
std::size_t NumBuffers2;
};
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> Buffers1(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
Info info = buffers[0].GetMetaData<Info>();
return std::vector<vtkm::cont::internal::Buffer>(buffers.begin() + 1,
buffers.begin() + 1 + info.NumBuffers1);
}
template <typename Buff>
VTKM_CONT static Buff* Buffers2(Buff* buffers)
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> Buffers2(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return buffers + SourceStorage1::GetNumberOfBuffers();
Info info = buffers[0].GetMetaData<Info>();
return std::vector<vtkm::cont::internal::Buffer>(buffers.begin() + 1 + info.NumBuffers1,
buffers.end());
}
public:
@ -191,18 +201,14 @@ public:
vtkm::internal::ArrayPortalConcatenate<typename SourceStorage1::WritePortalType,
typename SourceStorage2::WritePortalType>;
VTKM_CONT static constexpr vtkm::IdComponent GetNumberOfBuffers()
{
return (SourceStorage1::GetNumberOfBuffers() + SourceStorage2::GetNumberOfBuffers());
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return (SourceStorage1::GetNumberOfValues(Buffers1(buffers)) +
SourceStorage2::GetNumberOfValues(Buffers2(buffers)));
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer* buffers,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>& buffers,
const T& fillValue,
vtkm::Id startIndex,
vtkm::Id endIndex,
@ -225,35 +231,42 @@ public:
}
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return ReadPortalType(SourceStorage1::CreateReadPortal(Buffers1(buffers), device, token),
SourceStorage2::CreateReadPortal(Buffers2(buffers), device, token));
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return WritePortalType(SourceStorage1::CreateWritePortal(Buffers1(buffers), device, token),
SourceStorage2::CreateWritePortal(Buffers2(buffers), device, token));
}
VTKM_CONT static auto CreateBuffers(const ArrayHandleType1& array1,
const ArrayHandleType2& array2)
VTKM_CONT static auto CreateBuffers(const ArrayHandleType1& array1 = ArrayHandleType1{},
const ArrayHandleType2& array2 = ArrayHandleType2{})
-> decltype(vtkm::cont::internal::CreateBuffers())
{
return vtkm::cont::internal::CreateBuffers(array1, array2);
Info info;
info.NumBuffers1 = array1.GetBuffers().size();
info.NumBuffers2 = array2.GetBuffers().size();
return vtkm::cont::internal::CreateBuffers(info, array1, array2);
}
VTKM_CONT static const ArrayHandleType1 GetArray1(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static const ArrayHandleType1 GetArray1(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return ArrayHandleType1(Buffers1(buffers));
}
VTKM_CONT static const ArrayHandleType2 GetArray2(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static const ArrayHandleType2 GetArray2(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return ArrayHandleType2(Buffers2(buffers));
}
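A usage sketch (the input arrays are illustrative):

vtkm::cont::ArrayHandle<vtkm::Id> first;
vtkm::cont::ArrayHandle<vtkm::Id> second;
auto joined = vtkm::cont::make_ArrayHandleConcatenate(first, second);
// joined reports first.GetNumberOfValues() + second.GetNumberOfValues() entries;
// buffer 0 stores the Info counts that Buffers1/Buffers2 use to split the flat list.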

@ -24,6 +24,7 @@
#include <vtkmstd/integer_sequence.h>
#include <numeric>
#include <type_traits>
#include <utility>
@ -306,35 +307,22 @@ using GetWritePortalList =
std::declval<vtkm::cont::DeviceAdapterId>(),
std::declval<vtkm::cont::Token&>())))...>;
template <vtkm::IdComponent I, typename ArrayTupleType>
struct BufferIndexImpl
{
static constexpr vtkm::IdComponent Value()
{
return BufferIndexImpl<I - 1, ArrayTupleType>::Value() +
vtkm::TupleElement<I - 1, ArrayTupleType>::GetNumberOfBuffers();
}
};
template <typename ArrayTupleType>
struct BufferIndexImpl<0, ArrayTupleType>
{
static constexpr vtkm::IdComponent Value()
{
// One buffer reserved for metadata.
return 1;
}
};
template <typename DecoratorImplT>
template <typename DecoratorImplT, std::size_t NumArrays>
struct DecoratorMetaData
{
DecoratorImplT Implementation;
vtkm::Id NumberOfValues = 0;
std::array<std::size_t, NumArrays + 1> BufferOffsets;
DecoratorMetaData(const DecoratorImplT& implementation, vtkm::Id numValues)
template <typename... ArrayTs>
DecoratorMetaData(const DecoratorImplT& implementation,
vtkm::Id numValues,
const ArrayTs... arrays)
: Implementation(implementation)
, NumberOfValues(numValues)
{
auto numBuffers = { std::size_t{ 1 }, arrays.GetBuffers().size()... };
std::partial_sum(numBuffers.begin(), numBuffers.end(), this->BufferOffsets.begin());
}
DecoratorMetaData() = default;
@ -363,26 +351,22 @@ struct DecoratorStorageTraits
// size_t integral constants that index ArrayTs:
using IndexList = vtkmstd::make_index_sequence<sizeof...(ArrayTs)>;
// Returns the index into the buffers array for the array at the given index.
template <vtkm::IdComponent I>
static constexpr vtkm::IdComponent BufferIndex()
using MetaData = DecoratorMetaData<DecoratorImplT, sizeof...(ArrayTs)>;
static MetaData& GetMetaData(const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return BufferIndexImpl<I, ArrayTupleType>::Value();
return buffers[0].GetMetaData<MetaData>();
}
// Converts a buffers array to the ArrayHandle at the given index.
template <vtkm::IdComponent I>
static vtkm::TupleElement<I, ArrayTupleType> BuffersToArray(
const vtkm::cont::internal::Buffer* buffers)
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return vtkm::TupleElement<I, ArrayTupleType>(buffers + BufferIndex<I>());
}
using MetaData = DecoratorMetaData<DecoratorImplT>;
static MetaData& GetMetaData(const vtkm::cont::internal::Buffer* buffers)
{
return buffers[0].GetMetaData<MetaData>();
const MetaData& metaData = GetMetaData(buffers);
std::vector<vtkm::cont::internal::Buffer> subBuffers(
buffers.begin() + metaData.BufferOffsets[I], buffers.begin() + metaData.BufferOffsets[I + 1]);
return vtkm::TupleElement<I, ArrayTupleType>(std::move(subBuffers));
}
// true_type/false_type depending on whether the decorator supports Allocate:
@ -440,7 +424,7 @@ struct DecoratorStorageTraits
// Static dispatch for calling AllocateSourceArrays on supported implementations:
VTKM_CONT [[noreturn]] static void CallAllocate(std::false_type,
vtkm::Id,
vtkm::cont::internal::Buffer*,
const std::vector<vtkm::cont::internal::Buffer>&,
vtkm::CopyFlag,
vtkm::cont::Token&,
ArrayTs...)
@ -450,7 +434,7 @@ struct DecoratorStorageTraits
VTKM_CONT static void CallAllocate(std::true_type,
vtkm::Id newSize,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token,
ArrayTs... arrays)
@ -463,11 +447,12 @@ struct DecoratorStorageTraits
// Portal construction methods. These actually create portals.
template <std::size_t... Indices>
VTKM_CONT static WritePortalType CreateWritePortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::Id numValues,
vtkmstd::index_sequence<Indices...>,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::Id numValues,
vtkmstd::index_sequence<Indices...>,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return CreatePortalDecorator<WritePortalType>(
numValues,
@ -476,11 +461,12 @@ struct DecoratorStorageTraits
}
template <std::size_t... Indices>
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::Id numValues,
vtkmstd::index_sequence<Indices...>,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::Id numValues,
vtkmstd::index_sequence<Indices...>,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return CreatePortalDecorator<ReadPortalType>(
numValues,
@ -489,11 +475,12 @@ struct DecoratorStorageTraits
}
template <std::size_t... Indices>
VTKM_CONT static void AllocateSourceArrays(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token,
vtkmstd::index_sequence<Indices...>)
VTKM_CONT static void AllocateSourceArrays(
vtkm::Id numValues,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token,
vtkmstd::index_sequence<Indices...>)
{
CallAllocate(
IsAllocatable{}, numValues, buffers, preserve, token, BuffersToArray<Indices>(buffers)...);
@ -519,18 +506,14 @@ public:
using ReadPortalType = typename Traits::ReadPortalType;
using WritePortalType = typename Traits::WritePortalType;
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers()
{
return Traits::template BufferIndex<static_cast<vtkm::IdComponent>(sizeof...(ArrayTs))>();
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return Traits::GetMetaData(buffers).NumberOfValues;
}
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
@ -545,17 +528,19 @@ public:
}
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return Traits::CreateReadPortal(
buffers, GetNumberOfValues(buffers), IndexList{}, device, token);
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return Traits::CreateWritePortal(
buffers, GetNumberOfValues(buffers), IndexList{}, device, token);
@ -564,7 +549,13 @@ public:
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer>
CreateBuffers(const DecoratorImplT& implementation, vtkm::Id numValues, const ArrayTs&... arrays)
{
return vtkm::cont::internal::CreateBuffers(MetaData(implementation, numValues), arrays...);
return vtkm::cont::internal::CreateBuffers(MetaData(implementation, numValues, arrays...),
arrays...);
}
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers()
{
return CreateBuffers(DecoratorImplT{}, 0, ArrayTs{}...);
}
};
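
The hunks above replace the raw vtkm::cont::internal::Buffer* arguments of the decorator storage with const std::vector<vtkm::cont::internal::Buffer>&, which also makes GetNumberOfBuffers unnecessary because the vector carries its own length, and each source array's buffers are recovered by slicing the list at recorded offsets. Below is a minimal, self-contained sketch of that slicing idiom in plain C++; it is not VTK-m code, and Buffer, MetaData, and BufferOffsets are stand-in names used only for illustration.

  // Plain C++ sketch of offset-based buffer slicing (assumed stand-in types).
  #include <cstddef>
  #include <iostream>
  #include <vector>

  struct Buffer            // placeholder for vtkm::cont::internal::Buffer
  {
    std::size_t SizeInBytes = 0;
  };

  struct MetaData          // placeholder for the decorator metadata
  {
    std::vector<std::size_t> BufferOffsets;  // where each source array's buffers begin
  };

  // Return the buffers belonging to one source array. With std::vector this is an
  // iterator-range copy of the buffer handles, replacing pointer arithmetic such
  // as "buffers + offset" in the pointer-based interface.
  std::vector<Buffer> BuffersForArray(const std::vector<Buffer>& buffers,
                                      const MetaData& metaData,
                                      std::size_t arrayIndex)
  {
    return std::vector<Buffer>(buffers.begin() + metaData.BufferOffsets[arrayIndex],
                               buffers.begin() + metaData.BufferOffsets[arrayIndex + 1]);
  }

  int main()
  {
    std::vector<Buffer> buffers(5);
    MetaData metaData;
    metaData.BufferOffsets = { 0, 2, 5 };  // array 0 owns buffers [0,2), array 1 owns [2,5)
    std::cout << BuffersForArray(buffers, metaData, 1).size() << "\n";  // prints 3
    return 0;
  }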

@ -103,10 +103,15 @@ public:
// you actually try to use this read portal.
using ReadPortalType = vtkm::exec::internal::ArrayPortalDiscard<ValueType>;
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers()
{
DiscardMetaData metaData;
metaData.NumberOfValues = 0;
return vtkm::cont::internal::CreateBuffers(metaData);
}
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag,
vtkm::cont::Token&)
{
@ -114,12 +119,13 @@ public:
buffers[0].GetMetaData<DiscardMetaData>().NumberOfValues = numValues;
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return buffers[0].GetMetaData<DiscardMetaData>().NumberOfValues;
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer*,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>&,
const ValueType&,
vtkm::Id,
vtkm::Id,
@ -128,16 +134,17 @@ public:
// Fill is a NO-OP.
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer*,
VTKM_CONT static ReadPortalType CreateReadPortal(const std::vector<vtkm::cont::internal::Buffer>&,
vtkm::cont::DeviceAdapterId,
vtkm::cont::Token&)
{
throw vtkm::cont::ErrorBadValue("Cannot read from ArrayHandleDiscard.");
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId,
vtkm::cont::Token&)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId,
vtkm::cont::Token&)
{
return WritePortalType(GetNumberOfValues(buffers));
}
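
The discard storage above keeps its only state, the element count, in the metadata of buffers[0], so resizing never touches real memory. Here is a rough sketch of that idiom in plain C++, assuming the buffer behaves like a shared handle whose metadata can be updated even when the surrounding vector is passed by const reference; the types below are stand-ins, not the real VTK-m classes.

  // Plain C++ sketch of metadata-only "resizing" (assumed stand-in types).
  #include <cassert>
  #include <memory>
  #include <vector>

  struct DiscardMetaData { long NumberOfValues = 0; };

  // Stand-in for vtkm::cont::internal::Buffer: a shared handle, so the metadata it
  // refers to stays mutable through a const std::vector<Buffer>&.
  struct Buffer
  {
    std::shared_ptr<DiscardMetaData> MetaData = std::make_shared<DiscardMetaData>();
    DiscardMetaData& GetMetaData() const { return *this->MetaData; }
  };

  void ResizeBuffers(long numValues, const std::vector<Buffer>& buffers)
  {
    // Nothing is allocated for a discard array; only the recorded size changes.
    buffers[0].GetMetaData().NumberOfValues = numValues;
  }

  long GetNumberOfValues(const std::vector<Buffer>& buffers)
  {
    return buffers[0].GetMetaData().NumberOfValues;
  }

  int main()
  {
    std::vector<Buffer> buffers(1);
    ResizeBuffers(42, buffers);
    assert(GetNumberOfValues(buffers) == 42);
    return 0;
  }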

@ -44,13 +44,8 @@ public:
{
}
// Copy constructor
VTKM_EXEC_CONT ArrayPortalExtractComponent(const ArrayPortalExtractComponent<PortalType>& src)
: Portal(src.Portal)
, Component(src.Component)
{
}
ArrayPortalExtractComponent(const ArrayPortalExtractComponent&) = default;
ArrayPortalExtractComponent(ArrayPortalExtractComponent&&) = default;
ArrayPortalExtractComponent& operator=(const ArrayPortalExtractComponent&) = default;
ArrayPortalExtractComponent& operator=(ArrayPortalExtractComponent&&) = default;
@ -103,15 +98,16 @@ class Storage<typename vtkm::VecTraits<typename ArrayHandleType::ValueType>::Com
using SourceStorage = vtkm::cont::internal::Storage<SourceValueType, SourceStorageTag>;
public:
VTKM_CONT static vtkm::IdComponent ComponentIndex(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::IdComponent ComponentIndex(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return buffers[0].GetMetaData<vtkm::IdComponent>();
}
template <typename Buff>
VTKM_CONT static Buff* SourceBuffers(Buff* buffers)
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> SourceBuffers(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return buffers + 1;
return std::vector<vtkm::cont::internal::Buffer>(buffers.begin() + 1, buffers.end());
}
using ReadPortalType =
@ -119,17 +115,13 @@ public:
using WritePortalType =
vtkm::internal::ArrayPortalExtractComponent<typename SourceStorage::WritePortalType>;
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers()
{
return SourceStorage::GetNumberOfBuffers() + 1;
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT static vtkm::Id GetNumberOfValues(
const std::vector<vtkm::cont::internal::Buffer>& buffers)
{
return SourceStorage::GetNumberOfValues(SourceBuffers(buffers));
}
VTKM_CONT static void Fill(vtkm::cont::internal::Buffer*,
VTKM_CONT static void Fill(const std::vector<vtkm::cont::internal::Buffer>&,
const ValueType&,
vtkm::Id,
vtkm::Id,
@ -139,31 +131,33 @@ public:
}
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
SourceStorage::ResizeBuffers(numValues, SourceBuffers(buffers), preserve, token);
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return ReadPortalType(SourceStorage::CreateReadPortal(SourceBuffers(buffers), device, token),
ComponentIndex(buffers));
}
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
VTKM_CONT static WritePortalType CreateWritePortal(
const std::vector<vtkm::cont::internal::Buffer>& buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return WritePortalType(SourceStorage::CreateWritePortal(SourceBuffers(buffers), device, token),
ComponentIndex(buffers));
}
VTKM_CONT static auto CreateBuffers(vtkm::IdComponent componentIndex,
const ArrayHandleType& array)
VTKM_CONT static auto CreateBuffers(vtkm::IdComponent componentIndex = 0,
const ArrayHandleType& array = ArrayHandleType{})
-> decltype(vtkm::cont::internal::CreateBuffers())
{
return vtkm::cont::internal::CreateBuffers(componentIndex, array);
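
For context, the storage modified above backs the public ArrayHandleExtractComponent. A short usage sketch follows, written from memory of the public VTK-m API; the include paths and the make_ArrayHandleExtractComponent call are assumptions and may differ between VTK-m versions.

  #include <vtkm/cont/ArrayHandle.h>
  #include <vtkm/cont/ArrayHandleExtractComponent.h>

  int main()
  {
    // A 3-component vector array; the extract-component handle exposes one
    // component of every Vec as a flat array without copying the data.
    vtkm::cont::ArrayHandle<vtkm::Vec3f> vectors;
    vectors.Allocate(10);

    // View component 1 (the "y" values). Internally this uses the buffer layout
    // shown above: the component index stored in buffers[0], followed by the
    // source array's own buffers.
    auto yValues = vtkm::cont::make_ArrayHandleExtractComponent(vectors, 1);
    (void)yValues;

    return 0;
  }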

Some files were not shown because too many files have changed in this diff.