Merge commit 'd2d1c854adc8c0518802f153b48afd17646b6252' into update-to-1.6.0-rc1

* commit 'd2d1c854adc8c0518802f153b48afd17646b6252': (1346 commits) extend the default clipping plane Fix unintended cast in TBB Reduce's return value follow coding conventions make scalar normilization consistent across rendering correct a potential divide by zero Do not assume CUDA reduce operator is unary Fix casting issues in TBB functors Add casts to FunctorsGeneral.h Allow for different types in basic type operators kick the builds Cleanup per review. Fixes per review Add ArrayHandleSOA to default Disallow references in Variant Be more conservative about is_trivial support Removed two TODO comments after verifying parameters Port bug fix from distributed to augmented contour tree filter Fix hang in distributed contour tree more missing sstream headers add another missing header ...
2024-09-08 13:23:51 +00:00 · 2021-04-02 16:32:05 -04:00 · 2021-04-02 16:32:05 -04:00 · 9d345733bf
commit 9d345733bf
parent 74ffad9bd0 d2d1c854ad
1858 changed files with 139107 additions and 75469 deletions
--- a/.clang-format
+++ b/.clang-format
@ -1,17 +1,19 @@
 ---
-# This configuration requires clang-format 3.8 or higher.
+# This configuration requires clang-format 9 or higher.
 BasedOnStyle: Mozilla
 AlignAfterOpenBracket: Align
+AlignEscapedNewlines: true
 AlignOperands: false
-AlwaysBreakAfterReturnType: None
+AllowAllParametersOfDeclarationOnNextLine: false
 AlwaysBreakAfterDefinitionReturnType: None
-BreakBeforeBraces: Allman
+AlwaysBreakAfterReturnType: None
 BinPackArguments: false
 BinPackParameters: false
+BreakBeforeBraces: Allman
 ColumnLimit: 100
+# FixNamespaceComments: true
 MaxEmptyLinesToKeep: 4
-Standard: Cpp11
-# This requires clang-format 4.0 (at least).
-#FixNamespaceComments: true
 ReflowComments: false
+SpaceAfterTemplateKeyword: true
+Standard: Cpp11
 ...
--- a/.gitattributes
+++ b/.gitattributes
@ -1,5 +1,5 @@
 # Attributes used for formatting.
-[attr]our-c-style   whitespace=tab-in-indent  format.clang-format
+[attr]our-c-style   whitespace=tab-in-indent  format.clang-format=9

 *.cxx   our-c-style
 *.h     our-c-style
--- a/.github/issue_template.md
+++ b/.github/issue_template.md
@ -1,2 +1,2 @@
 Thanks for trying to contribute to VTK-m.  The GitHub repository is a mirror provided
-for convenience, as VTK-m uses https://gitlab.kitware.com/vtk/vtk-m/issues for issue tracking. 
+for convenience, as VTK-m uses https://gitlab.kitware.com/vtk/vtk-m/-/issues for issue tracking. 
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@ -2,4 +2,4 @@ Thanks for trying to contribute to VTK-m.  The GitHub repository
 is a mirror provided for convenience, as VTK-m uses gitlab.kitware.com
 for all pull requests. Our contribution instructions are located at:

-  https://gitlab.kitware.com/vtk/vtk-m/tree/master/CONTRIBUTING.md
+  https://gitlab.kitware.com/vtk/vtk-m/-/tree/master/CONTRIBUTING.md
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -0,0 +1,209 @@
+
+# Docker Images:
+#
+#   * .gitlab/ci/docker/centos7/cuda10.2/
+#     - cuda
+#     - gcc 7.3.1
+#   * .gitlab/ci/docker/centos8/base/
+#     - gcc 8.3.1
+#     - clang 8.0.1
+#     - openmp
+#     - asan, ubsan
+#   * .gitlab/ci/docker/rhel8/cuda10.2/
+#     - cuda
+#     - gcc 8.2.1
+#   * .gitlab/ci/docker/ubuntu1604/base/
+#     - gcc 5.4.0
+#     - clang 3.8
+#     - clang 5.0
+#     - tbb
+#     - openmpi
+#   * .gitlab/ci/docker/ubuntu1604/cuda9.2/
+#     - cuda
+#     - gcc 5.4
+#     - tbb
+#     - openmp
+#     - openmpi
+#   * .gitlab/ci/docker/ubuntu1804/base/
+#     - gcc 6.5
+#     - gcc 7.4
+#     - gcc 9
+#     - clang 8
+#     - tbb
+#     - openmp
+#     - mpich2
+#     - hdf5
+#   * .gitlab/ci/docker/ubuntu1804/cuda11.1/
+#     - cuda
+#     - gcc 7
+#     - gcc 8
+#     - tbb
+#     - openmp
+#     - mpich2
+#   * .gitlab/ci/docker/ubuntu2004/doxygen/
+#     - gcc 9.3
+#     - tbb
+#     - openmp
+#     - mpich2
+
+# Settings shared by every docker-based job: each one clones into the same
+# fixed directory below the runner's builds dir.
+.docker_image: &docker_image
+  variables:
+    GIT_CLONE_PATH: '$CI_BUILDS_DIR/gitlab-kitware-sciviz-ci'
+
+# One hidden template per CI image. Each pins a dated image tag and inherits
+# the shared docker settings above.
+.centos7: &centos7
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-centos7_cuda10.2-20210128'
+
+.centos8: &centos8
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-centos8-20201016'
+
+.rhel8: &rhel8
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-rhel8_cuda10.2-20201016'
+
+.ubuntu1604: &ubuntu1604
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-ubuntu1604-20201016'
+
+.ubuntu1604_cuda: &ubuntu1604_cuda
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-ubuntu1604_cuda9.2-20201016'
+
+.ubuntu1804: &ubuntu1804
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-ubuntu1804-20210107'
+
+.ubuntu1804_cuda: &ubuntu1804_cuda
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-ubuntu1804_cuda11.1-20201016'
+
+.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016'
+
+.ubuntu2004_doxygen: &ubuntu2004_doxygen
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-doxygen-20201016'
+
+.ubuntu2004_kokkos: &ubuntu2004_kokkos
+  extends:
+    - .docker_image
+  image: 'kitware/vtkm:ci-ubuntu2004_kokkos-20201016'
+
+# Restrict a job to master and tags of vtk/vtk-m, plus merge requests.
+.only-default: &only-default
+  only:
+    - 'master@vtk/vtk-m'
+    - 'tags@vtk/vtk-m'
+    - 'merge_requests'
+
+# Restrict a job to master of vtk/vtk-m.
+.only-master: &only-master
+  only:
+    - 'master@vtk/vtk-m'
+
+
+# General Longer Term Tasks:
+# - Setup clang tidy as sub-pipeline
+# - Setup a machine to replicate the issue in https://gitlab.kitware.com/vtk/vtk-m/-/issues/447
+#   Note: Centos7 doesn't work as they ship separate standard library
+#   headers for each version. We will have to figure out something else
+#   like using spack or building llvm/clang from source
+
+stages:
+  - build
+  - test
+
+# Template for Linux build jobs: fetch and start sccache, configure through
+# CTest, build, and keep the parts of the build tree listed under artifacts.
+.cmake_build_linux: &cmake_build_linux
+  stage: build
+  interruptible: true
+  timeout: 2 hours
+  before_script:
+    - '.gitlab/ci/config/sccache.sh'
+    - 'export PATH=$PWD/.gitlab:$PATH'
+    - 'SCCACHE_IDLE_TIMEOUT=0 sccache --start-server'
+    - 'sccache --show-stats'
+    - 'cmake --version'
+    - 'cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake'
+    - 'ctest -VV -S .gitlab/ci/ctest_configure.cmake'
+  script:
+    - 'ctest -VV -S .gitlab/ci/ctest_build.cmake'
+    - 'sccache --show-stats'
+  artifacts:
+    when: always
+    expire_in: 24 hours
+    paths:
+      # The artifacts of the build.
+      - build/bin/
+      - build/include/
+      - build/vtkm/thirdparty/diy/vtkmdiy/
+      - build/lib/
+
+      # CTest and CMake install files.
+      # XXX(globbing): Can be simplified with support from
+      # https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
+      #
+      # Note: this also captures our CIState.cmake file
+      - build/CMakeCache.txt
+      - build/*.cmake
+      - build/*/*.cmake
+      - build/*/*/*.cmake
+      - build/*/*/*/*.cmake
+      - build/*/*/*/*/*.cmake
+      - build/*/*/*/*/*/*.cmake
+      - build/Testing/
+
+      # CDash files.
+      - build/DartConfiguration.tcl
+
+# Template for Linux test jobs.
+.cmake_test_linux: &cmake_test_linux
+  stage: test
+  interruptible: true
+  timeout: 50 minutes
+  script:
+    # Use our custom ctest-latest symlink. It gives us CTest 3.17+, which can
+    # run failed tests multiple times so that failures due to system load
+    # are not reported.
+    - 'ctest-latest -VV -S .gitlab/ci/ctest_test.cmake'
+  artifacts:
+    when: always
+    expire_in: 24 hours
+    paths:
+      # The generated regression testing images.
+      # NOTE(review): `.pmm` looks like a typo for `.ppm` -- confirm which
+      # extension the tests actually write before changing the glob.
+      - build/*.png
+      - build/*.pnm
+      - build/*.pmm
+
+# Template for Linux memcheck jobs: like the test template, but drives
+# ctest_memcheck.cmake and allows a longer run time.
+.cmake_memcheck_linux: &cmake_memcheck_linux
+  stage: test
+  interruptible: true
+  timeout: 2 hours
+  script:
+    - 'ctest-latest -VV -S .gitlab/ci/ctest_memcheck.cmake'
+  artifacts:
+    when: always
+    expire_in: 24 hours
+    paths:
+      # The generated regression testing images.
+      # NOTE(review): `.pmm` looks like a typo for `.ppm` -- confirm which
+      # extension the tests actually write before changing the glob.
+      - build/*.png
+      - build/*.pnm
+      - build/*.pmm
+
+# Per-platform job definitions, one file per image family.
+include:
+  - local: /.gitlab/ci/centos7.yml
+  - local: /.gitlab/ci/centos8.yml
+  - local: /.gitlab/ci/doxygen.yml
+  - local: /.gitlab/ci/rhel8.yml
+  - local: /.gitlab/ci/ubuntu1604.yml
+  - local: /.gitlab/ci/ubuntu1804.yml
+  - local: /.gitlab/ci/ubuntu2004.yml
+  - local: /.gitlab/ci/windows10.yml
--- a/.gitlab/ci/centos7.yml
+++ b/.gitlab/ci/centos7.yml
@ -0,0 +1,55 @@
+
+# Build on centos7 with CUDA and test on rhel8 and centos7
+# gcc 7.3.1
+build:centos7_gcc73:
+  extends:
+    - .centos7
+    - .cmake_build_linux
+    - .only-default
+  tags: [build, vtkm, docker, linux, cuda-rt, large-memory]
+  variables:
+    CMAKE_BUILD_TYPE: 'RelWithDebInfo'
+    CMAKE_GENERATOR: 'Unix Makefiles'
+    VTKM_SETTINGS: 'cuda+turing+32bit_ids+no_rendering'
+
+test:centos7_gcc73:
+  extends:
+    - .centos7
+    - .cmake_test_linux
+    - .only-default
+  tags: [test, vtkm, docker, linux, cuda-rt, turing]
+  # Tied to the matching build job through both needs and dependencies.
+  needs: ['build:centos7_gcc73']
+  dependencies: ['build:centos7_gcc73']
+
+# Runs the centos7 build's tests inside the rhel8 image.
+test:rhel8_test_centos7:
+  extends:
+    - .rhel8
+    - .cmake_test_linux
+    - .only-default
+  tags: [test, vtkm, docker, linux, cuda-rt, turing]
+  variables:
+    CTEST_EXCLUSIONS: 'built_against_test_install'
+  needs: ['build:centos7_gcc73']
+  dependencies: ['build:centos7_gcc73']
--- a/.gitlab/ci/centos8.yml
+++ b/.gitlab/ci/centos8.yml
@ -0,0 +1,37 @@
+
+# Build on centos8 with serial and test on centos8
+# Uses gcc 8.2.1
+build:centos8_sanitizer:
+  extends:
+    - .centos8
+    - .cmake_build_linux
+    - .only-default
+  tags: [build, vtkm, docker, linux]
+  variables:
+    CMAKE_BUILD_TYPE: 'RelWithDebInfo'
+    CMAKE_GENERATOR: 'Unix Makefiles'
+    VTKM_SETTINGS: 'serial+shared+openmp+asan+leak'
+
+test:centos8_sanitizer:
+  extends:
+    - .centos8
+    - .cmake_memcheck_linux
+    - .only-default
+  tags: [test, vtkm, docker, linux, privileged]
+  variables:
+    # Quoted so the value is unambiguously a string.
+    OMP_NUM_THREADS: '4'
+    CTEST_MEMORYCHECK_TYPE: 'LeakSanitizer'
+    CTEST_EXCLUSIONS: 'RegressionTest.*'
+  needs: ['build:centos8_sanitizer']
+  dependencies: ['build:centos8_sanitizer']
--- a/.gitlab/ci/config/gitlab_ci_setup.cmake
+++ b/.gitlab/ci/config/gitlab_ci_setup.cmake
@ -0,0 +1,97 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Refuse to run outside of GitLab-CI: everything below is derived from the
+# environment variables that a CI job provides.
+if (NOT DEFINED "ENV{GITLAB_CI}")
+  message(FATAL_ERROR
+    "This script assumes it is being run inside of GitLab-CI")
+endif ()
+
+# Set up the source and build paths.
+set(CTEST_SOURCE_DIRECTORY "$ENV{CI_PROJECT_DIR}")
+set(CTEST_BINARY_DIRECTORY "${CTEST_SOURCE_DIRECTORY}/build")
+
+# VTKM_SETTINGS is mandatory; it also becomes part of the build name below.
+if ("$ENV{VTKM_SETTINGS}" STREQUAL "")
+  message(FATAL_ERROR
+    "The VTKM_SETTINGS environment variable is required to know what "
+    "build options should be used.")
+endif ()
+
+# Default to Release builds.
+if (NOT "$ENV{CMAKE_BUILD_TYPE}" STREQUAL "")
+  set(CTEST_BUILD_CONFIGURATION "$ENV{CMAKE_BUILD_TYPE}")
+endif ()
+if (NOT CTEST_BUILD_CONFIGURATION)
+  set(CTEST_BUILD_CONFIGURATION "Release")
+endif ()
+
+# Set the build metadata.
+# The build name is "<lowercased build type>+<VTKM_SETTINGS>".
+string(TOLOWER ${CTEST_BUILD_CONFIGURATION} build_type)
+set(CTEST_BUILD_NAME "${build_type}+$ENV{VTKM_SETTINGS}")
+
+# The site is the job name with every "build", "test" and ":" removed, so
+# that jobs such as build:<name> and test:<name> share the site "<name>".
+set(site_name "$ENV{CI_JOB_NAME}")
+string(REPLACE "build" "" site_name "${site_name}")
+string(REPLACE "test" "" site_name "${site_name}")
+string(REPLACE ":" "" site_name "${site_name}")
+set(CTEST_SITE ${site_name})
+
+# Default to using Ninja.
+if (NOT "$ENV{CMAKE_GENERATOR}" STREQUAL "")
+  set(CTEST_CMAKE_GENERATOR "$ENV{CMAKE_GENERATOR}")
+endif ()
+if (NOT CTEST_CMAKE_GENERATOR)
+  set(CTEST_CMAKE_GENERATOR "Ninja")
+endif ()
+
+# Determine the track to submit to.
+# Only the master ref goes to "Nightly"; everything else is "Experimental".
+set(CTEST_TRACK "Experimental")
+if ("$ENV{CI_COMMIT_REF_NAME}" STREQUAL "master")
+  set(CTEST_TRACK "Nightly")
+endif ()
+
+# With the Makefile generator, pass an explicit job count to the build.
+# ProcessorCount() reports 0 when it cannot determine the core count; skip
+# the flag in that case rather than emitting "-j0", which make rejects.
+if (CTEST_CMAKE_GENERATOR STREQUAL "Unix Makefiles")
+  include(ProcessorCount)
+  ProcessorCount(nproc)
+  if (NOT nproc EQUAL 0)
+    set(CTEST_BUILD_FLAGS "-j${nproc}")
+  endif ()
+endif ()
+
+# Optional memcheck settings are forwarded only when the job defines them.
+# Each one is appended as its own complete line of text: collecting the
+# set() commands in a CMake list would join them with ';' once more than one
+# is present, and "set(...);set(...)" is a parse error when CIState.cmake is
+# included. Values are quoted so that options containing spaces stay a
+# single argument.
+set(optional_variables "")
+if(DEFINED ENV{CTEST_MEMORYCHECK_TYPE})
+  set(env_value "$ENV{CTEST_MEMORYCHECK_TYPE}")
+  string(APPEND optional_variables
+    "  set(CTEST_MEMORYCHECK_TYPE \"${env_value}\")\n")
+endif()
+
+if(DEFINED ENV{CTEST_MEMORYCHECK_SANITIZER_OPTIONS})
+  set(env_value "$ENV{CTEST_MEMORYCHECK_SANITIZER_OPTIONS}")
+  string(APPEND optional_variables
+    "  set(CTEST_MEMORYCHECK_SANITIZER_OPTIONS \"${env_value}\")\n")
+endif()
+
+# We need to write this information out to a file in the build directory
+file(TO_CMAKE_PATH "${CTEST_SOURCE_DIRECTORY}" src_path) #converted so we can run on windows
+file(TO_CMAKE_PATH "${CTEST_BINARY_DIRECTORY}" bin_path) #converted so we can run on windows
+
+# Contents of CIState.cmake, which the ctest_*.cmake scripts include to
+# recover these settings. Every value is written quoted so that it is read
+# back as one argument.
+set(state
+"
+  set(CTEST_SOURCE_DIRECTORY \"${src_path}\")
+  set(CTEST_BINARY_DIRECTORY \"${bin_path}\")
+
+  set(CTEST_BUILD_NAME \"${CTEST_BUILD_NAME}\")
+  set(CTEST_SITE \"${CTEST_SITE}\")
+
+  set(CTEST_CMAKE_GENERATOR \"${CTEST_CMAKE_GENERATOR}\")
+  set(CTEST_BUILD_CONFIGURATION \"${CTEST_BUILD_CONFIGURATION}\")
+  set(CTEST_BUILD_FLAGS \"${CTEST_BUILD_FLAGS}\")
+
+  set(CTEST_TRACK \"${CTEST_TRACK}\")
+
+${optional_variables}"
+)
+file(WRITE "${CTEST_BINARY_DIRECTORY}/CIState.cmake" "${state}")
--- a/.gitlab/ci/config/initial_config.cmake
+++ b/.gitlab/ci/config/initial_config.cmake
@ -0,0 +1,121 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Take the build type from the job's environment when it provides one;
+# otherwise fall back to Release.
+if (NOT "$ENV{CMAKE_BUILD_TYPE}" STREQUAL "")
+  set(CMAKE_BUILD_TYPE "$ENV{CMAKE_BUILD_TYPE}" CACHE STRING "")
+else ()
+  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
+endif ()
+
+# VTKM_SETTINGS is a '+'-separated list of option names. Turn it into a CMake
+# list and translate each recognized name into the matching cache entry.
+# Names that match none of the branches below are silently ignored.
+string(REPLACE "+" ";" options "$ENV{VTKM_SETTINGS}")
+
+foreach(option IN LISTS options)
+  if(static STREQUAL option)
+    set(BUILD_SHARED_LIBS "OFF" CACHE STRING "")
+
+  elseif(shared STREQUAL option)
+    set(BUILD_SHARED_LIBS "ON" CACHE STRING "")
+
+  elseif(vtk_types STREQUAL option)
+    set(VTKm_USE_DEFAULT_TYPES_FOR_VTK "ON" CACHE STRING "")
+
+  elseif(ascent_types STREQUAL option)
+    # Note: ascent_types also requires 32bit_ids and 64bit_floats
+    set(VTKm_USE_DEFAULT_TYPES_FOR_ASCENT "ON" CACHE STRING "")
+
+  elseif(32bit_ids STREQUAL option)
+    set(VTKm_USE_64BIT_IDS "OFF" CACHE STRING "")
+
+  elseif(64bit_floats STREQUAL option)
+    set(VTKm_USE_DOUBLE_PRECISION "ON" CACHE STRING "")
+
+  # The sanitizer options accumulate in `sanitizers`, which is consumed
+  # after the loop.
+  elseif(asan STREQUAL option)
+    set(VTKm_ENABLE_SANITIZER "ON" CACHE STRING "")
+    list(APPEND sanitizers "address")
+
+  elseif(leak STREQUAL option)
+    set(VTKm_ENABLE_SANITIZER "ON" CACHE STRING "")
+    list(APPEND sanitizers "leak")
+
+  elseif(rendering STREQUAL option)
+    set(VTKm_ENABLE_RENDERING "ON" CACHE STRING "")
+
+  elseif(no_rendering STREQUAL option)
+    set(VTKm_ENABLE_RENDERING "OFF" CACHE STRING "")
+
+  elseif(no_virtual STREQUAL option)
+    set(VTKm_NO_DEPRECATED_VIRTUAL "ON" CACHE STRING "")
+
+  elseif(examples STREQUAL option)
+    set(VTKm_ENABLE_EXAMPLES "ON" CACHE STRING "")
+
+  elseif(docs STREQUAL option)
+    set(VTKm_ENABLE_DOCUMENTATION "ON" CACHE STRING "")
+
+  elseif(benchmarks STREQUAL option)
+    set(VTKm_ENABLE_BENCHMARKS "ON" CACHE STRING "")
+
+  elseif(mpi STREQUAL option)
+    set(VTKm_ENABLE_MPI "ON" CACHE STRING "")
+
+  elseif(tbb STREQUAL option)
+    set(VTKm_ENABLE_TBB "ON" CACHE STRING "")
+
+  elseif(openmp STREQUAL option)
+    set(VTKm_ENABLE_OPENMP "ON" CACHE STRING "")
+
+  elseif(cuda STREQUAL option)
+    set(VTKm_ENABLE_CUDA "ON" CACHE STRING "")
+
+  elseif(kokkos STREQUAL option)
+    set(VTKm_ENABLE_KOKKOS "ON" CACHE STRING "")
+
+  elseif(hdf5 STREQUAL option)
+    set(VTKm_ENABLE_HDF5_IO "ON" CACHE STRING "")
+
+  # CUDA architecture names; if several are listed, the last one wins
+  # because each branch overwrites the same cache entry.
+  elseif(maxwell STREQUAL option)
+    set(VTKm_CUDA_Architecture "maxwell" CACHE STRING "")
+
+  elseif(pascal STREQUAL option)
+    set(VTKm_CUDA_Architecture "pascal" CACHE STRING "")
+
+  elseif(volta STREQUAL option)
+    set(VTKm_CUDA_Architecture "volta" CACHE STRING "")
+
+  elseif(turing STREQUAL option)
+    set(VTKm_CUDA_Architecture "turing" CACHE STRING "")
+  endif()
+
+endforeach()
+
+set(CTEST_USE_LAUNCHERS "ON" CACHE STRING "")
+
+# We need to store the absolute path so that
+# the launchers still work even when sccache isn't
+# on our path.
+find_program(SCCACHE_COMMAND NAMES sccache)
+if(SCCACHE_COMMAND)
+  set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
+  set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
+
+  # Use VTKm_CUDA_Architecture to determine if we need CUDA sccache setup
+  # since this will also capture when kokkos is being used with CUDA backing
+  if(DEFINED VTKm_CUDA_Architecture)
+    set(CMAKE_CUDA_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
+  endif()
+endif()
+
+# Setup all the sanitizers as a list
+# (`sanitizers` is only non-empty when the asan and/or leak options were
+# requested; FORCE overwrites any previously cached value.)
+if(sanitizers)
+  set(VTKm_USE_SANITIZER "${sanitizers}"  CACHE STRING "" FORCE)
+endif()
--- a/.gitlab/ci/config/sccache.sh
+++ b/.gitlab/ci/config/sccache.sh
@ -0,0 +1,16 @@
+#!/bin/sh
+
+set -e
+
+readonly version="nvcc_v4"
+readonly sha256sum="260779b4a740fe8373d251d1e318541a98dd5cd2f8051eedd55227a5a852fdf7"
+readonly filename="sccache-0.2.14-$version-x86_64-unknown-linux-musl"
+readonly tarball="$filename.tar.gz"
+
+cd .gitlab
+
+echo "$sha256sum  $tarball" > sccache.sha256sum
+curl --insecure -OL "https://github.com/robertmaynard/sccache/releases/download/$version/$tarball"
+sha256sum --check sccache.sha256sum
+tar xf "$tarball"
+#mv "$filename/sccache" .
--- a/.gitlab/ci/config/vcvarsall.ps1
+++ b/.gitlab/ci/config/vcvarsall.ps1
@ -0,0 +1,9 @@
+# Run vcvarsall.bat in a cmd.exe child process, dump the resulting
+# environment with `set`, and copy every NAME=VALUE pair into this process.
+# NOTE(review): $VCVARSPLATFORM and $VCVARSVERSION are read as PowerShell
+# variables (not $env:) -- presumably defined by the caller; confirm.
+$erroractionpreference = "stop"
+
+cmd /c "`"$env:VCVARSALL`" $VCVARSPLATFORM -vcvars_ver=$VCVARSVERSION & set" |
+foreach {
+    if ($_ -match "=") {
+        # Split on the first '=' only, so a value that itself contains '='
+        # is kept whole instead of being cut off at its first '='.
+        $v = $_.Split([char[]]"=", 2)
+        [Environment]::SetEnvironmentVariable($v[0], $v[1])
+    }
+}
--- a/.gitlab/ci/ctest_build.cmake
+++ b/.gitlab/ci/ctest_build.cmake
@ -0,0 +1,40 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Read the files from the build directory that contain
+# host information ( name, parallel level, etc )
+include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
+ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
+
+
+# Pick up from where the configure left off.
+ctest_start(APPEND)
+message(STATUS "CTEST_BUILD_FLAGS: ${CTEST_BUILD_FLAGS}")
+# Build the project. The outcome is captured in build_result and only
+# checked after the results have been submitted.
+ctest_build(APPEND
+  NUMBER_WARNINGS num_warnings
+  RETURN_VALUE build_result)
+
+# Submit the build results unless GITLAB_CI_EMULATION is set. The BUILD_ID
+# form of ctest_submit is only used with CMake 3.15 or newer.
+if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.15)
+    ctest_submit(PARTS Build BUILD_ID build_id)
+    message(STATUS "Build submission build_id: ${build_id}")
+  else()
+    ctest_submit(PARTS Build)
+  endif()
+
+
+endif()
+
+# Fail the job only now, so that a failed build is still submitted above.
+if (build_result)
+  message(FATAL_ERROR
+    "Failed to build")
+endif ()
--- a/.gitlab/ci/ctest_configure.cmake
+++ b/.gitlab/ci/ctest_configure.cmake
@ -0,0 +1,55 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Read the files from the build directory that contain
+# host information ( name, parallel level, etc )
+include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
+ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
+
+# Seed the CMake cache from initial_config.cmake, which sits in the config/
+# directory next to this script.
+set(cmake_args
+  -C "${CMAKE_CURRENT_LIST_DIR}/config/initial_config.cmake")
+
+# Create an entry in CDash.
+ctest_start(Experimental TRACK "${CTEST_TRACK}")
+
+# Gather update information.
+find_package(Git)
+set(CTEST_UPDATE_VERSION_ONLY ON)
+set(CTEST_UPDATE_COMMAND "${GIT_EXECUTABLE}")
+
+# Don't do updates when running via reproduce_ci_env.py
+if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
+  ctest_update()
+endif()
+
+# Configure the project.
+# The outcome is captured in configure_result and only checked after the
+# results have been submitted.
+ctest_configure(APPEND
+  OPTIONS "${cmake_args}"
+  RETURN_VALUE configure_result)
+
+# We can now submit because we've configured.
+# The BUILD_ID form of ctest_submit is only used with CMake 3.15 or newer.
+if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.15)
+    ctest_submit(PARTS Update BUILD_ID build_id)
+    message(STATUS "Update submission build_id: ${build_id}")
+    ctest_submit(PARTS Configure BUILD_ID build_id)
+    message(STATUS "Configure submission build_id: ${build_id}")
+  else()
+    ctest_submit(PARTS Update)
+    ctest_submit(PARTS Configure)
+  endif()
+endif()
+
+# Fail the job only now, so that a failed configure is still submitted above.
+if (configure_result)
+  message(FATAL_ERROR
+    "Failed to configure")
+endif ()
--- a/.gitlab/ci/ctest_memcheck.cmake
+++ b/.gitlab/ci/ctest_memcheck.cmake
@ -0,0 +1,62 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Read the files from the build directory that contain
+# host information ( name, parallel level, etc )
+include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
+ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
+
+# Pick up from where the configure left off.
+ctest_start(APPEND)
+
+# CIState.cmake may already carry the memcheck settings; fall back to the
+# job environment for any that are missing.
+if(NOT CTEST_MEMORYCHECK_TYPE)
+  set(CTEST_MEMORYCHECK_TYPE "$ENV{CTEST_MEMORYCHECK_TYPE}")
+endif()
+
+if(NOT CTEST_MEMORYCHECK_SANITIZER_OPTIONS)
+  set(CTEST_MEMORYCHECK_SANITIZER_OPTIONS "$ENV{CTEST_MEMORYCHECK_SANITIZER_OPTIONS}")
+endif()
+
+# LeakSanitizer runs use the suppression file from the source tree unless
+# one was already chosen.
+if(NOT CTEST_MEMORYCHECK_SUPPRESSIONS_FILE)
+  if(CTEST_MEMORYCHECK_TYPE STREQUAL "LeakSanitizer")
+    set(CTEST_MEMORYCHECK_SUPPRESSIONS_FILE "${CTEST_SOURCE_DIRECTORY}/CMake/testing/lsan.supp")
+  endif()
+endif()
+
+set(test_exclusions
+  # placeholder for tests to exclude provided by the env
+  $ENV{CTEST_EXCLUSIONS}
+)
+
+# Turn the ';'-separated exclusions into one "(a|b|...)" regular expression.
+string(REPLACE ";" "|" test_exclusions "${test_exclusions}")
+if (test_exclusions)
+  set(test_exclusions "(${test_exclusions})")
+endif ()
+
+# reduced parallel level so we don't exhaust system resources
+ctest_memcheck(
+  PARALLEL_LEVEL "4"
+  RETURN_VALUE test_result
+  EXCLUDE "${test_exclusions}"
+  DEFECT_COUNT defects)
+
+# Skip the CDash submission when GITLAB_CI_EMULATION is set, matching the
+# configure, build and test scripts.
+if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
+  ctest_submit(PARTS Memcheck BUILD_ID build_id)
+  message(STATUS "Memcheck submission build_id: ${build_id}")
+endif()
+
+# Fail the job only after submitting, first on memcheck defects and then on
+# ordinary test failures.
+if (defects)
+  message(FATAL_ERROR "Found ${defects} memcheck defects")
+endif ()
+
+
+if (test_result)
+  message(FATAL_ERROR "Failed to test")
+endif ()
--- a/.gitlab/ci/ctest_test.cmake
+++ b/.gitlab/ci/ctest_test.cmake
@ -0,0 +1,46 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Read the files from the build directory that contain
+# host information ( name, parallel level, etc )
+include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
+ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
+
+# Pick up from where the configure left off.
+ctest_start(APPEND)
+
+set(test_exclusions
+  # placeholder for tests to exclude provided by the env
+  $ENV{CTEST_EXCLUSIONS}
+)
+
+# Turn the ';'-separated exclusions into one "(a|b|...)" regular expression.
+string(REPLACE ";" "|" test_exclusions "${test_exclusions}")
+if (test_exclusions)
+  set(test_exclusions "(${test_exclusions})")
+endif ()
+
+# Run the tests, retrying a failing test until it passes, up to 3 attempts.
+# The outcome is captured in test_result and only checked after submission.
+ctest_test(APPEND
+  PARALLEL_LEVEL "10"
+  RETURN_VALUE test_result
+  EXCLUDE "${test_exclusions}"
+  REPEAT "UNTIL_PASS:3"
+  )
+  message(STATUS "ctest_test RETURN_VALUE: ${test_result}")
+
+# Submit the test results unless GITLAB_CI_EMULATION is set.
+if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
+  ctest_submit(PARTS Test BUILD_ID build_id)
+  message(STATUS "Test submission build_id: ${build_id}")
+endif()
+
+# Fail the job only now, so that failing tests are still submitted above.
+if (test_result)
+  message(FATAL_ERROR "Failed to test")
+endif ()
--- a/.gitlab/ci/docker/centos7/cuda10.2/Dockerfile
+++ b/.gitlab/ci/docker/centos7/cuda10.2/Dockerfile
@ -0,0 +1,23 @@
+FROM nvidia/cuda:10.2-devel-centos7
+LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
+
+RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 centos-release-scl -y
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
+RUN yum install git git-lfs devtoolset-7-gcc-c++ -y
+
+# Provide a consistent CMake path across all images
+# Install CMake 3.13 as it is the minimum for cuda builds
+RUN mkdir /opt/cmake && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.13.5/cmake-3.13.5-Linux-x86_64.sh > cmake-3.13.5-Linux-x86_64.sh && \
+    sh cmake-3.13.5-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.13.5-Linux-x86_64.sh
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake-latest/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
+
+ENV PATH "/opt/rh/devtoolset-7/root/bin:/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
--- a/.gitlab/ci/docker/centos8/base/Dockerfile
+++ b/.gitlab/ci/docker/centos8/base/Dockerfile
@ -0,0 +1,16 @@
+FROM centos:8
+LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
+
+RUN yum install make gcc gcc-c++ curl libasan libubsan libomp clang -y
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
+RUN yum install git git-lfs -y
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+ENV PATH "/opt/cmake/bin:${PATH}"
--- a/.gitlab/ci/docker/rhel8/cuda10.2/Dockerfile
+++ b/.gitlab/ci/docker/rhel8/cuda10.2/Dockerfile
@ -0,0 +1,16 @@
+FROM nvidia/cuda:10.2-devel-ubi8
+LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
+
+RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
+RUN yum install git git-lfs -y
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+ENV PATH "/opt/cmake/bin:${PATH}"
--- a/.gitlab/ci/docker/ubuntu1604/base/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1604/base/Dockerfile
@ -0,0 +1,54 @@
+FROM ubuntu:16.04
+LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      autoconf \
+      automake \
+      autotools-dev \
+      clang-3.8 \
+      clang-5.0 \
+      curl \
+      g++ \
+      g++-4.8 \
+      libtbb-dev \
+      make \
+      ninja-build \
+      software-properties-common \
+      ssh
+
+# extra dependencies for dejagore machine
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
+RUN apt-get install -y --no-install-recommends \
+      git \
+      git-lfs \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Provide a modern OpenMPI version that supports
+# running as root via environment variables
+RUN mkdir /opt/openmpi && \
+    curl -L https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz > openmpi-4.0.3.tar.gz && \
+    tar -xf openmpi-4.0.3.tar.gz && \
+    cd openmpi-4.0.3 && \
+    ./configure --prefix=/opt/openmpi && \
+    make -j all && \
+    make install
+
+# Provide a consistent CMake path across all images
+# Install CMake 3.12 as it is the minimum for non-cuda builds
+RUN mkdir /opt/cmake && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.12.4/cmake-3.12.4-Linux-x86_64.sh > cmake-3.12.4-Linux-x86_64.sh && \
+    sh cmake-3.12.4-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.12.4-Linux-x86_64.sh
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake-latest/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
+
+ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
+ENV LD_LIBRARY_PATH "/opt/openmpi/lib:${LD_LIBRARY_PATH}"
--- a/.gitlab/ci/docker/ubuntu1604/cuda9.2/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1604/cuda9.2/Dockerfile
@ -0,0 +1,52 @@
+FROM nvidia/cuda:9.2-devel-ubuntu16.04
+LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      autoconf \
+      automake \
+      autotools-dev \
+      curl \
+      g++ \
+      libomp-dev \
+      libtbb-dev \
+      make \
+      ninja-build \
+      software-properties-common \
+      ssh
+
+RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      git \
+      git-lfs \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Provide a modern OpenMPI version that supports
+# running as root via environment variables
+RUN mkdir /opt/openmpi && \
+    curl -L https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz > openmpi-4.0.3.tar.gz && \
+    tar -xf openmpi-4.0.3.tar.gz && \
+    cd openmpi-4.0.3 && \
+    ./configure --prefix=/opt/openmpi && \
+    make -j all && \
+    make install
+
+# Provide a consistent CMake path across all images
+# Allow tests that require CMake to work correctly
+# Install CMake 3.13 as it is the minimum for cuda builds
+RUN mkdir /opt/cmake && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.13.5/cmake-3.13.5-Linux-x86_64.sh > cmake-3.13.5-Linux-x86_64.sh && \
+    sh cmake-3.13.5-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.13.5-Linux-x86_64.sh
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake-latest/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
+
+ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
+ENV LD_LIBRARY_PATH "/opt/openmpi/lib:${LD_LIBRARY_PATH}"
--- a/.gitlab/ci/docker/ubuntu1804/base/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/base/Dockerfile
@ -0,0 +1,47 @@
+FROM ubuntu:18.04
+LABEL maintainer="Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects.
+# The apt package lists are kept after this step; they are refreshed and
+# finally removed by the second install step below.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      cmake \
+      curl \
+      g++ \
+      g++-6 \
+      git \
+      git-lfs \
+      libmpich-dev \
+      libomp-dev \
+      libtbb-dev \
+      libhdf5-dev \
+      mpich \
+      ninja-build \
+      software-properties-common
+
+# extra dependencies for charm machine
+# (the PPA is added first so that the newer compiler packages can be resolved)
+RUN add-apt-repository ppa:jonathonf/gcc
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      clang-8 \
+      g++-9 \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# Provide a consistent CMake path across all images
+# Allow tests that require CMake to work correctly
+RUN mkdir /opt/cmake && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.16.7/cmake-3.16.7-Linux-x86_64.sh > cmake-3.16.7-Linux-x86_64.sh && \
+    sh cmake-3.16.7-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.16.7-Linux-x86_64.sh
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+# The ctest-latest symlink gives the newer ctest a distinct name, so it can be
+# called explicitly while /opt/cmake/bin keeps precedence for plain `ctest`.
+RUN mkdir /opt/cmake-latest/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
+
+# /opt/cmake/bin is listed first, so CMake 3.16.7 is the default cmake/ctest.
+ENV PATH="/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
--- a/.gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/cuda11.1/Dockerfile
@ -0,0 +1,38 @@
+FROM nvidia/cuda:11.1-devel-ubuntu18.04
+LABEL maintainer="Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+# The apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      curl \
+      g++-8 \
+      clang-8 \
+      git \
+      git-lfs \
+      libmpich-dev \
+      libomp-dev \
+      libtbb-dev \
+      mpich \
+      ninja-build \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# Provide a consistent CMake path across all images
+# Allow tests that require CMake to work correctly
+RUN mkdir /opt/cmake && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.16.7/cmake-3.16.7-Linux-x86_64.sh > cmake-3.16.7-Linux-x86_64.sh && \
+    sh cmake-3.16.7-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.16.7-Linux-x86_64.sh
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+# The ctest-latest symlink gives the newer ctest a distinct name, so it can be
+# called explicitly while /opt/cmake/bin keeps precedence for plain `ctest`.
+RUN mkdir /opt/cmake-latest/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
+
+# /opt/cmake/bin is listed first, so CMake 3.16.7 is the default cmake/ctest.
+ENV PATH="/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
--- a/.gitlab/ci/docker/ubuntu1804/kokkos-cuda/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/kokkos-cuda/Dockerfile
@ -0,0 +1,47 @@
+FROM nvidia/cuda:11.0-devel-ubuntu18.04
+LABEL maintainer="Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+# The apt package lists are removed in the same layer to keep the image small.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      curl \
+      g++ \
+      git \
+      git-lfs \
+      ninja-build \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# kokkos backend requires cmake 3.18
+# Only one CMake is installed in this image, so ctest-latest is simply an
+# alias for the same ctest binary.
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.18.1/cmake-3.18.1-Linux-x86_64.sh > cmake-3.18.1-Linux-x86_64.sh && \
+    sh cmake-3.18.1-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.18.1-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+ENV PATH="/opt/cmake/bin:${PATH}"
+
+# Build and install Kokkos
+# Kokkos 3.1.01 is configured with the CUDA backend and installed to
+# /opt/kokkos. CXX points at the nvcc_wrapper script from the extracted source
+# tree, and the target architecture is fixed to Turing (Kokkos_ARCH_TURING75).
+# NOTE(review): the tarball, sources and build tree stay in /opt/kokkos/build;
+# the installed Kokkos configuration may still reference nvcc_wrapper at that
+# path, so confirm before removing them.
+RUN mkdir -p /opt/kokkos/build && \
+    cd /opt/kokkos/build && \
+    curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
+    tar -xf kokkos-3.1.01.tar.gz && \
+    mkdir bld && cd bld && \
+    CXX=/opt/kokkos/build/kokkos-3.1.01/bin/nvcc_wrapper \
+    cmake -B . -S ../kokkos-3.1.01 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DCMAKE_INSTALL_PREFIX=/opt/kokkos \
+          -DCMAKE_CXX_FLAGS=-fPIC \
+          -DCMAKE_CXX_STANDARD=14 \
+          -DKokkos_ENABLE_CUDA=ON \
+          -DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
+          -DKokkos_ENABLE_CUDA_LAMBDA=ON \
+          -DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
+          -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
+          -DKokkos_ENABLE_CUDA_UVM=ON \
+          -DKokkos_ARCH_TURING75=ON && \
+    cmake --build . -j 8 && \
+    cmake --install .
--- a/.gitlab/ci/docker/ubuntu2004/doxygen/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu2004/doxygen/Dockerfile
@ -0,0 +1,33 @@
+FROM ubuntu:20.04
+LABEL maintainer="Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
+# during the image build. The apt package lists are removed in the same layer
+# to keep the image small, matching the other CI images.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      cmake \
+      curl \
+      doxygen \
+      g++ \
+      git \
+      git-lfs \
+      libmpich-dev \
+      libomp-dev \
+      libtbb-dev \
+      mpich \
+      ninja-build \
+      rsync \
+      ssh \
+      software-properties-common \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+# /opt/cmake/bin is appended, so directories already on PATH (including the
+# apt-installed cmake) keep precedence; ctest-latest is the name that reaches
+# the 3.17 ctest.
+ENV PATH="${PATH}:/opt/cmake/bin"
--- a/.gitlab/ci/docker/ubuntu2004/kokkos/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu2004/kokkos/Dockerfile
@ -0,0 +1,41 @@
+FROM ubuntu:20.04
+LABEL maintainer="Sujin Philip<sujin.philip@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+# DEBIAN_FRONTEND=noninteractive keeps package configuration from prompting
+# during the image build. The apt package lists are removed in the same layer
+# to keep the image small, matching the other CI images.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      cmake \
+      curl \
+      g++ \
+      git \
+      git-lfs \
+      libmpich-dev \
+      libomp-dev \
+      mpich \
+      ninja-build \
+      rsync \
+      ssh \
+      software-properties-common \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+# /opt/cmake/bin is appended, so directories already on PATH (including the
+# apt-installed cmake) keep precedence; ctest-latest is the name that reaches
+# the 3.17 ctest.
+ENV PATH="${PATH}:/opt/cmake/bin"
+
+# Build and install Kokkos
+# Kokkos 3.1.01 is built with only the Serial backend enabled and installed
+# to /opt/kokkos; the cmake found first on PATH is used to configure it.
+RUN mkdir -p /opt/kokkos/build && \
+    cd /opt/kokkos/build && \
+    curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
+    tar -xf kokkos-3.1.01.tar.gz && \
+    mkdir bld && cd bld && \
+    cmake -GNinja -DCMAKE_INSTALL_PREFIX=/opt/kokkos -DCMAKE_CXX_FLAGS=-fPIC -DKokkos_ENABLE_SERIAL=ON ../kokkos-3.1.01 && \
+    ninja all && \
+    ninja install
--- a/.gitlab/ci/docker/update_all.sh
+++ b/.gitlab/ci/docker/update_all.sh
@ -0,0 +1,51 @@
+#!/bin/sh
+
+set -e
+set -x
+
+# data is expected to be a string of the form YYYYMMDD
+readonly date="$1"
+
+cd centos7/cuda10.2
+sudo docker build -t kitware/vtkm:ci-centos7_cuda10.2-$date .
+cd ../..
+
+cd centos8/base
+sudo docker build -t kitware/vtkm:ci-centos8-$date .
+cd ../..
+
+cd rhel8/cuda10.2
+sudo docker build -t kitware/vtkm:ci-rhel8_cuda10.2-$date .
+cd ../..
+
+cd ubuntu1604/base
+sudo docker build -t kitware/vtkm:ci-ubuntu1604-$date .
+cd ../..
+
+cd ubuntu1604/cuda9.2
+sudo docker build -t kitware/vtkm:ci-ubuntu1604_cuda9.2-$date .
+cd ../..
+
+cd ubuntu1804/base
+sudo docker build -t kitware/vtkm:ci-ubuntu1804-$date .
+cd ../..
+
+cd ubuntu1804/cuda11.1
+sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11.1-$date .
+cd ../..
+
+cd ubuntu1804/kokkos-cuda
+sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-$date .
+cd ../..
+
+cd ubuntu2004/doxygen/
+sudo docker build -t kitware/vtkm:ci-doxygen-$date .
+cd ../..
+
+cd ubuntu2004/kokkos
+sudo docker build -t kitware/vtkm:ci-ubuntu2004_kokkos-$date .
+cd ../..
+
+# sudo docker login --username=<docker_hub_name>
+sudo docker push kitware/vtkm
+sudo docker system prune
--- a/.gitlab/ci/doxygen.yml
+++ b/.gitlab/ci/doxygen.yml
@ -0,0 +1,33 @@
+
+# Build the documentation for VTK-m on ubuntu2004 with TBB and OpenMP
+# Uses gcc 9
+# Uses MPICH2
+doxygen:
+  # Configures the project, runs doxygen on the generated doxyfile and
+  # uploads the resulting HTML with rsync over ssh.
+  stage: build
+  environment:
+    #establish that we need doxygen related
+    #env variables
+    name: doxygen-upload
+  only:
+    #make sure we are only triggered on
+    #the vtk-m primary project
+    variables:
+      - '$DOXYGEN_UPLOAD_ENABLE == "true"'
+  timeout:  30 minutes
+  interruptible: true
+  tags:
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job template (leading '.') that is not defined in this file.
+  extends:
+    - .ubuntu2004_doxygen
+  before_script:
+    - "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
+    - "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
+  script:
+    - doxygen build/docs/doxyfile
+    # ssh refuses to use a private key file that other users can read.
+    - chmod 400 $DOC_KEY_FILE
+    # --delete removes remote files that no longer exist in the local output.
+    # NOTE(review): StrictHostKeyChecking=no disables host key verification;
+    # confirm this is acceptable for the upload host.
+    - rsync -tv --recursive --delete -e "ssh -i $DOC_KEY_FILE -o StrictHostKeyChecking=no" build/docs/doxygen/html/ kitware@public.kitware.com:vtkm_documentation/
+  variables:
+    CMAKE_BUILD_TYPE: Release
+    # '+'-separated build options; presumably parsed by the CI setup scripts
+    # run in before_script (confirm).
+    VTKM_SETTINGS: "tbb+openmp+mpi+shared+docs"
--- a/.gitlab/ci/rhel8.yml
+++ b/.gitlab/ci/rhel8.yml
@ -0,0 +1,62 @@
+
+# Build on rhel8 with serial and test on rhel8
+# Uses gcc 8.2.1
+build:rhel8:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .rhel8
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CMAKE_GENERATOR: "Unix Makefiles"
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "serial+shared+64bit_floats+32bit_ids"
+
+# Test job paired with build:rhel8.
+test:rhel8:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .rhel8
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:rhel8
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:rhel8
+
+# Build on rhel8 with serial and the VTK-supported types
+# Uses gcc 8.2.1
+build:rhel8_vtk_types:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .rhel8
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CMAKE_GENERATOR: "Unix Makefiles"
+    # Differs from build:rhel8 only in this option string.
+    VTKM_SETTINGS: "serial+vtk_types"
+
+# Test job paired with build:rhel8_vtk_types.
+test:rhel8_vtk_types:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .rhel8
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:rhel8_vtk_types
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:rhel8_vtk_types
--- a/.gitlab/ci/ubuntu1604.yml
+++ b/.gitlab/ci/ubuntu1604.yml
@ -0,0 +1,109 @@
+
+# Build on ubuntu1604 with CUDA 9.2 and test on ubuntu1604 and ubuntu1804
+# Uses gcc 5, and build for pascal as CUDA 9.2 doesn't support turing
+build:ubuntu1604_gcc5:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - large-memory
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1604_cuda
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CC: "gcc-5"
+    CXX: "g++-5"
+    CMAKE_BUILD_TYPE: RelWithDebInfo
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "cuda+pascal+no_virtual+ascent_types+32bit_ids+64bit_floats"
+
+# Test job paired with build:ubuntu1604_gcc5.
+test:ubuntu1604_gcc5:
+  # The "pascal" tag matches the "pascal" option in the build job's
+  # VTKM_SETTINGS.
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - pascal
+  extends:
+    - .ubuntu1604_cuda
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1604_gcc5
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1604_gcc5
+
+# Build on ubuntu1604 with OpenMP + CUDA
+# Runs only on nightlies
+build:ubuntu1604_gcc5_2:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - large-memory
+  # Unlike the other build jobs in this file, this one uses .only-master
+  # rather than .only-default. All three templates are defined outside
+  # this file.
+  extends:
+    - .ubuntu1604_cuda
+    - .cmake_build_linux
+    - .only-master
+  variables:
+    CC: "gcc-5"
+    CXX: "g++-5"
+    CMAKE_BUILD_TYPE: Release
+    VTKM_SETTINGS: "openmp+cuda+pascal+examples"
+
+# Tests the artifacts of build:ubuntu1604_gcc5_2 using the .ubuntu1804_cuda
+# template instead of the ubuntu1604 one used for the build.
+test:ubuntu1804_test_ubuntu1604_gcc5_2:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - pascal
+  extends:
+    - .ubuntu1804_cuda
+    - .cmake_test_linux
+    - .only-master
+  variables:
+    # Presumably a pattern of tests to skip, consumed by the ctest scripts
+    # (confirm).
+    CTEST_EXCLUSIONS: "built_against_test_install"
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1604_gcc5_2
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1604_gcc5_2
+
+# Build on ubuntu1604 with tbb and test on ubuntu1604
+# Uses clang 5
+build:ubuntu1604_clang5:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1604
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CC: "clang-5.0"
+    CXX: "clang++-5.0"
+    CMAKE_BUILD_TYPE: Debug
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "tbb+static+64bit_floats"
+
+# Test job paired with build:ubuntu1604_clang5.
+test:ubuntu1604_clang5:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu1604
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1604_clang5
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1604_clang5
--- a/.gitlab/ci/ubuntu1804.yml
+++ b/.gitlab/ci/ubuntu1804.yml
@ -0,0 +1,222 @@
+
+# Build on ubuntu1804 with TBB and OpenMP and test on ubuntu1804
+# Uses gcc 9
+# Uses MPICH2
+build:ubuntu1804_gcc9:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1804
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CC: "gcc-9"
+    CXX: "g++-9"
+    CMAKE_BUILD_TYPE: Debug
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "tbb+openmp+mpi+shared+hdf5"
+
+# Test job paired with build:ubuntu1804_gcc9.
+test:ubuntu1804_gcc9:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu1804
+    - .cmake_test_linux
+    - .only-default
+  variables:
+    #Restrict OpenMP number of threads since multiple test stages
+    #execute on the same hardware concurrently
+    # Quoted because CI variable values are strings.
+    OMP_NUM_THREADS: "4"
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1804_gcc9
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1804_gcc9
+
+# Build on ubuntu1804 with CUDA + MPI and test on ubuntu1804
+# Uses gcc 7
+# Uses MPICH2
+build:ubuntu1804_gcc7:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - large-memory
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1804_cuda
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CC: "gcc-7"
+    CXX: "g++-7"
+    # Host compiler for CUDA, set to the same compiler as CXX.
+    CUDAHOSTCXX: "g++-7"
+    VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats+no_virtual"
+
+# Test job paired with build:ubuntu1804_gcc7.
+test:ubuntu1804_gcc7:
+  # The "turing" tag matches the "turing" option in the build job's
+  # VTKM_SETTINGS.
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - turing
+  extends:
+    - .ubuntu1804_cuda
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1804_gcc7
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1804_gcc7
+
+
+# Build on ubuntu1804 with CUDA+TBB and test on ubuntu1804
+# Uses clang as CUDA host compiler
+# Meant to run only on nightlies, but currently uses .only-default (.only-master is commented out)
+build:ubuntu1804_clang_cuda:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - large-memory
+  # .only-master is commented out, so the job currently follows
+  # .only-default like the other jobs in this file.
+  extends:
+    - .ubuntu1804_cuda
+    - .cmake_build_linux
+    - .only-default
+    # - .only-master
+  variables:
+    CC: "clang-8"
+    CXX: "clang++-8"
+    # Host compiler for CUDA, set to the same compiler as CXX.
+    CUDAHOSTCXX: "clang++-8"
+    VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"
+
+# Test job paired with build:ubuntu1804_clang_cuda.
+test:ubuntu1804_clang_cuda:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - pascal
+  # .only-master is commented out here as well, keeping this job on the
+  # same rule as its build job.
+  extends:
+    - .ubuntu1804_cuda
+    - .cmake_test_linux
+    - .only-default
+    # - .only-master
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1804_clang_cuda
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1804_clang_cuda
+
+# Build on ubuntu1804 with OpenMP and test on ubuntu1804
+# Uses gcc 6.5
+build:ubuntu1804_gcc6:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1804
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CC: "gcc-6"
+    CXX: "g++-6"
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "openmp+shared+examples"
+
+# Test job paired with build:ubuntu1804_gcc6.
+test:ubuntu1804_gcc6:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu1804
+    - .cmake_test_linux
+    - .only-default
+  variables:
+    #Restrict OpenMP number of threads since multiple test stages
+    #execute on the same hardware concurrently
+    # Quoted because CI variable values are strings.
+    OMP_NUM_THREADS: "3"
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1804_gcc6
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1804_gcc6
+
+# Build on ubuntu1804 with TBB and test on ubuntu1804
+# Uses clang 8
+build:ubuntu1804_clang8:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1804
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CC: "clang-8"
+    CXX: "clang++-8"
+    CMAKE_BUILD_TYPE: Debug
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "tbb+shared+examples"
+
+# Test job paired with build:ubuntu1804_clang8.
+test:ubuntu1804_clang8:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu1804
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1804_clang8
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1804_clang8
+
+# Build on ubuntu1804 with kokkos and test on ubuntu1804
+# Uses CUDA 11
+build:ubuntu1804_kokkos:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - large-memory
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu1804_cuda_kokkos
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CMAKE_GENERATOR: "Ninja"
+    CMAKE_BUILD_TYPE: Release
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "kokkos+turing+static+64bit_floats"
+
+# Test job paired with build:ubuntu1804_kokkos.
+test:ubuntu1804_kokkos:
+  # The "turing" tag matches the "turing" option in the build job's
+  # VTKM_SETTINGS.
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - turing
+  extends:
+    - .ubuntu1804_cuda_kokkos
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu1804_kokkos
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu1804_kokkos
--- a/.gitlab/ci/ubuntu2004.yml
+++ b/.gitlab/ci/ubuntu2004.yml
@ -0,0 +1,28 @@
+# Build with the kokkos option using the .ubuntu2004_kokkos template.
+build:ubuntu2004_kokkos:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  # Hidden-job templates (leading '.') that are not defined in this file.
+  extends:
+    - .ubuntu2004_kokkos
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CMAKE_BUILD_TYPE: RelWithDebInfo
+    # '+'-separated build options; presumably parsed by the CI setup
+    # scripts (confirm).
+    VTKM_SETTINGS: "kokkos+shared+64bit_floats"
+
+# Test job paired with build:ubuntu2004_kokkos.
+test:ubuntu2004_kokkos:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu2004_kokkos
+    - .cmake_test_linux
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:ubuntu2004_kokkos
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:ubuntu2004_kokkos
--- a/.gitlab/ci/windows10.yml
+++ b/.gitlab/ci/windows10.yml
@ -0,0 +1,113 @@
+# Hidden template: pins the clone directory used by the Windows jobs.
+.windows_build:
+  variables:
+    # Note that shell runners only support runners with a single
+    # concurrency level. We can't use `$CI_CONCURRENCY_ID` because this may
+    # change between the build and test stages which CMake doesn't support.
+    # Even if we could, it could change if other runners on the machine
+    # could run at the same time, so we drop it.
+    # In this double-quoted scalar "\\" is one literal backslash, and the
+    # directory name contains a space.
+    GIT_CLONE_PATH: "$CI_BUILDS_DIR\\vtkm ci"
+
+# Hidden template: Visual Studio 2019 toolchain selection.
+# Presumably read by .gitlab/ci/config/vcvarsall.ps1, which the Windows
+# jobs invoke in before_script (confirm).
+.windows_vs2019:
+  variables:
+    # Path to vcvarsall.bat, resolved relative to the VS160COMNTOOLS directory.
+    VCVARSALL: "${VS160COMNTOOLS}\\..\\..\\VC\\Auxiliary\\Build\\vcvarsall.bat"
+    VCVARSPLATFORM: "x64"
+    VCVARSVERSION: "14.25"
+
+# Hidden template for Windows build jobs: sets up the compiler environment,
+# configures with ctest, builds, and publishes the build tree pieces that
+# the test stage needs as artifacts.
+# NOTE(review): the &cmake_build_windows anchor is not referenced by any
+# alias in this file; jobs pull this template in through `extends`.
+.cmake_build_windows: &cmake_build_windows
+  extends:
+    - .windows_build
+    - .windows_vs2019
+  stage: build
+  timeout:  2 hours
+  interruptible: true
+  before_script:
+    - Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
+    - "cmake --version"
+    - "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
+    - "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
+  script:
+    - "ctest -VV -S .gitlab/ci/ctest_build.cmake"
+  artifacts:
+    expire_in: 24 hours
+    # Upload artifacts even when the job fails.
+    when: always
+    paths:
+      # The artifacts of the build.
+      - build/bin/
+      - build/include/
+      - build/lib/
+      - build/vtkm/thirdparty/diy/vtkmdiy/include
+
+      # CTest and CMake install files.
+      # XXX(globbing): Can be simplified with support from
+      # https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
+      #
+      # Note: this also captures our CIState.cmake file
+      # The explicit patterns below cover *.cmake files up to six
+      # directory levels under build/.
+      - build/CMakeCache.txt
+      - build/*.cmake
+      - build/*/*.cmake
+      - build/*/*/*.cmake
+      - build/*/*/*/*.cmake
+      - build/*/*/*/*/*.cmake
+      - build/*/*/*/*/*/*.cmake
+      - build/Testing/
+
+      # CDash files.
+      - build/DartConfiguration.tcl
+
+
+# Hidden template for Windows test jobs: sets up the compiler environment
+# and runs the ctest test script.
+# NOTE(review): the &cmake_test_windows anchor is not referenced by any
+# alias in this file; jobs pull this template in through `extends`.
+.cmake_test_windows: &cmake_test_windows
+  extends:
+    - .windows_build
+    - .windows_vs2019
+  stage: test
+  timeout:  50 minutes
+  interruptible: true
+  before_script:
+    - Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
+  script:
+    #Need to use our custom ctest-latest symlink
+    #This will allow us to use 3.17+ which has support
+    #for running failed tests multiple times so failures
+    #due to system load are not reported
+    - "ctest-latest -VV -S .gitlab/ci/ctest_test.cmake"
+
+# Build on windows10 with Visual Studio
+# Will have CUDA 10.2 once build issues are resolved
+build:windows_vs2019:
+  # Runner selection: the job only runs on a runner carrying every tag below.
+  tags:
+    - vtkm # Since this is a bare runner, pin to a project.
+    - nonconcurrent
+    - build
+    - windows
+    - shell
+    - vs2019
+    - msvc-19.25
+    - large-memory
+  # .only-default is not defined in this file.
+  extends:
+    - .cmake_build_windows
+    - .only-default
+  variables:
+    CMAKE_GENERATOR: "Ninja"
+    CMAKE_BUILD_TYPE: Release
+    # Disabled while we track down cub allocator issues with vtkm/io tests
+    # VTKM_SETTINGS: "cuda+turing"
+    # Until then only the serial option is requested.
+    VTKM_SETTINGS: "serial"
+
+# Test job paired with build:windows_vs2019.
+test:windows_vs2019:
+  # NOTE(review): cuda-rt and turing are still required here although the
+  # build job currently sets VTKM_SETTINGS to "serial"; confirm this is
+  # intended while the CUDA build is disabled.
+  tags:
+    - vtkm # Since this is a bare runner, pin to a project.
+    - nonconcurrent
+    - test
+    - windows
+    - shell
+    - vs2019
+    - msvc-19.25
+    - cuda-rt
+    - turing
+  extends:
+    - .cmake_test_windows
+    - .only-default
+  # Fetch artifacts only from the matching build job.
+  dependencies:
+    - build:windows_vs2019
+  # Start as soon as that build job finishes instead of waiting for the
+  # whole previous stage.
+  needs:
+    - build:windows_vs2019
--- a/.hooks-config
+++ b/.hooks-config
@ -0,0 +1,6 @@
+# Loaded by .git/hooks/(pre-commit|commit-msg|prepare-commit-msg)
+# during git commit after local hooks have been installed.
+
+# Maps a git hook name to a script path under utilities/git/.
+# Presumably the installed local hooks chain to these scripts (confirm).
+[hooks "chain"]
+	pre-commit = utilities/git/pre-commit
+	pre-push = utilities/git/pre-push
--- a/CMake/FindOpenGL.cmake
+++ b/CMake/FindOpenGL.cmake
@ -1,527 +0,0 @@
-##============================================================================
-##  Copyright (c) Kitware, Inc.
-##  All rights reserved.
-##  See LICENSE.txt for details.
-##
-##  This software is distributed WITHOUT ANY WARRANTY; without even
-##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-##  PURPOSE.  See the above copyright notice for more information.
-##============================================================================
-
-# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
-# file Copyright.txt or https://cmake.org/licensing for details.
-
-#.rst:
-# FindOpenGL
-# ----------
-#
-# FindModule for OpenGL and GLU.
-#
-# Optional COMPONENTS
-# ^^^^^^^^^^^^^^^^^^^
-#
-# This module respects several optional COMPONENTS: ``EGL``, ``GLX``, and
-# ``OpenGL``.  There are corresponding import targets for each of these flags.
-#
-# IMPORTED Targets
-# ^^^^^^^^^^^^^^^^
-#
-# This module defines the :prop_tgt:`IMPORTED` targets:
-#
-# ``OpenGL::GL``
-#  Defined to the platform-specific OpenGL libraries if the system has OpenGL.
-# ``OpenGL::OpenGL``
-#  Defined to libOpenGL if the system is GLVND-based.
-#  ``OpenGL::GL``
-# ``OpenGL::GLU``
-#  Defined if the system has GLU.
-# ``OpenGL::GLX``
-#  Defined if the system has GLX.
-# ``OpenGL::EGL``
-#  Defined if the system has EGL.
-#
-# Result Variables
-# ^^^^^^^^^^^^^^^^
-#
-# This module sets the following variables:
-#
-# ``OPENGL_FOUND``
-#  True, if the system has OpenGL and all components are found.
-# ``OPENGL_XMESA_FOUND``
-#  True, if the system has XMESA.
-# ``OPENGL_GLU_FOUND``
-#  True, if the system has GLU.
-# ``OpenGL_OpenGL_FOUND``
-#  True, if the system has an OpenGL library.
-# ``OpenGL_GLX_FOUND``
-#  True, if the system has GLX.
-# ``OpenGL_EGL_FOUND``
-#  True, if the system has EGL.
-# ``OPENGL_INCLUDE_DIR``
-#  Path to the OpenGL include directory.
-# ``OPENGL_EGL_INCLUDE_DIRS``
-#  Path to the EGL include directory.
-# ``OPENGL_LIBRARIES``
-#  Paths to the OpenGL library, windowing system libraries, and GLU libraries.
-#  On Linux, this assumes GLX and is never correct for EGL-based targets.
-#  Clients are encouraged to use the ``OpenGL::*`` import targets instead.
-#
-# Cache variables
-# ^^^^^^^^^^^^^^^
-#
-# The following cache variables may also be set:
-#
-# ``OPENGL_egl_LIBRARY``
-#  Path to the EGL library.
-# ``OPENGL_glu_LIBRARY``
-#  Path to the GLU library.
-# ``OPENGL_glx_LIBRARY``
-#  Path to the GLVND 'GLX' library.
-# ``OPENGL_opengl_LIBRARY``
-#  Path to the GLVND 'OpenGL' library
-# ``OPENGL_gl_LIBRARY``
-#  Path to the OpenGL library.  New code should prefer the ``OpenGL::*`` import
-#  targets.
-#
-# Linux-specific
-# ^^^^^^^^^^^^^^
-#
-# Some Linux systems utilize GLVND as a new ABI for OpenGL.  GLVND separates
-# context libraries from OpenGL itself; OpenGL lives in "libOpenGL", and
-# contexts are defined in "libGLX" or "libEGL".  GLVND is currently the only way
-# to get OpenGL 3+ functionality via EGL in a manner portable across vendors.
-# Projects may use GLVND explicitly with target ``OpenGL::OpenGL`` and either
-# ``OpenGL::GLX`` or ``OpenGL::EGL``.
-#
-# Projects may use the ``OpenGL::GL`` target (or ``OPENGL_LIBRARIES`` variable)
-# to use legacy GL interfaces.  These will use the legacy GL library located
-# by ``OPENGL_gl_LIBRARY``, if available.  If ``OPENGL_gl_LIBRARY`` is empty or
-# not found and GLVND is available, the ``OpenGL::GL`` target will use GLVND
-# ``OpenGL::OpenGL`` and ``OpenGL::GLX`` (and the ``OPENGL_LIBRARIES``
-# variable will use the corresponding libraries).  Thus, for non-EGL-based
-# Linux targets, the ``OpenGL::GL`` target is most portable.
-#
-# A ``OpenGL_GL_PREFERENCE`` variable may be set to specify the preferred way
-# to provide legacy GL interfaces in case multiple choices are available.
-# The value may be one of:
-#
-# ``GLVND``
-#  If the GLVND OpenGL and GLX libraries are available, prefer them.
-#  This forces ``OPENGL_gl_LIBRARY`` to be empty.
-#  This is the default if components were requested (since components
-#  correspond to GLVND libraries).
-#
-# ``LEGACY``
-#  Prefer to use the legacy libGL library, if available.
-#  This is the default if no components were requested.
-#
-# For EGL targets the client must rely on GLVND support on the user's system.
-# Linking should use the ``OpenGL::OpenGL OpenGL::EGL`` targets.  Using GLES*
-# libraries is theoretically possible in place of ``OpenGL::OpenGL``, but this
-# module does not currently support that; contributions welcome.
-#
-# ``OPENGL_egl_LIBRARY`` and ``OPENGL_EGL_INCLUDE_DIRS`` are defined in the case of
-# GLVND.  For non-GLVND Linux and other systems these are left undefined.
-#
-# macOS-Specific
-# ^^^^^^^^^^^^^^
-#
-# On OSX FindOpenGL defaults to using the framework version of OpenGL. People
-# will have to change the cache values of OPENGL_glu_LIBRARY and
-# OPENGL_gl_LIBRARY to use OpenGL with X11 on OSX.
-
-set(_OpenGL_REQUIRED_VARS OPENGL_gl_LIBRARY)
-
-# Provide OPENGL_USE_<C> variables for each component.
-foreach(component ${OpenGL_FIND_COMPONENTS})
-  string(TOUPPER ${component} _COMPONENT)
-  set(OPENGL_USE_${_COMPONENT} 1)
-endforeach()
-
-if (CYGWIN)
-  find_path(OPENGL_INCLUDE_DIR GL/gl.h )
-  list(APPEND _OpenGL_REQUIRED_VARS OPENGL_INCLUDE_DIR)
-
-  find_library(OPENGL_gl_LIBRARY opengl32 )
-  find_library(OPENGL_glu_LIBRARY glu32 )
-
-elseif (WIN32)
-
-  if(BORLAND)
-    set (OPENGL_gl_LIBRARY import32 CACHE STRING "OpenGL library for win32")
-    set (OPENGL_glu_LIBRARY import32 CACHE STRING "GLU library for win32")
-  else()
-    set (OPENGL_gl_LIBRARY opengl32 CACHE STRING "OpenGL library for win32")
-    set (OPENGL_glu_LIBRARY glu32 CACHE STRING "GLU library for win32")
-  endif()
-
-elseif (APPLE)
-  # The OpenGL.framework provides both gl and glu
-  find_library(OPENGL_gl_LIBRARY OpenGL DOC "OpenGL library for OS X")
-  find_library(OPENGL_glu_LIBRARY OpenGL DOC
-    "GLU library for OS X (usually same as OpenGL library)")
-  find_path(OPENGL_INCLUDE_DIR OpenGL/gl.h DOC "Include for OpenGL on OS X")
-  list(APPEND _OpenGL_REQUIRED_VARS OPENGL_INCLUDE_DIR)
-
-else()
-  if (CMAKE_SYSTEM_NAME MATCHES "HP-UX")
-    # Handle HP-UX cases where we only want to find OpenGL in either hpux64
-    # or hpux32 depending on if we're doing a 64 bit build.
-    if(CMAKE_SIZEOF_VOID_P EQUAL 4)
-      set(_OPENGL_LIB_PATH
-        /opt/graphics/OpenGL/lib/hpux32/)
-    else()
-      set(_OPENGL_LIB_PATH
-        /opt/graphics/OpenGL/lib/hpux64/
-        /opt/graphics/OpenGL/lib/pa20_64)
-    endif()
-  elseif(CMAKE_SYSTEM_NAME STREQUAL Haiku)
-    set(_OPENGL_LIB_PATH
-      /boot/develop/lib/x86)
-    set(_OPENGL_INCLUDE_PATH
-      /boot/develop/headers/os/opengl)
-  endif()
-
-  # The first line below is to make sure that the proper headers
-  # are used on a Linux machine with the NVidia drivers installed.
-  # They replace Mesa with NVidia's own library but normally do not
-  # install headers and that causes the linking to
-  # fail since the compiler finds the Mesa headers but NVidia's library.
-  # Make sure the NVIDIA directory comes BEFORE the others.
-  #  - Atanas Georgiev <atanas@cs.columbia.edu>
-  find_path(OPENGL_INCLUDE_DIR GL/gl.h
-    /usr/share/doc/NVIDIA_GLX-1.0/include
-    /usr/openwin/share/include
-    /opt/graphics/OpenGL/include /usr/X11R6/include
-    ${_OPENGL_INCLUDE_PATH}
-  )
-  find_path(OPENGL_GLX_INCLUDE_DIR GL/glx.h ${_OPENGL_INCLUDE_PATH})
-  find_path(OPENGL_EGL_INCLUDE_DIR EGL/egl.h ${_OPENGL_INCLUDE_PATH})
-  find_path(OPENGL_xmesa_INCLUDE_DIR GL/xmesa.h
-    /usr/share/doc/NVIDIA_GLX-1.0/include
-    /usr/openwin/share/include
-    /opt/graphics/OpenGL/include /usr/X11R6/include
-  )
-
-  # Search for the GLVND libraries.  We do this regardless of COMPONENTS; we'll
-  # take into account the COMPONENTS logic later.
-  find_library(OPENGL_opengl_LIBRARY
-    NAMES OpenGL
-    PATHS /usr/X11R6/lib
-          ${_OPENGL_LIB_PATH}
-  )
-
-  find_library(OPENGL_glx_LIBRARY
-    NAMES GLX
-    PATHS /usr/X11R6/lib
-          ${_OPENGL_LIB_PATH}
-  )
-
-  find_library(OPENGL_egl_LIBRARY
-    NAMES EGL
-    PATHS ${_OPENGL_LIB_PATH}
-  )
-
-  find_library(OPENGL_glu_LIBRARY
-    NAMES GLU MesaGLU
-    PATHS ${OPENGL_gl_LIBRARY}
-          /opt/graphics/OpenGL/lib
-          /usr/openwin/lib
-          /usr/shlib /usr/X11R6/lib
-  )
-
-  if(NOT DEFINED OpenGL_GL_PREFERENCE)
-    set(OpenGL_GL_PREFERENCE "")
-  endif()
-  if(NOT OpenGL_GL_PREFERENCE STREQUAL "")
-    # A preference has been explicitly specified.
-    if(NOT OpenGL_GL_PREFERENCE MATCHES "^(GLVND|LEGACY)$")
-      message(FATAL_ERROR
-        "OpenGL_GL_PREFERENCE value '${OpenGL_GL_PREFERENCE}' not recognized.  "
-        "Allowed values are 'GLVND' and 'LEGACY'."
-        )
-    endif()
-  elseif(OpenGL_FIND_COMPONENTS)
-    # No preference was explicitly specified, but the caller did request
-    # at least one GLVND component.  Prefer GLVND for legacy GL.
-    set(OpenGL_GL_PREFERENCE "GLVND")
-  else()
-    # No preference was explicitly specified and no GLVND components were
-    # requested.  Prefer libGL for legacy GL.
-    set(OpenGL_GL_PREFERENCE "LEGACY")
-  endif()
-
-  if("x${OpenGL_GL_PREFERENCE}x" STREQUAL "xGLVNDx" AND OPENGL_opengl_LIBRARY AND OPENGL_glx_LIBRARY)
-    # We can provide legacy GL using GLVND libraries.
-    # Do not use any legacy GL library.
-    set(OPENGL_gl_LIBRARY "")
-  else()
-    # We cannot provide legacy GL using GLVND libraries.
-    # Search for the legacy GL library.
-    find_library(OPENGL_gl_LIBRARY
-      NAMES GL MesaGL
-      PATHS /opt/graphics/OpenGL/lib
-            /usr/openwin/lib
-            /usr/shlib /usr/X11R6/lib
-            ${_OPENGL_LIB_PATH}
-      )
-  endif()
-
-  # FPHSA cannot handle "this OR that is required", so we conditionally set what
-  # it must look for.  First clear any previous config we might have done:
-  set(_OpenGL_REQUIRED_VARS)
-
-  # now we append the libraries as appropriate.  The complicated logic
-  # basically comes down to "use libOpenGL when we can, and add in specific
-  # context mechanisms when requested, or we need them to preserve the previous
-  # default where glx is always available."
-  if((NOT OPENGL_USE_EGL AND
-      NOT OPENGL_opengl_LIBRARY AND
-          OPENGL_glx_LIBRARY AND
-      NOT OPENGL_gl_LIBRARY) OR
-     (NOT OPENGL_USE_EGL AND
-      NOT OPENGL_glx_LIBRARY AND
-      NOT OPENGL_gl_LIBRARY) OR
-     (NOT OPENGL_USE_EGL AND
-          OPENGL_opengl_LIBRARY AND
-          OPENGL_glx_LIBRARY) OR
-     (    OPENGL_USE_EGL))
-    list(APPEND _OpenGL_REQUIRED_VARS OPENGL_opengl_LIBRARY)
-  endif()
-
-  # GLVND GLX library.  Preferred when available.
-  if((NOT OPENGL_USE_OPENGL AND
-      NOT OPENGL_USE_GLX AND
-      NOT OPENGL_USE_EGL AND
-      NOT OPENGL_glx_LIBRARY AND
-      NOT OPENGL_gl_LIBRARY) OR
-     (    OPENGL_USE_GLX AND
-      NOT OPENGL_USE_EGL AND
-      NOT OPENGL_glx_LIBRARY AND
-      NOT OPENGL_gl_LIBRARY) OR
-     (NOT OPENGL_USE_EGL AND
-          OPENGL_opengl_LIBRARY AND
-          OPENGL_glx_LIBRARY) OR
-     (OPENGL_USE_GLX AND OPENGL_USE_EGL))
-    list(APPEND _OpenGL_REQUIRED_VARS OPENGL_glx_LIBRARY)
-  endif()
-
-  # GLVND EGL library.
-  if(OPENGL_USE_EGL)
-    list(APPEND _OpenGL_REQUIRED_VARS OPENGL_egl_LIBRARY)
-  endif()
-
-  # Old-style "libGL" library: used as a fallback when GLVND isn't available.
-  if((NOT OPENGL_USE_EGL AND
-      NOT OPENGL_opengl_LIBRARY AND
-          OPENGL_glx_LIBRARY AND
-          OPENGL_gl_LIBRARY) OR
-     (NOT OPENGL_USE_EGL AND
-      NOT OPENGL_glx_LIBRARY AND
-          OPENGL_gl_LIBRARY))
-    list(APPEND _OpenGL_REQUIRED_VARS OPENGL_gl_LIBRARY)
-  endif()
-
-  # We always need the 'gl.h' include dir.
-  list(APPEND _OpenGL_REQUIRED_VARS OPENGL_INCLUDE_DIR)
-
-  unset(_OPENGL_INCLUDE_PATH)
-  unset(_OPENGL_LIB_PATH)
-
-  find_library(OPENGL_glu_LIBRARY
-    NAMES GLU MesaGLU
-    PATHS ${OPENGL_gl_LIBRARY}
-          /opt/graphics/OpenGL/lib
-          /usr/openwin/lib
-          /usr/shlib /usr/X11R6/lib
-  )
-endif ()
-
-if(OPENGL_xmesa_INCLUDE_DIR)
-  set( OPENGL_XMESA_FOUND "YES" )
-else()
-  set( OPENGL_XMESA_FOUND "NO" )
-endif()
-
-if(OPENGL_glu_LIBRARY)
-  set( OPENGL_GLU_FOUND "YES" )
-else()
-  set( OPENGL_GLU_FOUND "NO" )
-endif()
-
-# OpenGL_OpenGL_FOUND is a bit unique in that it is okay if /either/ libOpenGL
-# or libGL is found.
-# Using libGL with libEGL is never okay, though; we handle that case later.
-if(NOT OPENGL_opengl_LIBRARY AND NOT OPENGL_gl_LIBRARY)
-  set(OpenGL_OpenGL_FOUND FALSE)
-else()
-  set(OpenGL_OpenGL_FOUND TRUE)
-endif()
-
-if(OPENGL_glx_LIBRARY AND OPENGL_GLX_INCLUDE_DIR)
-  set(OpenGL_GLX_FOUND TRUE)
-else()
-  set(OpenGL_GLX_FOUND FALSE)
-endif()
-
-if(OPENGL_egl_LIBRARY AND OPENGL_EGL_INCLUDE_DIR)
-  set(OpenGL_EGL_FOUND TRUE)
-else()
-  set(OpenGL_EGL_FOUND FALSE)
-endif()
-
-# User-visible names should be plural.
-if(OPENGL_EGL_INCLUDE_DIR)
-  set(OPENGL_EGL_INCLUDE_DIRS ${OPENGL_EGL_INCLUDE_DIR})
-endif()
-
-include(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(OpenGL REQUIRED_VARS ${_OpenGL_REQUIRED_VARS}
-                                  HANDLE_COMPONENTS)
-unset(_OpenGL_REQUIRED_VARS)
-
-# OpenGL:: targets
-if(OPENGL_FOUND)
-  # ::OpenGL is a GLVND library, and thus Linux-only: we don't bother checking
-  # for a framework version of this library.
-  if(OPENGL_opengl_LIBRARY AND NOT TARGET OpenGL::OpenGL)
-    if(IS_ABSOLUTE "${OPENGL_opengl_LIBRARY}")
-      add_library(OpenGL::OpenGL UNKNOWN IMPORTED)
-      set_target_properties(OpenGL::OpenGL PROPERTIES IMPORTED_LOCATION
-                            "${OPENGL_opengl_LIBRARY}")
-    else()
-      add_library(OpenGL::OpenGL INTERFACE IMPORTED)
-      set_target_properties(OpenGL::OpenGL PROPERTIES IMPORTED_LIBNAME
-                            "${OPENGL_opengl_LIBRARY}")
-    endif()
-    set_target_properties(OpenGL::OpenGL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                          "${OPENGL_INCLUDE_DIR}")
-  endif()
-
-  # ::GLX is a GLVND library, and thus Linux-only: we don't bother checking
-  # for a framework version of this library.
-  if(OpenGL_GLX_FOUND AND NOT TARGET OpenGL::GLX)
-    if(IS_ABSOLUTE "${OPENGL_glx_LIBRARY}")
-      add_library(OpenGL::GLX UNKNOWN IMPORTED)
-      set_target_properties(OpenGL::GLX PROPERTIES IMPORTED_LOCATION
-                            "${OPENGL_glx_LIBRARY}")
-    else()
-      add_library(OpenGL::GLX INTERFACE IMPORTED)
-      set_target_properties(OpenGL::GLX PROPERTIES IMPORTED_LIBNAME
-                            "${OPENGL_glx_LIBRARY}")
-    endif()
-    set_target_properties(OpenGL::GLX PROPERTIES INTERFACE_LINK_LIBRARIES
-                          OpenGL::OpenGL)
-    set_target_properties(OpenGL::GLX PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                          "${OPENGL_GLX_INCLUDE_DIR}")
-  endif()
-
-  if(OPENGL_gl_LIBRARY AND NOT TARGET OpenGL::GL)
-    # A legacy GL library is available, so use it for the legacy GL target.
-    if(IS_ABSOLUTE "${OPENGL_gl_LIBRARY}")
-      add_library(OpenGL::GL UNKNOWN IMPORTED)
-      if(OPENGL_gl_LIBRARY MATCHES "/([^/]+)\\.framework$")
-        set(_gl_fw "${OPENGL_gl_LIBRARY}/${CMAKE_MATCH_1}")
-        if(EXISTS "${_gl_fw}.tbd")
-          set(_gl_fw "${_gl_fw}.tbd")
-        endif()
-        set_target_properties(OpenGL::GL PROPERTIES
-          IMPORTED_LOCATION "${_gl_fw}")
-      else()
-        set_target_properties(OpenGL::GL PROPERTIES
-          IMPORTED_LOCATION "${OPENGL_gl_LIBRARY}")
-      endif()
-    else()
-      add_library(OpenGL::GL INTERFACE IMPORTED)
-      set_target_properties(OpenGL::GL PROPERTIES
-        IMPORTED_LIBNAME "${OPENGL_gl_LIBRARY}")
-    endif()
-    set_target_properties(OpenGL::GL PROPERTIES
-      INTERFACE_INCLUDE_DIRECTORIES "${OPENGL_INCLUDE_DIR}")
-  elseif(NOT TARGET OpenGL::GL AND TARGET OpenGL::OpenGL AND TARGET OpenGL::GLX)
-    # A legacy GL library is not available, but we can provide the legacy GL
-    # target using GLVND OpenGL+GLX.
-    add_library(OpenGL::GL INTERFACE IMPORTED)
-    set_target_properties(OpenGL::GL PROPERTIES INTERFACE_LINK_LIBRARIES
-                          OpenGL::OpenGL)
-    set_property(TARGET OpenGL::GL APPEND PROPERTY INTERFACE_LINK_LIBRARIES
-                 OpenGL::GLX)
-    set_target_properties(OpenGL::GL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                          "${OPENGL_INCLUDE_DIR}")
-  endif()
-
-  # ::EGL is a GLVND library, and thus Linux-only: we don't bother checking
-  # for a framework version of this library.
-  # Note we test for OpenGL::OpenGL as a target.  When this module is updated to
-  # support GLES, we would additionally want to check for the hypothetical GLES
-  # target and enable EGL if either ::GLES or ::OpenGL is created.
-  if(TARGET OpenGL::OpenGL AND OpenGL_EGL_FOUND AND NOT TARGET OpenGL::EGL)
-    if(IS_ABSOLUTE "${OPENGL_egl_LIBRARY}")
-      add_library(OpenGL::EGL UNKNOWN IMPORTED)
-      set_target_properties(OpenGL::EGL PROPERTIES IMPORTED_LOCATION
-                            "${OPENGL_egl_LIBRARY}")
-    else()
-      add_library(OpenGL::EGL INTERFACE IMPORTED)
-      set_target_properties(OpenGL::EGL PROPERTIES IMPORTED_LIBNAME
-                            "${OPENGL_egl_LIBRARY}")
-    endif()
-    set_target_properties(OpenGL::EGL PROPERTIES INTERFACE_LINK_LIBRARIES
-                          OpenGL::OpenGL)
-    # Note that EGL's include directory is different from OpenGL/GLX's!
-    set_target_properties(OpenGL::EGL PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
-                          "${OPENGL_EGL_INCLUDE_DIR}")
-  endif()
-
-  if(OPENGL_GLU_FOUND AND NOT TARGET OpenGL::GLU)
-    if(IS_ABSOLUTE "${OPENGL_glu_LIBRARY}")
-      add_library(OpenGL::GLU UNKNOWN IMPORTED)
-      if(OPENGL_glu_LIBRARY MATCHES "/([^/]+)\\.framework$")
-        set(_glu_fw "${OPENGL_glu_LIBRARY}/${CMAKE_MATCH_1}")
-        if(EXISTS "${_glu_fw}.tbd")
-          set(_glu_fw "${_glu_fw}.tbd")
-        endif()
-        set_target_properties(OpenGL::GLU PROPERTIES
-          IMPORTED_LOCATION "${_glu_fw}")
-      else()
-        set_target_properties(OpenGL::GLU PROPERTIES
-          IMPORTED_LOCATION "${OPENGL_glu_LIBRARY}")
-      endif()
-    else()
-      add_library(OpenGL::GLU INTERFACE IMPORTED)
-      set_target_properties(OpenGL::GLU PROPERTIES
-        IMPORTED_LIBNAME "${OPENGL_glu_LIBRARY}")
-    endif()
-    set_target_properties(OpenGL::GLU PROPERTIES
-      INTERFACE_LINK_LIBRARIES OpenGL::GL)
-  endif()
-
-  # OPENGL_LIBRARIES mirrors OpenGL::GL's logic ...
-  if(OPENGL_gl_LIBRARY)
-    set(OPENGL_LIBRARIES ${OPENGL_gl_LIBRARY})
-  elseif(TARGET OpenGL::OpenGL AND TARGET OpenGL::GLX)
-    set(OPENGL_LIBRARIES ${OPENGL_opengl_LIBRARY} ${OPENGL_glx_LIBRARY})
-  else()
-    set(OPENGL_LIBRARIES "")
-  endif()
-  # ... and also includes GLU, if available.
-  if(TARGET OpenGL::GLU)
-    list(APPEND OPENGL_LIBRARIES ${OPENGL_glu_LIBRARY})
-  endif()
-endif()
-
-# This deprecated setting is for backward compatibility with CMake1.4
-set(OPENGL_LIBRARY ${OPENGL_LIBRARIES})
-# This deprecated setting is for backward compatibility with CMake1.4
-set(OPENGL_INCLUDE_PATH ${OPENGL_INCLUDE_DIR})
-
-mark_as_advanced(
-  OPENGL_INCLUDE_DIR
-  OPENGL_xmesa_INCLUDE_DIR
-  OPENGL_egl_LIBRARY
-  OPENGL_glu_LIBRARY
-  OPENGL_glx_LIBRARY
-  OPENGL_gl_LIBRARY
-  OPENGL_opengl_LIBRARY
-  OPENGL_EGL_INCLUDE_DIR
-  OPENGL_GLX_INCLUDE_DIR
-)
--- a/CMake/FindTBB.cmake
+++ b/CMake/FindTBB.cmake
@ -9,6 +9,7 @@
 #  TBB_INCLUDE_DIRS - the TBB include directories
 #  TBB_LIBRARIES - TBB libraries to be lined, doesn't include malloc or
 #                  malloc proxy
+#  TBB::tbb - imported target for the TBB library
 #
 #  TBB_VERSION_MAJOR - Major Product Version Number
 #  TBB_VERSION_MINOR - Minor Product Version Number
@ -20,10 +21,12 @@
 #  TBB_MALLOC_FOUND - system has TBB malloc library
 #  TBB_MALLOC_INCLUDE_DIRS - the TBB malloc include directories
 #  TBB_MALLOC_LIBRARIES - The TBB malloc libraries to be lined
+#  TBB::tbbmalloc - imported target for the TBB malloc library
 #
 #  TBB_MALLOC_PROXY_FOUND - system has TBB malloc proxy library
 #  TBB_MALLOC_PROXY_INCLUDE_DIRS = the TBB malloc proxy include directories
 #  TBB_MALLOC_PROXY_LIBRARIES - The TBB malloc proxy libraries to be lined
+#  TBB::tbbmalloc_proxy - imported target for the TBB malloc proxy library
 #
 #
 # This module reads hints about search locations from variables:
@ -65,28 +68,84 @@
 #  FindTBB helper functions and macros
 #

+#====================================================
+# Fix the library path in case it is a linker script
+#
+#   tbb_extract_real_library(<library> <real_library>)
+#
+#   library      - path of the library file to inspect
+#   real_library - name of the variable, set in the caller's scope, that
+#                  receives the path that should be linked against
+#
+# The input path is returned unchanged when not on UNIX, when the file does
+# not exist, when it starts with the ELF magic number, or when no
+# "INPUT (...)" statement is found in its first 1024 bytes.
+#====================================================
+function(tbb_extract_real_library library real_library)
+  #quote the path so an empty value or one containing ';' stays a single
+  #argument
+  if(NOT UNIX OR NOT EXISTS "${library}")
+    set(${real_library} "${library}" PARENT_SCOPE)
+    return()
+  endif()
+
+  #Read in the first 4 bytes and see if they are the ELF magic number
+  set(_elf_magic "7f454c46")
+  file(READ "${library}" _hex_data OFFSET 0 LIMIT 4 HEX)
+  if(_hex_data STREQUAL _elf_magic)
+    #we have opened a elf binary so this is what
+    #we should link to
+    set(${real_library} "${library}" PARENT_SCOPE)
+    return()
+  endif()
+
+  file(READ "${library}" _data OFFSET 0 LIMIT 1024)
+  if("${_data}" MATCHES "INPUT \\(([^(]+)\\)")
+    #extract out the .so name from REGEX MATCH command; the name may be padded
+    #with blanks inside the parentheses, e.g. "INPUT ( libtbb.so.2 )"
+    string(STRIP "${CMAKE_MATCH_1}" _proper_so_name)
+
+    if(IS_ABSOLUTE "${_proper_so_name}")
+      #the script names the real library by absolute path, use it as is
+      set(${real_library} "${_proper_so_name}" PARENT_SCOPE)
+    else()
+      #construct path to the real .so which is presumed to be in the same
+      #directory as the input file
+      get_filename_component(_so_dir "${library}" DIRECTORY)
+      set(${real_library} "${_so_dir}/${_proper_so_name}" PARENT_SCOPE)
+    endif()
+  else()
+    #unable to determine what this library is so just hope everything works
+    #and pass it unmodified.
+    set(${real_library} "${library}" PARENT_SCOPE)
+  endif()
+endfunction()
+
 #===============================================
 # Do the final processing for the package find.
 #===============================================
-macro(findpkg_finish PREFIX)
-  # skip if already processed during this run
-  if (NOT ${PREFIX}_FOUND)
-    if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY)
-      set(${PREFIX}_FOUND TRUE)
-      set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIR})
-      set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARY})
-    else ()
-      if (${PREFIX}_FIND_REQUIRED AND NOT ${PREFIX}_FIND_QUIETLY)
-        message(FATAL_ERROR "Required library ${PREFIX} not found.")
-      endif ()
+macro(findpkg_finish PREFIX TARGET_NAME)
+  if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY)
+    set(${PREFIX}_FOUND TRUE)
+    set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIR})
+    set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARY})
+  else ()
+    if (${PREFIX}_FIND_REQUIRED AND NOT ${PREFIX}_FIND_QUIETLY)
+      message(FATAL_ERROR "Required library ${PREFIX} not found.")
    endif ()
-
-   #mark the following variables as internal variables
-   mark_as_advanced(${PREFIX}_INCLUDE_DIR
-                    ${PREFIX}_LIBRARY
-                    ${PREFIX}_LIBRARY_DEBUG
-                    ${PREFIX}_LIBRARY_RELEASE)
  endif ()
+
+  # Create the imported target TBB::<TARGET_NAME> once, and only when both the
+  # include directory and the library were located (the same condition that
+  # sets ${PREFIX}_FOUND above). Otherwise `if (TARGET TBB::...)` would succeed
+  # for a component that has no IMPORTED_LOCATION and a NOTFOUND include
+  # directory.
+  if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY AND
+      NOT TARGET "TBB::${TARGET_NAME}")
+    # Resolve linker scripts to the real shared library for each
+    # configuration that was found.
+    if (${PREFIX}_LIBRARY_RELEASE)
+      tbb_extract_real_library("${${PREFIX}_LIBRARY_RELEASE}" real_release)
+    endif ()
+    if (${PREFIX}_LIBRARY_DEBUG)
+      tbb_extract_real_library("${${PREFIX}_LIBRARY_DEBUG}" real_debug)
+    endif ()
+    add_library(TBB::${TARGET_NAME} UNKNOWN IMPORTED)
+    set_target_properties(TBB::${TARGET_NAME} PROPERTIES
+      INTERFACE_INCLUDE_DIRECTORIES "${${PREFIX}_INCLUDE_DIR}")
+    if (${PREFIX}_LIBRARY_DEBUG AND ${PREFIX}_LIBRARY_RELEASE)
+      # Both flavours available: release is the default location, debug is
+      # used for Debug configurations.
+      set_target_properties(TBB::${TARGET_NAME} PROPERTIES
+        IMPORTED_LOCATION "${real_release}"
+        IMPORTED_LOCATION_DEBUG "${real_debug}"
+        IMPORTED_LOCATION_RELEASE "${real_release}")
+    elseif (${PREFIX}_LIBRARY_RELEASE)
+      set_target_properties(TBB::${TARGET_NAME} PROPERTIES
+        IMPORTED_LOCATION "${real_release}")
+    elseif (${PREFIX}_LIBRARY_DEBUG)
+      set_target_properties(TBB::${TARGET_NAME} PROPERTIES
+        IMPORTED_LOCATION "${real_debug}")
+    endif ()
+  endif ()
+
+  #mark the following variables as internal variables
+  mark_as_advanced(${PREFIX}_INCLUDE_DIR
+                   ${PREFIX}_LIBRARY
+                   ${PREFIX}_LIBRARY_DEBUG
+                   ${PREFIX}_LIBRARY_RELEASE)
 endmacro()

 #===============================================
@ -136,20 +195,6 @@ set(TBB_INC_SEARCH_PATH "")
 set(TBB_LIB_SEARCH_PATH "")


-# If we found parts of TBB in a previous pass, add the directories for those
-# components to the list of those we look for.
-if(TBB_INCLUDE_DIR)
-  list(APPEND TBB_INC_SEARCH_PATH ${TBB_INCLUDE_DIR})
-endif()
-
-if(TBB_LIBRARY_RELEASE)
-  get_filename_component(dir ${TBB_LIBRARY_RELEASE} DIRECTORY)
-  list(APPEND TBB_LIB_SEARCH_PATH ${dir})
-elseif(TBB_LIBRARY_DEBUG)
-  get_filename_component(dir ${TBB_LIBRARY_DEBUG} DIRECTORY)
-  list(APPEND TBB_LIB_SEARCH_PATH ${dir})
-endif()
-
 # If user built from sources
 set(TBB_BUILD_PREFIX $ENV{TBB_BUILD_PREFIX})
 if (TBB_BUILD_PREFIX AND ENV_TBB_ROOT)
@ -203,12 +248,23 @@ if (WIN32 AND MSVC)
      list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/ia32/${COMPILER_PREFIX})
    endif ()
  endforeach ()
-elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
-       NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0)
-  set (USE_LIBCXX OFF)
+endif ()

-  if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
-    set (USE_LIBCXX ON)
+# For OS X binary distribution, choose libc++ based libraries for Mavericks (10.9)
+# and above and AppleClang
+if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
+    NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0)
+  set (USE_LIBCXX OFF)
+  cmake_policy(GET CMP0025 POLICY_VAR)
+
+  if (POLICY_VAR STREQUAL "NEW")
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+      set (USE_LIBCXX ON)
+    endif ()
+  else ()
+    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+      set (USE_LIBCXX ON)
+    endif ()
  endif ()

  if (USE_LIBCXX)
@ -216,8 +272,10 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
      list (APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/libc++ ${dir}/libc++/lib)
    endforeach ()
  endif ()
-elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  # check compiler ABI
+endif ()
+
+# check compiler ABI
+if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  set(COMPILER_PREFIX)
  if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
    list(APPEND COMPILER_PREFIX "gcc4.7")
@ -285,7 +343,7 @@ find_library(TBB_LIBRARY_DEBUG
             PATHS ${TBB_LIB_SEARCH_PATH})
 make_library_set(TBB_LIBRARY)

-findpkg_finish(TBB)
+findpkg_finish(TBB tbb)

 #if we haven't found TBB no point on going any further
 if (NOT TBB_FOUND)
@ -309,7 +367,7 @@ find_library(TBB_MALLOC_LIBRARY_DEBUG
             PATHS ${TBB_LIB_SEARCH_PATH})
 make_library_set(TBB_MALLOC_LIBRARY)

-findpkg_finish(TBB_MALLOC)
+findpkg_finish(TBB_MALLOC tbbmalloc)

 #=============================================================================
 # Look for TBB's malloc proxy package
@ -328,7 +386,7 @@ find_library(TBB_MALLOC_PROXY_LIBRARY_DEBUG
             PATHS ${TBB_LIB_SEARCH_PATH})
 make_library_set(TBB_MALLOC_PROXY_LIBRARY)

-findpkg_finish(TBB_MALLOC_PROXY)
+findpkg_finish(TBB_MALLOC_PROXY tbbmalloc_proxy)


 #=============================================================================
@ -336,10 +394,10 @@ findpkg_finish(TBB_MALLOC_PROXY)
 if(NOT TBB_VERSION)

 #only read the start of the file
- file(READ
+ file(STRINGS
      "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h"
      TBB_VERSION_CONTENTS
-      LIMIT 2048)
+      REGEX "VERSION")

  string(REGEX REPLACE
    ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1"
--- a/CMake/VTKmCMakeBackports.cmake
+++ b/CMake/VTKmCMakeBackports.cmake
@ -0,0 +1,23 @@
+##============================================================================
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##============================================================================
+
+# List every entry of the "patches" directory that sits next to this file.
+# LIST_DIRECTORIES makes the glob return directories too, and RELATIVE strips
+# the leading path so each result is just the entry name.
+file(GLOB cmake_version_backports
+  LIST_DIRECTORIES true
+  RELATIVE "${CMAKE_CURRENT_LIST_DIR}/patches"
+  "${CMAKE_CURRENT_LIST_DIR}/patches/*")
+
+# Each directory name is compared against CMAKE_VERSION as a version number.
+# When the running CMake is older than that version, the directory is put at
+# the front of CMAKE_MODULE_PATH (presumably so the modules it holds take
+# precedence over the ones shipped with CMake).
+foreach (cmake_version_backport IN LISTS cmake_version_backports)
+  # Plain files inside "patches" are ignored.
+  if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/patches/${cmake_version_backport}")
+    continue ()
+  endif ()
+  if (CMAKE_VERSION VERSION_LESS "${cmake_version_backport}")
+    list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_LIST_DIR}/patches/${cmake_version_backport}")
+  endif ()
+endforeach ()
--- a/CMake/VTKmCPUVectorization.cmake
+++ b/CMake/VTKmCPUVectorization.cmake
@ -77,7 +77,7 @@ endif()
 set(vec_levels none native)

 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-  #for now we presume gcc >= 4.8
+  #for now we presume gcc >= 5.4
  list(APPEND vec_levels avx avx2)

  #common flags for the avx and avx2 instructions for the gcc compiler
--- a/CMake/VTKmCheckCopyright.cmake
+++ b/CMake/VTKmCheckCopyright.cmake
@ -15,7 +15,6 @@
 ## cmake -DVTKm_SOURCE_DIR=<VTKm_SOURCE_DIR> -P <VTKm_SOURCE_DIR>/CMake/VTKMCheckCopyright.cmake
 ##

-cmake_minimum_required(VERSION 3.8...3.15 FATAL_ERROR)
 set(FILES_TO_CHECK
  *.txt
  *.cmake
--- a/CMake/VTKmCompilerDynamicAnalysisFlags.cmake
+++ b/CMake/VTKmCompilerDynamicAnalysisFlags.cmake
@ -1,32 +0,0 @@
-##============================================================================
-##  Copyright (c) Kitware, Inc.
-##  All rights reserved.
-##  See LICENSE.txt for details.
-##
-##  This software is distributed WITHOUT ANY WARRANTY; without even
-##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-##  PURPOSE.  See the above copyright notice for more information.
-##============================================================================
-
-#-----------------------------------------------------------------------------
-# check if this is a sanitizer build. If so, set up the environment.
-
-function(vtkm_check_sanitizer_build)
-  string (FIND "${CTEST_MEMORYCHECK_TYPE}" "Sanitizer" SANITIZER_BUILD)
-  if (${SANITIZER_BUILD} GREATER -1)
-    # This is a sanitizer build.
-    # Configure the sanitizer blacklist file
-    set (SANITIZER_BLACKLIST "${VTKm_BINARY_DIR}/sanitizer_blacklist.txt")
-    configure_file (
-      "${VTKm_SOURCE_DIR}/Utilities/DynamicAnalysis/sanitizer_blacklist.txt.in"
-      ${SANITIZER_BLACKLIST}
-      @ONLY
-      )
-
-    # Add the compiler flags for blacklist
-    set (FSANITIZE_BLACKLIST "\"-fsanitize-blacklist=${SANITIZER_BLACKLIST}\"")
-    foreach (entity C CXX SHARED_LINKER EXE_LINKER MODULE_LINKER)
-      set (CMAKE_${entity}_FLAGS "${CMAKE_${entity}_FLAGS} ${FSANITIZE_BLACKLIST}")
-    endforeach ()
-  endif ()
-endfunction()
--- a/CMake/VTKmCompilerFlags.cmake
+++ b/CMake/VTKmCompilerFlags.cmake
@ -22,6 +22,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  set(VTKM_COMPILER_IS_CLANG 1)
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  set(VTKM_COMPILER_IS_GNU 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XLClang")
+  set(VTKM_COMPILER_IS_XL 1)
 endif()

 #-----------------------------------------------------------------------------
@ -51,7 +53,7 @@ if(VTKM_COMPILER_IS_MSVC)
  if(TARGET vtkm::cuda)
    target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler="/Gy">)
  endif()
-elseif(NOT VTKM_COMPILER_IS_PGI) #can't find an equivalant PGI flag
+elseif(NOT (VTKM_COMPILER_IS_PGI OR VTKM_COMPILER_IS_XL)) #can't find an equivalent PGI/XL flag
  target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-ffunction-sections>)
  if(TARGET vtkm::cuda)
    target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-ffunction-sections>)
@ -122,8 +124,15 @@ elseif(VTKM_COMPILER_IS_ICC)
  target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-wd1478 -wd13379>)

 elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
-  set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common)
-  set(cuda_flags -Xcompiler=-Wall,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-local-typedefs,-Wno-unused-function,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-Wunused,-fno-common)
+  set(cxx_flags -Wall -Wcast-align -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common -Wno-unused-function)
+  set(cuda_flags -Xcompiler=-Wall,-Wcast-align,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-fno-common,-Wunused,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-function)
+
+  #Clang does not support the -Wchar-subscripts flag for warning if an array
+  #subscript has a char type.
+  if (VTKM_COMPILER_IS_GNU)
+    list(APPEND cxx_flags -Wchar-subscripts)
+    set(cuda_flags "${cuda_flags},-Wchar-subscripts")
+  endif()

  #Only add float-conversion warnings for gcc as the integer warnigns in GCC
  #include the implicit casting of all types smaller than int to ints.
@ -161,17 +170,21 @@ elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
  endif()
 endif()

-#common warnings for all platforms when building cuda
-if(TARGET vtkm::cuda)
+function(setup_cuda_flags)
  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
    #nvcc 9 introduced specific controls to disable the stack size warning
    #otherwise we let the warning occur. We have to set this in CMAKE_CUDA_FLAGS
    #as it is passed to the device link step, unlike compile_options
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning" PARENT_SCOPE)
  endif()

  set(display_error_nums -Xcudafe=--display_error_number)
  target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${display_error_nums}>)
+endfunction()
+
+#common warnings for all platforms when building cuda
+if ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
+  setup_cuda_flags()
 endif()

 if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
--- a/CMake/VTKmConfig.cmake.in
+++ b/CMake/VTKmConfig.cmake.in
@ -39,6 +39,7 @@
 #  VTKm_ENABLE_CUDA           Will be enabled if VTK-m was built with CUDA support
 #  VTKm_ENABLE_TBB            Will be enabled if VTK-m was built with TBB support
 #  VTKm_ENABLE_OPENMP         Will be enabled if VTK-m was built with OpenMP support
+#  VTKm_ENABLE_KOKKOS         Will be enabled if VTK-m was built with Kokkos support
 #  VTKm_ENABLE_LOGGING        Will be enabled if VTK-m was built with logging support
 #  VTKm_ENABLE_MPI            Will be enabled if VTK-m was built with MPI support
 #  VTKm_ENABLE_RENDERING      Will be enabled if VTK-m was built with rendering support
@ -48,8 +49,8 @@
 #
 #

-if (CMAKE_VERSION VERSION_LESS "3.8")
-  message(FATAL_ERROR "VTK-m requires CMake 3.8+")
+if (CMAKE_VERSION VERSION_LESS "3.12")
+  message(FATAL_ERROR "VTK-m requires CMake 3.12+")
 endif()
 if("${CMAKE_GENERATOR}" MATCHES "Visual Studio" AND
   CMAKE_VERSION VERSION_LESS "3.11")
@ -67,8 +68,9 @@ set(VTKm_VERSION "@VTKm_VERSION@")

 set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
 set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
-set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
+set(VTKm_ENABLE_KOKKOS "@VTKm_ENABLE_KOKKOS@")
 set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
+set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
 set(VTKm_ENABLE_LOGGING "@VTKm_ENABLE_LOGGING@")
 set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
 set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
@ -101,6 +103,12 @@ endif()
 if(VTKm_ENABLE_CUDA AND VTKM_FROM_INSTALL_DIR)
  set_target_properties(vtkm::cuda PROPERTIES cuda_architecture_flags "@VTKm_CUDA_Architecture_Flags@")
  set_target_properties(vtkm::cuda PROPERTIES requires_static_builds TRUE)
+
+  # If VTK-m is built with 3.18+ and the consumer is < 3.18 we need to drop
+  # these properties as they break the VTK-m cuda flag logic
+  if(CMAKE_VERSION VERSION_LESS 3.18)
+    set_target_properties(vtkm::cuda PROPERTIES INTERFACE_LINK_OPTIONS "")
+  endif()
 endif()

 # VTKm requires some CMake Find modules not included with CMake, so
@ -116,3 +124,7 @@ endif()
 # This includes a host of functions used by VTK-m CMake.
 include(VTKmWrappers)
 include(VTKmRenderingContexts)
+
+# Setup diy magic of choosing the appropriate mpi/no_mpi library to link against
+include(VTKmDIYUtils)
+vtkm_diy_init_target()
--- a/CMake/VTKmDIYUtils.cmake
+++ b/CMake/VTKmDIYUtils.cmake
@ -0,0 +1,59 @@
+##============================================================================
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##============================================================================
+
+# Map an MPI on/off flag to the name of the DIY library to use.
+#   flag   - NAME of a variable holding the boolean MPI setting
+#   target - NAME of the variable that receives "vtkmdiympi" when the flag is
+#            true and "vtkmdiympi_nompi" otherwise
+# This is a macro, so the result variable is set directly in the caller's
+# scope.
+macro(_vtkm_diy_target flag target)
+  set(${target} "vtkmdiympi")
+  if (NOT ${flag})
+    set(${target} "vtkmdiympi_nompi")
+  endif()
+endmacro()
+
+# Initialise the MPI selection state kept on the vtkm_diy target:
+#   vtkm_diy_use_mpi_stack - starts with the single entry VTKm_ENABLE_MPI
+#   vtkm_diy_target        - DIY library name matching that entry
+function(vtkm_diy_init_target)
+  set(vtkm_diy_default_flag "${VTKm_ENABLE_MPI}")
+  _vtkm_diy_target(vtkm_diy_default_flag vtkm_diy_default_target)
+
+  # The values are quoted so that an empty VTKm_ENABLE_MPI still yields
+  # well-formed property/value pairs (matches the other vtkm_diy_* functions).
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${vtkm_diy_default_flag}"
+    vtkm_diy_target "${vtkm_diy_default_target}")
+endfunction()
+
+#-----------------------------------------------------------------------------
+# Push a new MPI setting onto the stack stored in the vtkm_diy target's
+# vtkm_diy_use_mpi_stack property and set vtkm_diy_target to the matching DIY
+# library name. The pushed value is the first argument when one is given,
+# otherwise the current value of VTKm_ENABLE_MPI.
+function(vtkm_diy_use_mpi_push)
+  set(topval ${VTKm_ENABLE_MPI})
+  if (NOT ARGC EQUAL 0)
+    set(topval ${ARGV0})
+  endif()
+  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
+  list (APPEND stack ${topval})
+  # Translate the new top-of-stack value into a library name.
+  _vtkm_diy_target(topval target)
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${stack}"
+    vtkm_diy_target "${target}")
+endfunction()
+
+# Replace the top entry of the vtkm_diy_use_mpi_stack property with `value`
+# and set vtkm_diy_target to the DIY library name matching it. The stack depth
+# is unchanged.
+function(vtkm_diy_use_mpi value)
+  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
+  # Drop the current top, then append the replacement.
+  list (REMOVE_AT stack -1)
+  list (APPEND stack ${value})
+  _vtkm_diy_target(value target)
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${stack}"
+    vtkm_diy_target "${target}")
+endfunction()
+
+# Remove the top entry of the vtkm_diy_use_mpi_stack property and restore
+# vtkm_diy_target to the library matching the entry that is now on top.
+function(vtkm_diy_use_mpi_pop)
+  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
+  list (REMOVE_AT stack -1)
+
+  # The active setting after a pop is the NEW top of the stack, not the value
+  # that was just discarded. Fall back to VTKm_ENABLE_MPI when nothing is left.
+  list (LENGTH stack depth)
+  if (depth GREATER 0)
+    list (GET stack -1 value)
+  else ()
+    set(value "${VTKm_ENABLE_MPI}")
+  endif ()
+  _vtkm_diy_target(value target)
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${stack}"
+    vtkm_diy_target "${target}")
+endfunction()
--- a/CMake/VTKmDeviceAdapters.cmake
+++ b/CMake/VTKmDeviceAdapters.cmake
@ -43,14 +43,7 @@ endfunction()

 if(VTKm_ENABLE_TBB AND NOT TARGET vtkm::tbb)
  find_package(TBB REQUIRED)
-
-  # Workaround a bug in older versions of cmake prevents linking with UNKNOWN IMPORTED libraries
-  # refer to CMake issue #17245
-  if (CMAKE_VERSION VERSION_LESS 3.10)
-    add_library(vtkm::tbb SHARED IMPORTED GLOBAL)
-  else()
-    add_library(vtkm::tbb UNKNOWN IMPORTED GLOBAL)
-  endif()
+  add_library(vtkm::tbb UNKNOWN IMPORTED GLOBAL)

  set_target_properties(vtkm::tbb PROPERTIES
      INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIRS}")
@ -86,7 +79,6 @@ endif()


 if(VTKm_ENABLE_OPENMP AND NOT TARGET vtkm::openmp)
-  cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
  find_package(OpenMP 4.0 REQUIRED COMPONENTS CXX QUIET)

  add_library(vtkm::openmp INTERFACE IMPORTED GLOBAL)
@ -135,10 +127,13 @@ if(VTKm_ENABLE_CUDA)
      requires_static_builds TRUE
    )

+    target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

-    set_target_properties(vtkm_cuda PROPERTIES
-      INTERFACE_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
-    )
+    if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
+      CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
+      # CUDA 11+ deprecated C++11 support
+      target_compile_features(vtkm_cuda INTERFACE cxx_std_14)
+    endif()

    # add the -gencode flags so that all cuda code
    # way compiled properly
@ -172,7 +167,10 @@ if(VTKm_ENABLE_CUDA)
    # 6 - volta
    #   - Uses: --generate-code=arch=compute_70,code=sm_70
    # 7 - turing
-    #   - Uses: --generate-code=arch=compute_75code=sm_75
+    #   - Uses: --generate-code=arch=compute_75,code=sm_75
+    # 8 - ampere
+    #   - Uses: --generate-code=arch=compute_80,code=sm_80
+    #   - Uses: --generate-code=arch=compute_86,code=sm_86
    # 8 - all
    #   - Uses: --generate-code=arch=compute_30,code=sm_30
    #   - Uses: --generate-code=arch=compute_35,code=sm_35
@ -180,12 +178,14 @@ if(VTKm_ENABLE_CUDA)
    #   - Uses: --generate-code=arch=compute_60,code=sm_60
    #   - Uses: --generate-code=arch=compute_70,code=sm_70
    #   - Uses: --generate-code=arch=compute_75,code=sm_75
+    #   - Uses: --generate-code=arch=compute_80,code=sm_80
+    #   - Uses: --generate-code=arch=compute_86,code=sm_86
    # 8 - none
    #

    #specify the property
    set(VTKm_CUDA_Architecture "native" CACHE STRING "Which GPU Architecture(s) to compile for")
-    set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell pascal volta turing all none)
+    set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell pascal volta turing ampere all none)

    #detect what the property is set too
    if(VTKm_CUDA_Architecture STREQUAL "native")
@ -239,23 +239,124 @@ if(VTKm_ENABLE_CUDA)
      set(arch_flags --generate-code=arch=compute_70,code=sm_70)
    elseif(VTKm_CUDA_Architecture STREQUAL "turing")
      set(arch_flags --generate-code=arch=compute_75,code=sm_75)
+    elseif(VTKm_CUDA_Architecture STREQUAL "ampere")
+      # Ampere covers two compute capabilities. Both flags must be passed to a
+      # single set(); a second set() would replace the sm_80 entry with sm_86.
+      set(arch_flags --generate-code=arch=compute_80,code=sm_80
+                     --generate-code=arch=compute_86,code=sm_86)
    elseif(VTKm_CUDA_Architecture STREQUAL "all")
      set(arch_flags --generate-code=arch=compute_30,code=sm_30
                     --generate-code=arch=compute_35,code=sm_35
                     --generate-code=arch=compute_50,code=sm_50
                     --generate-code=arch=compute_60,code=sm_60
                     --generate-code=arch=compute_70,code=sm_70
-                     --generate-code=arch=compute_75,code=sm_75)
+                     --generate-code=arch=compute_75,code=sm_75
+                     --generate-code=arch=compute_80,code=sm_80
+                     --generate-code=arch=compute_86,code=sm_86)
    endif()

    string(REPLACE ";" " " arch_flags "${arch_flags}")
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
+    if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
+      #We propagate cuda flags via target* options so that they
+      #export cleanly
+      set(CMAKE_CUDA_ARCHITECTURES OFF)
+      target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${arch_flags}>)
+      target_link_options(vtkm_cuda INTERFACE $<DEVICE_LINK:${arch_flags}>)
+    else()
+      # Before 3.18 we had to use CMAKE_CUDA_FLAGS as we had no way
+      # to propagate flags to the device link step
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
+    endif()

    # This needs to be lower-case for the property to be properly exported
    # CMake 3.15 we can add `cuda_architecture_flags` to the EXPORT_PROPERTIES
    # target property to have this automatically exported for us
-    set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
    set(VTKm_CUDA_Architecture_Flags "${arch_flags}")
+    set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
+    unset(arch_flags)
+  endif()
+endif()
+
+#-----------------------------------------------------------------------------
+# Kokkos with its Cuda backend enabled, expects everything to be compiled using its
+# `nvcc-wrapper` as the CXX compiler. As the name suggests, nvcc-wrapper is a wrapper around
+# Cuda's nvcc compiler. Kokkos targets have all of the flags meant for the nvcc compiler set as the
+# CXX compiler flags. This function changes all such flags to be CUDA flags so that we can use
+# CMake and vtk-m's existing infrastructure to compile for Cuda and Host separately. Without this
+# all of the files will be compiled using nvcc which can be very time consuming. It can also have
+# issues with calling host functions from device functions when compiling code for other backends.
+function(kokkos_fix_compile_options)
+  # Walk every target reachable from Kokkos::kokkos through
+  # INTERFACE_LINK_LIBRARIES. `targets` is the work queue; `seen_targets`
+  # keeps a target from being rewritten more than once.
+  set(targets Kokkos::kokkos)
+  set(seen_targets)
+  set(cuda_arch)
+
+  while(targets)
+    list(GET targets 0 target_name)
+    list(REMOVE_AT targets 0)
+
+    get_target_property(link_libraries ${target_name} INTERFACE_LINK_LIBRARIES)
+    foreach(lib_target IN LISTS link_libraries)
+      if (TARGET ${lib_target})
+        if (lib_target IN_LIST seen_targets)
+          continue()
+        endif()
+
+        list(APPEND seen_targets ${lib_target})
+        list(APPEND targets ${lib_target})
+        get_target_property(compile_options ${lib_target} INTERFACE_COMPILE_OPTIONS)
+        if (compile_options)
+          # Pull out the host-compiler options that were wrapped in -Xcompiler
+          # so they can be re-added as plain CXX options below.
+          string(REGEX MATCH "[$]<[$]<COMPILE_LANGUAGE:CXX>:-Xcompiler;.*>" cxx_compile_options "${compile_options}")
+          # Remember the most recent -arch flag that was actually found. A
+          # target without one must not wipe out a value found earlier.
+          string(REGEX MATCH "-arch=sm_[0-9][0-9]" found_cuda_arch "${compile_options}")
+          if (NOT found_cuda_arch STREQUAL "")
+            set(cuda_arch "${found_cuda_arch}")
+          endif()
+          string(REPLACE "-Xcompiler;" "" cxx_compile_options "${cxx_compile_options}")
+          list(TRANSFORM compile_options REPLACE "--relocatable-device-code=true" "") #We use CMake for this flag
+          # Everything Kokkos tagged as a CXX option becomes a CUDA option.
+          list(TRANSFORM compile_options REPLACE "COMPILE_LANGUAGE:CXX" "COMPILE_LANGUAGE:CUDA")
+          list(APPEND compile_options "${cxx_compile_options}")
+          set_property(TARGET ${lib_target} PROPERTY INTERFACE_COMPILE_OPTIONS ${compile_options})
+        endif()
+
+        # Link options are cleared here and re-established on vtkm::kokkos below.
+        set_property(TARGET ${lib_target} PROPERTY INTERFACE_LINK_OPTIONS "")
+      endif()
+    endforeach()
+  endwhile()
+
+  set_property(TARGET vtkm::kokkos PROPERTY INTERFACE_LINK_OPTIONS "$<DEVICE_LINK:${cuda_arch}>")
+  if (OPENMP IN_LIST Kokkos_DEVICES)
+    # APPEND so the device-link architecture flag set above is kept.
+    set_property(TARGET vtkm::kokkos APPEND PROPERTY INTERFACE_LINK_OPTIONS "$<HOST_LINK:-fopenmp>")
+  endif()
+endfunction()
+
+# Locate Kokkos and create the vtkm::kokkos interface target, plus a
+# vtkm::kokkos_cuda or vtkm::kokkos_hip marker target depending on which
+# device Kokkos_DEVICES reports.
+if(VTKm_ENABLE_KOKKOS AND NOT TARGET vtkm::kokkos)
+  cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
+
+  find_package(Kokkos REQUIRED)
+  if (CUDA IN_LIST Kokkos_DEVICES)
+    cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
+    enable_language(CUDA)
+
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND
+       CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0" AND
+       CMAKE_BUILD_TYPE STREQUAL "Release")
+      message(WARNING "There is a known issue with Cuda 10 and -O3 optimization. Switching to -O2. Please refer to issue #555.")
+      # Rewrite each language's release flags from its own value so custom
+      # CUDA flags are not replaced by the CXX ones. The inputs are quoted so
+      # an empty flag string does not drop the argument.
+      string(REPLACE "-O3" "-O2" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
+      string(REPLACE "-O3" "-O2" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}")
+    endif()
+
+    # The last two digits of Kokkos_ARCH are used as the CUDA architecture.
+    string(REGEX MATCH "[0-9][0-9]$" cuda_arch ${Kokkos_ARCH})
+    set(CMAKE_CUDA_ARCHITECTURES ${cuda_arch})
+    message(STATUS "Detected Cuda arch from Kokkos: ${cuda_arch}")
+
+    add_library(vtkm::kokkos_cuda INTERFACE IMPORTED GLOBAL)
+  elseif(HIP IN_LIST Kokkos_DEVICES)
+    cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
+    enable_language(HIP)
+    add_library(vtkm::kokkos_hip INTERFACE IMPORTED GLOBAL)
+    set_property(TARGET Kokkos::kokkoscore PROPERTY INTERFACE_COMPILE_OPTIONS "")
+    set_property(TARGET Kokkos::kokkoscore PROPERTY INTERFACE_LINK_OPTIONS "")
+  endif()
+
+  add_library(vtkm::kokkos INTERFACE IMPORTED GLOBAL)
+  set_target_properties(vtkm::kokkos PROPERTIES INTERFACE_LINK_LIBRARIES "Kokkos::kokkos")
+
+  if (TARGET vtkm::kokkos_cuda)
+    kokkos_fix_compile_options()
  endif()
 endif()

--- a/CMake/VTKmMPI.cmake
+++ b/CMake/VTKmMPI.cmake
@ -1,24 +0,0 @@
-##============================================================================
-##  Copyright (c) Kitware, Inc.
-##  All rights reserved.
-##  See LICENSE.txt for details.
-##
-##  This software is distributed WITHOUT ANY WARRANTY; without even
-##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-##  PURPOSE.  See the above copyright notice for more information.
-##============================================================================
-
-if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
-  if(CMAKE_VERSION VERSION_LESS 3.15)
-    #While CMake 3.10 introduced the new MPI module.
-    #Fixes related to MPI+CUDA that VTK-m needs are
-    #only found in CMake 3.15+.
-    find_package(MPI REQUIRED MODULE)
-  else()
-    #clunky but we need to make sure we use the upstream module if it exists
-    set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
-    set(CMAKE_MODULE_PATH "")
-    find_package(MPI REQUIRED MODULE)
-    set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
-  endif()
-endif()
--- a/CMake/VTKmRenderingContexts.cmake
+++ b/CMake/VTKmRenderingContexts.cmake
@ -51,15 +51,7 @@ function(vtkm_find_gl)

  #Find GL
  if(DO_GL_FIND AND NOT TARGET OpenGL::GL)
-    if(CMAKE_VERSION VERSION_LESS 3.10)
-      find_package(OpenGL ${GL_REQUIRED} ${QUIETLY} MODULE)
-    else()
-      #clunky but we need to make sure we use the upstream module if it exists
-      set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
-      set(CMAKE_MODULE_PATH "")
-      find_package(OpenGL ${GL_REQUIRED} ${QUIETLY} MODULE)
-      set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
-    endif()
+    find_package(OpenGL ${GL_REQUIRED} ${QUIETLY} MODULE)
  endif()

  #Find GLEW
@ -69,6 +61,20 @@ function(vtkm_find_gl)

  if(DO_GLUT_FIND AND NOT TARGET GLUT::GLUT)
    find_package(GLUT ${GLUT_REQUIRED} ${QUIETLY})
+
+    if(APPLE AND CMAKE_VERSION VERSION_LESS 3.19.2)
+      # If a file named "<IMPORTED_LOCATION>.tbd" exists, point the imported
+      # target at it instead.
+      # NOTE(review): presumably a workaround for FindGLUT in CMake < 3.19.2
+      # not picking up .tbd stubs on macOS — confirm.
+      # GLUT may be optional, so only touch targets that find_package created;
+      # get_target_property on a missing target is a hard error.
+      foreach(glut_target IN ITEMS GLUT::GLUT GLUT::Cocoa)
+        if(TARGET ${glut_target})
+          get_target_property(lib_path ${glut_target} IMPORTED_LOCATION)
+          if(EXISTS "${lib_path}.tbd")
+            set_target_properties(${glut_target} PROPERTIES
+              IMPORTED_LOCATION "${lib_path}.tbd")
+          endif()
+        endif()
+      endforeach()
+    endif()
  endif()

 endfunction()
--- a/CMake/VTKmWrappers.cmake
+++ b/CMake/VTKmWrappers.cmake
@ -10,9 +10,13 @@

 include(CMakeParseArguments)

+include(VTKmCMakeBackports)
 include(VTKmDeviceAdapters)
 include(VTKmCPUVectorization)
-include(VTKmMPI)
+
+if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
+  find_package(MPI REQUIRED MODULE)
+endif()

 #-----------------------------------------------------------------------------
 # INTERNAL FUNCTIONS
@ -29,17 +33,17 @@ function(vtkm_get_kit_name kitvar)
  # Optional second argument to get dir_prefix.
  if (${ARGC} GREATER 1)
    set(${ARGV1} "${dir_prefix}" PARENT_SCOPE)
-  endif (${ARGC} GREATER 1)
+  endif ()
 endfunction(vtkm_get_kit_name)

 #-----------------------------------------------------------------------------
 function(vtkm_pyexpander_generated_file generated_file_name)
  # If pyexpander is available, add targets to build and check
-  if(PYEXPANDER_FOUND AND PYTHONINTERP_FOUND)
+  if(PYEXPANDER_FOUND AND TARGET Python::Interpreter)
    add_custom_command(
      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${generated_file_name}.checked
      COMMAND ${CMAKE_COMMAND}
-        -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}
+        -DPYTHON_EXECUTABLE=${Python_EXECUTABLE}
        -DPYEXPANDER_COMMAND=${PYEXPANDER_COMMAND}
        -DSOURCE_FILE=${CMAKE_CURRENT_SOURCE_DIR}/${generated_file_name}
        -DGENERATED_FILE=${CMAKE_CURRENT_BINARY_DIR}/${generated_file_name}
@ -62,7 +66,7 @@ function(vtkm_generate_export_header lib_name)

  # Now generate a header that holds the macros needed to easily export
  # template classes. This
-  string(TOUPPER ${kit_name} BASE_NAME_UPPER)
+  string(TOUPPER ${lib_name} BASE_NAME_UPPER)
  set(EXPORT_MACRO_NAME "${BASE_NAME_UPPER}")

  set(EXPORT_IS_BUILT_STATIC 0)
@ -77,17 +81,17 @@ function(vtkm_generate_export_header lib_name)
  if(NOT EXPORT_IMPORT_CONDITION)
    #set EXPORT_IMPORT_CONDITION to what the DEFINE_SYMBOL would be when
    #building shared
-    set(EXPORT_IMPORT_CONDITION ${kit_name}_EXPORTS)
+    set(EXPORT_IMPORT_CONDITION ${lib_name}_EXPORTS)
  endif()


  configure_file(
      ${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
-      ${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
+      ${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
    @ONLY)

  if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
-    install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
+    install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
      DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${dir_prefix}
      )
  endif()
@ -110,6 +114,35 @@ function(vtkm_declare_headers)
  vtkm_install_headers("${dir_prefix}" ${ARGN})
 endfunction(vtkm_declare_headers)

+#-----------------------------------------------------------------------------
+function(vtkm_setup_job_pool)
+  # Registers the `vtkm_pool` job pool the first time it is called; later
+  # calls are no-ops. The pool is intended for memory-hungry components such
+  # as worklet tests, filters, and filter tests.
+  get_property(pool_already_defined
+    GLOBAL PROPERTY VTKM_JOB_POOL_ESTABLISHED SET)
+  if(pool_already_defined)
+    return()
+  endif()
+
+  # VTK-m filters expand a lot of templates and need a lot of memory to
+  # compile. With generators such as ninja, limit how many of these
+  # compilation units build concurrently.
+  #
+  # Budget 3GB per compile process and keep at least 3GB free for jobs that
+  # are not part of the pool; never go below one slot.
+  cmake_host_system_information(RESULT total_memory QUERY TOTAL_PHYSICAL_MEMORY)
+  math(EXPR pool_slots "(${total_memory}/3072)-1")
+  if(pool_slots LESS 1)
+    set(pool_slots 1)
+  endif()
+
+  set_property(GLOBAL APPEND PROPERTY JOB_POOLS vtkm_pool=${pool_slots})
+  set_property(GLOBAL PROPERTY VTKM_JOB_POOL_ESTABLISHED TRUE)
+endfunction()
+
 #-----------------------------------------------------------------------------
 # FORWARD FACING API

@ -117,9 +150,14 @@ endfunction(vtkm_declare_headers)
 # Pass to consumers extra compile flags they need to add to CMAKE_CUDA_FLAGS
 # to have CUDA compatibility.
 #
-# This is required as currently the -sm/-gencode flags when specified inside
-# COMPILE_OPTIONS / target_compile_options are not propagated to the device
-# linker. Instead they must be specified in CMAKE_CUDA_FLAGS
+# If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
+# a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, this will
+# return an empty string as the `vtkm::cuda` target will correctly propagate
+# all the necessary flags.
+#
+# This is required for CMake < 3.18 as they don't support the `$<DEVICE_LINK>`
+# generator expression for `target_link_options`. Instead they need to be
+# specified in CMAKE_CUDA_FLAGS
 #
 #
 # add_library(lib_that_uses_vtkm ...)
@ -127,7 +165,18 @@ endfunction(vtkm_declare_headers)
 # target_link_libraries(lib_that_uses_vtkm PRIVATE vtkm_filter)
 #
 function(vtkm_get_cuda_flags settings_var)
+
  if(TARGET vtkm::cuda)
+    if(POLICY CMP0105)
+      # When CMP0105 is NEW and vtkm::cuda carries INTERFACE_LINK_OPTIONS, the
+      # target itself propagates the architecture flags, so nothing needs to
+      # be added to the caller's variable.
+      cmake_policy(GET CMP0105 does_device_link)
+      get_property(arch_flags
+        TARGET vtkm::cuda
+        PROPERTY INTERFACE_LINK_OPTIONS)
+      # Test the queried policy state; the bare token CMP0105 is not a
+      # variable and would never compare equal to "NEW".
+      if(arch_flags AND does_device_link STREQUAL "NEW")
+        return()
+      endif()
+    endif()
+
    get_property(arch_flags
      TARGET    vtkm::cuda
      PROPERTY  cuda_architecture_flags)
@ -203,8 +252,14 @@ endfunction()
 #
 #
 #  MODIFY_CUDA_FLAGS: If enabled will add the required -arch=<ver> flags
-#  that VTK-m was compiled with. If you have multiple libraries that use
-#  VTK-m calling `vtkm_add_target_information` multiple times with
+#  that VTK-m was compiled with.
+#
+#  If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
+#  a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, this will
+#  return an empty string as the `vtkm::cuda` target will correctly propagate
+#  all the necessary flags.
+#
+#  Note: calling `vtkm_add_target_information` multiple times with
 #  `MODIFY_CUDA_FLAGS` will cause duplicate compiler flags. To resolve this issue
 #  you can: pass all targets and sources to a single `vtkm_add_target_information`
 #  call, have the first one use `MODIFY_CUDA_FLAGS`, or use the provided
@ -246,10 +301,11 @@ function(vtkm_add_target_information uses_vtkm_target)
    ${ARGN}
    )

-
  if(VTKm_TI_MODIFY_CUDA_FLAGS)
-    vtkm_get_cuda_flags(CMAKE_CUDA_FLAGS)
-    set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} PARENT_SCOPE)
+    vtkm_get_cuda_flags(cuda_flags)
+    if(cuda_flags)
+      set(CMAKE_CUDA_FLAGS ${cuda_flags} PARENT_SCOPE)
+    endif()
  endif()

  set(targets ${uses_vtkm_target})
@ -262,6 +318,8 @@ function(vtkm_add_target_information uses_vtkm_target)
  # set the required target properties
  set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
  set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+  # CUDA_ARCHITECTURES added in CMake 3.18
+  set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)

  if(VTKm_TI_DROP_UNUSED_SYMBOLS)
    foreach(target IN LISTS targets)
@ -269,6 +327,12 @@ function(vtkm_add_target_information uses_vtkm_target)
    endforeach()
  endif()

+  if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
+    set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
+  elseif(TARGET vtkm::kokkos_hip)
+    set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "HIP")
+  endif()
+
  # Validate the following:
  #   - We are building with CUDA enabled.
  #   - We are building a VTK-m library or a library that wants cross library
@ -276,11 +340,15 @@ function(vtkm_add_target_information uses_vtkm_target)
  #
  # This is required as CUDA currently doesn't support device side calls across
  # dynamic library boundaries.
-  if(TARGET vtkm::cuda)
-    set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
+  if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
    foreach(target IN LISTS targets)
      get_target_property(lib_type ${target} TYPE)
-      get_target_property(requires_static vtkm::cuda requires_static_builds)
+      if (TARGET vtkm::cuda)
+        get_target_property(requires_static vtkm::cuda requires_static_builds)
+      endif()
+      if (TARGET vtkm::kokkos)
+        get_target_property(requires_static vtkm::kokkos requires_static_builds)
+      endif()

      if(requires_static AND ${lib_type} STREQUAL "SHARED_LIBRARY" AND VTKm_TI_EXTENDS_VTKM)
        #We provide different error messages based on if we are building VTK-m
@ -313,10 +381,11 @@ endfunction()
 #   SOURCES <source_list>
 #   TEMPLATE_SOURCES <.hxx >
 #   HEADERS <header list>
+#   USE_VTKM_JOB_POOL
 #   [ DEVICE_SOURCES <source_list> ]
 #   )
 function(vtkm_library)
-  set(options OBJECT STATIC SHARED)
+  set(options OBJECT STATIC SHARED USE_VTKM_JOB_POOL)
  set(oneValueArgs NAME)
  set(multiValueArgs SOURCES HEADERS TEMPLATE_SOURCES DEVICE_SOURCES)
  cmake_parse_arguments(VTKm_LIB
@ -348,7 +417,7 @@ function(vtkm_library)
                              EXTENDS_VTKM
                              DEVICE_SOURCES ${VTKm_LIB_DEVICE_SOURCES}
                              )
-  if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
+  if(VTKm_HIDE_PRIVATE_SYMBOLS)
    set_property(TARGET ${lib_name} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
    set_property(TARGET ${lib_name} PROPERTY CXX_VISIBILITY_PRESET "hidden")
  endif()
@ -357,7 +426,6 @@ function(vtkm_library)
  set_property(TARGET ${lib_name} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
  set_property(TARGET ${lib_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})

-
  # allow the static cuda runtime to find the driver (libcuda.dylib) at runtime.
  if(APPLE)
    set_property(TARGET ${lib_name} PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
@ -402,4 +470,9 @@ function(vtkm_library)
    RUNTIME DESTINATION ${VTKm_INSTALL_BIN_DIR}
    )

+  if(VTKm_LIB_USE_VTKM_JOB_POOL)
+    vtkm_setup_job_pool()
+    set_property(TARGET ${lib_name} PROPERTY JOB_POOL_COMPILE vtkm_pool)
+  endif()
+
 endfunction(vtkm_library)
--- a/CMake/doxyfile.in
+++ b/CMake/doxyfile.in
@ -357,8 +357,6 @@ ALLEXTERNALS           = NO

 EXTERNAL_GROUPS        = YES

-PERL_PATH              = /usr/bin/perl
-
 #---------------------------------------------------------------------------
 # Configuration options related to the dot tool
 #---------------------------------------------------------------------------
--- a/CMake/patches/3.15/FindMPI.cmake
+++ b/CMake/patches/3.15/FindMPI.cmake
@ -306,18 +306,6 @@ set(_MPI_XL_Fortran_COMPILER_NAMES         mpixlf95   mpixlf95_r mpxlf95 mpxlf95
                                           mpixlf77   mpixlf77_r mpxlf77 mpxlf77_r
                                           mpixlf     mpixlf_r   mpxlf   mpxlf_r)

-# Allow CMake 3.8.0 to use OS specific `separate_arguments` signature.
-# Otherwise use the 3.9 NATIVE_COMMAND feature that does this for us
-# automatically
-if (CMAKE_VERSION VERSION_LESS "3.9.0")
-  if (WIN32 AND NOT CYGWIN)
-    set(_MPI_parse_kind WINDOWS_COMMAND)
-  else ()
-    set(_MPI_parse_kind UNIX_COMMAND)
-  endif ()
-else ()
-  set(_MPI_parse_kind NATIVE_COMMAND)
-endif ()

 # Prepend vendor-specific compiler wrappers to the list. If we don't know the compiler,
 # attempt all of them.
@ -353,9 +341,9 @@ unset(_MPIEXEC_NAMES_BASE)

 function (_MPI_check_compiler LANG QUERY_FLAG OUTPUT_VARIABLE RESULT_VARIABLE)
  if(DEFINED MPI_${LANG}_COMPILER_FLAGS)
-    separate_arguments(_MPI_COMPILER_WRAPPER_OPTIONS ${_MPI_parse_kind} "${MPI_${LANG}_COMPILER_FLAGS}")
+    separate_arguments(_MPI_COMPILER_WRAPPER_OPTIONS NATIVE_COMMAND "${MPI_${LANG}_COMPILER_FLAGS}")
  else()
-    separate_arguments(_MPI_COMPILER_WRAPPER_OPTIONS ${_MPI_parse_kind} "${MPI_COMPILER_FLAGS}")
+    separate_arguments(_MPI_COMPILER_WRAPPER_OPTIONS NATIVE_COMMAND "${MPI_COMPILER_FLAGS}")
  endif()
  execute_process(
    COMMAND ${MPI_${LANG}_COMPILER} ${_MPI_COMPILER_WRAPPER_OPTIONS} ${QUERY_FLAG}
@ -643,7 +631,7 @@ function (_MPI_interrogate_compiler LANG)
  if (NOT MPI_ALL_INCLUDE_PATHS)
    _MPI_check_compiler(${LANG} "-showme:incdirs" MPI_INCDIRS_CMDLINE MPI_INCDIRS_COMPILER_RETURN)
    if(MPI_INCDIRS_COMPILER_RETURN)
-      separate_arguments(MPI_ALL_INCLUDE_PATHS ${_MPI_parse_kind} "${MPI_INCDIRS_CMDLINE}")
+      separate_arguments(MPI_ALL_INCLUDE_PATHS NATIVE_COMMAND "${MPI_INCDIRS_CMDLINE}")
    endif()
  endif()

@ -711,7 +699,7 @@ function (_MPI_interrogate_compiler LANG)
  if (NOT MPI_ALL_LINK_PATHS)
    _MPI_check_compiler(${LANG} "-showme:libdirs" MPI_LIBDIRS_CMDLINE MPI_LIBDIRS_COMPILER_RETURN)
    if(MPI_LIBDIRS_COMPILER_RETURN)
-      separate_arguments(MPI_ALL_LINK_PATHS ${_MPI_parse_kind} "${MPI_LIBDIRS_CMDLINE}")
+      separate_arguments(MPI_ALL_LINK_PATHS NATIVE_COMMAND "${MPI_LIBDIRS_CMDLINE}")
    endif()
  endif()

@ -1170,7 +1158,7 @@ macro(_MPI_create_imported_target LANG)

  set_property(TARGET MPI::MPI_${LANG} PROPERTY INTERFACE_LINK_LIBRARIES "")
  if(MPI_${LANG}_LINK_FLAGS)
-    separate_arguments(_MPI_${LANG}_LINK_FLAGS ${_MPI_parse_kind} "${MPI_${LANG}_LINK_FLAGS}")
+    separate_arguments(_MPI_${LANG}_LINK_FLAGS NATIVE_COMMAND "${MPI_${LANG}_LINK_FLAGS}")
    if(CMAKE_VERSION VERSION_LESS 3.13)
      set_property(TARGET MPI::MPI_${LANG} APPEND PROPERTY INTERFACE_LINK_LIBRARIES "${_MPI_${LANG}_LINK_FLAGS}")
    else()
@ -1362,7 +1350,7 @@ if(NOT MPI_IGNORE_LEGACY_VARIABLES)
    unset(MPI_${LANG}_EXTRA_COMPILE_DEFINITIONS)
    unset(MPI_${LANG}_EXTRA_COMPILE_OPTIONS)
    if(MPI_${LANG}_COMPILE_FLAGS)
-      separate_arguments(MPI_SEPARATE_FLAGS ${_MPI_parse_kind} "${MPI_${LANG}_COMPILE_FLAGS}")
+      separate_arguments(MPI_SEPARATE_FLAGS NATIVE_COMMAND "${MPI_${LANG}_COMPILE_FLAGS}")
      foreach(_MPI_FLAG IN LISTS MPI_SEPARATE_FLAGS)
        if("${_MPI_FLAG}" MATCHES "^ *-D([^ ]+)")
          list(APPEND MPI_${LANG}_EXTRA_COMPILE_DEFINITIONS "${CMAKE_MATCH_1}")
--- a/CMake/patches/FindMPI.cmake
+++ b/CMake/patches/FindMPI.cmake
@ -0,0 +1,18 @@
+##=============================================================================
+##
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##
+##=============================================================================
+
+# Dispatch to a FindMPI implementation based on the running CMake version:
+# before 3.15 use the copy bundled in the `3.15` directory next to this file,
+# otherwise defer to the FindMPI module shipped under CMAKE_ROOT.
+if(CMAKE_VERSION VERSION_LESS 3.15)
+  include(${CMAKE_CURRENT_LIST_DIR}/3.15/FindMPI.cmake)
+else()
+  include(${CMAKE_ROOT}/Modules/FindMPI.cmake)
+endif()
--- a/CMake/patches/README.md
+++ b/CMake/patches/README.md
@ -0,0 +1,7 @@
+# CMake backports
+
+This directory contains backports from newer CMake versions to help support
+actually using older CMake versions for building VTK-m. The directory name is the
+minimum version of CMake for which the contained files are no longer necessary.
+For example, the files under the `3.15` directory are not needed for 3.15 or
+3.16, but are for 3.14.
--- a/CMake/testing/VTKmCheckPyexpander.cmake
+++ b/CMake/testing/VTKmCheckPyexpander.cmake
@ -37,11 +37,19 @@ if(NOT GENERATED_FILE)
  return()
 endif()

-execute_process(
-  COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
-  RESULT_VARIABLE pyexpander_result
-  OUTPUT_VARIABLE pyexpander_output
+# Run pyexpander on the `.in` template, capturing its exit status and output.
+# Under MSVC the command is launched through PYTHON_EXECUTABLE; everywhere
+# else PYEXPANDER_COMMAND is executed directly.
+if(MSVC)
+  execute_process(
+    COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
+    RESULT_VARIABLE pyexpander_result
+    OUTPUT_VARIABLE pyexpander_output
  )
+else()
+  execute_process(
+    COMMAND ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
+    RESULT_VARIABLE pyexpander_result
+    OUTPUT_VARIABLE pyexpander_output
+  )
+endif()

 if(pyexpander_result)
  # If pyexpander returned non-zero, it failed.
--- a/CMake/testing/VTKmCheckSourceInInstall.cmake
+++ b/CMake/testing/VTKmCheckSourceInInstall.cmake
@ -18,6 +18,7 @@
 #        -DVTKm_INSTALL_INCLUDE_DIR=<VTKm_INSTALL_INCLUDE_DIR>
 #        -DVTKm_ENABLE_RENDERING=<VTKm_ENABLE_RENDERING>
 #        -DVTKm_ENABLE_LOGGING=<VTKm_ENABLE_LOGGING>
+#        -DVTKm_ENABLE_HDF5_IO=<VTKm_ENABLE_HDF5_IO>
 #        -P <VTKm_SOURCE_DIR>/CMake/testing/VTKmCheckSourceInInstall.cmake
 ##

@ -39,7 +40,9 @@ endif ()
 if (NOT DEFINED VTKm_ENABLE_LOGGING)
  message(FATAL_ERROR "VTKm_ENABLE_LOGGING not defined.")
 endif ()
-
+if (NOT DEFINED VTKm_ENABLE_HDF5_IO)
+  message(FATAL_ERROR "VTKm_ENABLE_HDF5_IO not defined.")
+endif()

 include(CMakeParseArguments)
 # -----------------------------------------------------------------------------
@ -104,13 +107,25 @@ function(do_verify root_dir prefix)
  #Step 1. Setup the extensions to check, and all file and directory
  # extensions
  set(files_extensions
-    *.hpp #needed for diy and taotuple
+    *.hpp #needed for diy
    *.h
    *.hxx
    )

  set(file_exceptions
-    cont/ColorTablePrivate.hxx
+    thirdparty/diy/vtkmdiy/cmake/mpi_types.h
+
+    # Ignore deprecated virtual classes (which are not installed if VTKm_NO_DEPRECATED_VIRTUAL
+    # is on). These exceptions can be removed when these files are completely removed.
+    cont/ArrayHandleVirtual.h
+    cont/ArrayHandleVirtual.hxx
+    cont/ArrayHandleVirtualCoordinates.h
+    cont/CellLocator.h
+    cont/PointLocator.h
+    cont/StorageVirtual.h
+    cont/StorageVirtual.hxx
+    exec/CellLocator.h
+    exec/PointLocator.h
    )

  #by default every header in a testing directory doesn't need to be installed
@ -123,7 +138,12 @@ function(do_verify root_dir prefix)
  if(NOT VTKm_ENABLE_LOGGING)
    list(APPEND directory_exceptions thirdparty/loguru)
  endif()
-
+  if (NOT VTKm_ENABLE_HDF5_IO)
+    list(APPEND file_exceptions
+      io/ImageWriterHDF5.h
+      io/ImageReaderHDF5.h
+      )
+  endif()
  #Step 2. Verify the installed files match what headers are listed in each
  # source directory
  verify_install_per_dir("${VTKm_SOURCE_DIR}/vtkm"
--- a/CMake/testing/VTKmCompilerDynamicAnalysisFlags.cmake
+++ b/CMake/testing/VTKmCompilerDynamicAnalysisFlags.cmake
@ -0,0 +1,53 @@
+##============================================================================
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##============================================================================
+
+#-----------------------------------------------------------------------------
+# check if this is a sanitizer build. If so, set up the environment.
+
+function(vtkm_check_sanitizer_build)
+  # Write the sanitizer blacklist (one entry per line) into the build tree.
+  set(blacklist_path "${VTKm_BINARY_DIR}/sanitizer_blacklist.txt")
+  file(WRITE "${blacklist_path}" "\nsrc:${VTKm_SOURCE_DIR}/vtkm/thirdparty/\n")
+
+  # One `-fsanitize=<name> ` entry for every sanitizer in VTKm_USE_SANITIZER.
+  set(extra_flags "")
+  foreach(sanitizer_name IN LISTS VTKm_USE_SANITIZER)
+    set(extra_flags "${extra_flags}-fsanitize=${sanitizer_name} ")
+  endforeach()
+
+  # The blacklist flag is only added when VTKM_COMPILER_IS_CLANG is set.
+  if(VTKM_COMPILER_IS_CLANG)
+    set(extra_flags "${extra_flags}\"-fsanitize-blacklist=${blacklist_path}\"")
+  endif()
+
+  # Append the flags to the caller's compile and link flag variables.
+  foreach(flag_kind IN ITEMS C CXX SHARED_LINKER EXE_LINKER)
+    set(CMAKE_${flag_kind}_FLAGS "${CMAKE_${flag_kind}_FLAGS} ${extra_flags}" PARENT_SCOPE)
+  endforeach()
+endfunction()
+
+# Flag the C++ compiler as Clang when its ID is Clang or AppleClang.
+# NOTE(review): CMAKE_COMPILER_IS_CLANGXX is not read anywhere else in this
+# file; the checks below use VTKM_COMPILER_IS_CLANG — confirm it is needed.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
+   CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+  set(CMAKE_COMPILER_IS_CLANGXX 1)
+endif()
+
+# The sanitizer options are only offered when VTKM_COMPILER_IS_CLANG or
+# VTKM_COMPILER_IS_GNU is set.
+if(VTKM_COMPILER_IS_CLANG OR VTKM_COMPILER_IS_GNU)
+  # Switch for sanitizer builds; off by default and marked advanced.
+  vtkm_option(VTKm_ENABLE_SANITIZER "Build with sanitizer support." OFF)
+  mark_as_advanced(VTKm_ENABLE_SANITIZER)
+
+  # Which sanitizer(s) to enable. vtkm_check_sanitizer_build iterates this
+  # value as a list, emitting one -fsanitize=<entry> flag per item.
+  set(VTKm_USE_SANITIZER "address" CACHE STRING "The sanitizer to use")
+  mark_as_advanced(VTKm_USE_SANITIZER)
+
+  # Apply the sanitizer flags to this scope's compile and link flags.
+  if(VTKm_ENABLE_SANITIZER)
+    vtkm_check_sanitizer_build()
+  endif()
+
+endif()
--- a/CMake/testing/VTKmTestInstall.cmake
+++ b/CMake/testing/VTKmTestInstall.cmake
@ -17,6 +17,7 @@ function(vtkm_test_install )
      "-DVTKm_INSTALL_INCLUDE_DIR=${VTKm_INSTALL_INCLUDE_DIR}"
      "-DVTKm_ENABLE_RENDERING=${VTKm_ENABLE_RENDERING}"
      "-DVTKm_ENABLE_LOGGING=${VTKm_ENABLE_LOGGING}"
+      "-DVTKm_ENABLE_HDF5_IO=${VTKm_ENABLE_HDF5_IO}"
      )

    #By having this as separate tests using fixtures, it will allow us in
@ -110,6 +111,10 @@ function(vtkm_test_against_install dir)
    )
  endif()

+  if(TARGET vtkm::kokkos)
+    list(APPEND args "-DKokkos_DIR=${Kokkos_DIR}")
+  endif()
+
  #determine if the test is expected to compile or fail to build. We use
  #this information to build the test name to make it clear to the user
  #what a 'passing' test means
--- a/CMake/testing/VTKmTestWrappers.cmake
+++ b/CMake/testing/VTKmTestWrappers.cmake
@ -10,6 +10,74 @@

 include(VTKmWrappers)

+# Builds one test-driver executable from a list of test sources.
+#
+#   prog_name           : base name of the executable; when is_mpi_test is
+#                         true a `_mpi` or `_nompi` suffix is appended
+#   sources             : test source files compiled into the driver
+#   libraries           : libraries linked in addition to vtkm_cont_testing
+#   defines             : private compile definitions for the executable
+#   is_mpi_test         : when true, the DIY environment header is passed to
+#                         create_test_sourcelist as EXTRA_INCLUDE
+#   use_mpi             : for MPI tests, selects vtkm_diy_use_mpi(ON) or (OFF)
+#   enable_all_backends : when true and a device language target exists, the
+#                         sources are passed on as DEVICE_SOURCES
+#   use_job_pool        : when true, compilation is assigned to `vtkm_pool`
+function(vtkm_create_test_executable
+  prog_name
+  sources
+  libraries
+  defines
+  is_mpi_test
+  use_mpi
+  enable_all_backends
+  use_job_pool)
+
+  # Paired with vtkm_diy_use_mpi_pop() at the end of this function.
+  vtkm_diy_use_mpi_push()
+
+  set(prog ${prog_name})
+
+  # for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
+  if (is_mpi_test)
+    set(extraArgs EXTRA_INCLUDE "vtkm/thirdparty/diy/environment.h")
+
+    if (use_mpi)
+      vtkm_diy_use_mpi(ON)
+      set(prog "${prog}_mpi")
+    else()
+      vtkm_diy_use_mpi(OFF)
+      set(prog "${prog}_nompi")
+    endif()
+  else()
+    set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "")
+  endif()
+
+  #the creation of the test source list needs to occur before the labeling as
+  #cuda. This is so that we get the correctly named entry points generated
+  create_test_sourcelist(test_sources ${prog}.cxx ${sources} ${extraArgs})
+
+  add_executable(${prog} ${prog}.cxx ${sources})
+  vtkm_add_drop_unused_function_flags(${prog})
+  target_compile_definitions(${prog} PRIVATE ${defines})
+
+  #determine if we have a device that requires a separate compiler enabled
+  set(device_lang_enabled FALSE)
+  if( (TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda) OR (TARGET vtkm::kokkos_hip))
+    set(device_lang_enabled TRUE)
+  endif()
+
+  #if all backends are enabled, we can use the device compiler to handle all possible backends.
+  set(device_sources)
+  if(device_lang_enabled AND enable_all_backends)
+    set(device_sources ${sources})
+  endif()
+  vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
+
+  # Use the same visibility switch as vtkm_library so test executables and
+  # the libraries they link agree on symbol visibility.
+  if(VTKm_HIDE_PRIVATE_SYMBOLS)
+    set_property(TARGET ${prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
+    set_property(TARGET ${prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
+  endif()
+  set_property(TARGET ${prog} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
+  set_property(TARGET ${prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
+  set_property(TARGET ${prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
+
+  target_link_libraries(${prog} PRIVATE vtkm_cont_testing ${libraries})
+
+  # Throttle compilation of this executable through the shared job pool.
+  if(use_job_pool)
+    vtkm_setup_job_pool()
+    set_property(TARGET ${prog} PROPERTY JOB_POOL_COMPILE vtkm_pool)
+  endif()
+
+  vtkm_diy_use_mpi_pop()
+endfunction()
+
 #-----------------------------------------------------------------------------
 # Declare unit tests, which should be in the same directory as a kit
 # (package, module, whatever you call it).  Usage:
@ -22,6 +90,7 @@ include(VTKmWrappers)
 #   TEST_ARGS <argument_list>
 #   MPI
 #   ALL_BACKENDS
+#   USE_VTKM_JOB_POOL
 #   <options>
 #   )
 #
@ -35,7 +104,9 @@ include(VTKmWrappers)
 #               test executable
 #
 # [MPI]       : when specified, the tests should be run in parallel if
-#               MPI is enabled.
+#               MPI is enabled. The tests should also be able to build and run
+#               when MPI is not available, i.e., they should not make explicit
+#               use of MPI and instead completely rely on DIY.
 # [ALL_BACKENDS] : when specified, the tests would test against all enabled
 #                  backends. Otherwise we expect the tests to manage the
 #                  backends at runtime.
@ -46,7 +117,7 @@ function(vtkm_unit_tests)
  endif()

  set(options)
-  set(global_options ${options} MPI ALL_BACKENDS)
+  set(global_options ${options} USE_VTKM_JOB_POOL MPI ALL_BACKENDS)
  set(oneValueArgs BACKEND NAME LABEL)
  set(multiValueArgs SOURCES LIBRARIES DEFINES TEST_ARGS)
  cmake_parse_arguments(VTKm_UT
@ -55,9 +126,6 @@ function(vtkm_unit_tests)
    )
  vtkm_parse_test_options(VTKm_UT_SOURCES "${options}" ${VTKm_UT_SOURCES})

-  set(test_prog)
-
-
  set(per_device_command_line_arguments "NONE")
  set(per_device_suffix "")
  set(per_device_timeout 180)
@ -90,8 +158,16 @@ function(vtkm_unit_tests)
      #serially
      list(APPEND per_device_serial TRUE)
    endif()
+    if (VTKm_ENABLE_KOKKOS)
+      list(APPEND per_device_command_line_arguments --device=kokkos)
+      list(APPEND per_device_suffix "KOKKOS")
+      #may require more time because of kernel generation.
+      list(APPEND per_device_timeout 1500)
+      list(APPEND per_device_serial FALSE)
+    endif()
  endif()

+  set(test_prog)
  if(VTKm_UT_NAME)
    set(test_prog "${VTKm_UT_NAME}")
  else()
@ -102,41 +178,50 @@ function(vtkm_unit_tests)
  # For Testing Purposes, we will set the default logging level to INFO
  list(APPEND vtkm_default_test_log_level "-v" "INFO")

+  # Add the path to the data directory so tests can find and use data files for testing
+  list(APPEND VTKm_UT_TEST_ARGS "--data-dir=${VTKm_SOURCE_DIR}/data/data")
+
+  # Add the path to the location where regression test images are to be stored
+  list(APPEND VTKm_UT_TEST_ARGS "--baseline-dir=${VTKm_SOURCE_DIR}/data/baseline")
+
+  # Add the path to the location where generated regression test images should be written
+  list(APPEND VTKm_UT_TEST_ARGS "--write-dir=${VTKm_BINARY_DIR}")
+
  if(VTKm_UT_MPI)
-    # for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
-    set(test_prog "${test_prog}_mpi")
-    set(extraArgs EXTRA_INCLUDE "vtkm/cont/testing/Testing.h"
-                  FUNCTION "vtkm::cont::testing::Environment env")
+    if (VTKm_ENABLE_MPI)
+      vtkm_create_test_executable(
+        ${test_prog}
+        "${VTKm_UT_SOURCES}"
+        "${VTKm_UT_LIBRARIES}"
+        "${VTKm_UT_DEFINES}"
+        ON   # is_mpi_test
+        ON   # use_mpi
+        ${enable_all_backends}
+        ${VTKm_UT_USE_VTKM_JOB_POOL})
+    endif()
+    if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
+      vtkm_create_test_executable(
+        ${test_prog}
+        "${VTKm_UT_SOURCES}"
+        "${VTKm_UT_LIBRARIES}"
+        "${VTKm_UT_DEFINES}"
+        ON   # is_mpi_test
+        OFF  # use_mpi
+        ${enable_all_backends}
+        ${VTKm_UT_USE_VTKM_JOB_POOL})
+    endif()
  else()
-    set(extraArgs)
+    vtkm_create_test_executable(
+      ${test_prog}
+      "${VTKm_UT_SOURCES}"
+      "${VTKm_UT_LIBRARIES}"
+      "${VTKm_UT_DEFINES}"
+      OFF   # is_mpi_test
+      OFF   # use_mpi
+      ${enable_all_backends}
+      ${VTKm_UT_USE_VTKM_JOB_POOL})
  endif()

-  #the creation of the test source list needs to occur before the labeling as
-  #cuda. This is so that we get the correctly named entry points generated
-  create_test_sourcelist(test_sources ${test_prog}.cxx ${VTKm_UT_SOURCES} ${extraArgs})
-
-  add_executable(${test_prog} ${test_prog}.cxx ${VTKm_UT_SOURCES})
-  vtkm_add_drop_unused_function_flags(${test_prog})
-  target_compile_definitions(${test_prog} PRIVATE ${VTKm_UT_DEFINES})
-
-
-  #if all backends are enabled, we can use cuda compiler to handle all possible backends.
-  set(device_sources )
-  if(TARGET vtkm::cuda AND enable_all_backends)
-    set(device_sources ${VTKm_UT_SOURCES})
-  endif()
-  vtkm_add_target_information(${test_prog} DEVICE_SOURCES ${device_sources})
-
-  if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
-    set_property(TARGET ${test_prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
-    set_property(TARGET ${test_prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
-  endif()
-  set_property(TARGET ${test_prog} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
-  set_property(TARGET ${test_prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
-  set_property(TARGET ${test_prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
-
-  target_link_libraries(${test_prog} PRIVATE vtkm_cont ${VTKm_UT_LIBRARIES})
-
  list(LENGTH per_device_command_line_arguments number_of_devices)
  foreach(index RANGE ${number_of_devices})
    if(index EQUAL number_of_devices)
@ -158,25 +243,42 @@ function(vtkm_unit_tests)

    foreach (test ${VTKm_UT_SOURCES})
      get_filename_component(tname ${test} NAME_WE)
-      if(VTKm_UT_MPI AND VTKm_ENABLE_MPI)
-        add_test(NAME ${tname}${upper_backend}
-          COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS}
-                  $<TARGET_FILE:${test_prog}> ${tname} ${device_command_line_argument}
-                  ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS} ${MPIEXEC_POSTFLAGS}
-          )
-      else()
+      if(VTKm_UT_MPI)
+        if (VTKm_ENABLE_MPI)
+          add_test(NAME ${tname}${upper_backend}_mpi
+            COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS}
+                    $<TARGET_FILE:${test_prog}_mpi> ${tname} ${device_command_line_argument}
+                    ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS} ${MPIEXEC_POSTFLAGS}
+            )
+          set_tests_properties("${tname}${upper_backend}_mpi" PROPERTIES
+            LABELS "${upper_backend};${VTKm_UT_LABEL}"
+            TIMEOUT ${timeout}
+            RUN_SERIAL ${run_serial}
+            FAIL_REGULAR_EXPRESSION "runtime error")
+        endif() # VTKm_ENABLE_MPI
+        if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
+          add_test(NAME ${tname}${upper_backend}_nompi
+            COMMAND ${test_prog}_nompi ${tname} ${device_command_line_argument}
+                    ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS}
+            )
+          set_tests_properties("${tname}${upper_backend}_nompi" PROPERTIES
+            LABELS "${upper_backend};${VTKm_UT_LABEL}"
+            TIMEOUT ${timeout}
+            RUN_SERIAL ${run_serial}
+            FAIL_REGULAR_EXPRESSION "runtime error")
+
+        endif() # VTKm_ENABLE_DIY_NOMPI
+      else() # VTKm_UT_MPI
        add_test(NAME ${tname}${upper_backend}
          COMMAND ${test_prog} ${tname} ${device_command_line_argument}
                  ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS}
          )
-      endif()
-
-      set_tests_properties("${tname}${upper_backend}" PROPERTIES
-        LABELS "${upper_backend};${VTKm_UT_LABEL}"
-        TIMEOUT ${timeout}
-        RUN_SERIAL ${run_serial}
-        FAIL_REGULAR_EXPRESSION "runtime error"
-      )
+        set_tests_properties("${tname}${upper_backend}" PROPERTIES
+            LABELS "${upper_backend};${VTKm_UT_LABEL}"
+            TIMEOUT ${timeout}
+            RUN_SERIAL ${run_serial}
+            FAIL_REGULAR_EXPRESSION "runtime error")
+      endif() # VTKm_UT_MPI
    endforeach()
  endforeach()

--- a/Utilities/DynamicAnalysis/lsan.supp
+++ b/Utilities/DynamicAnalysis/lsan.supp
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -8,15 +8,17 @@
 ##  PURPOSE.  See the above copyright notice for more information.
 ##============================================================================

-# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX.
-# We require CMake 3.11 with the MSVC generator as the $<COMPILE_LANGUAGE:>
-# generator expression is not supported on older versions.
-cmake_minimum_required(VERSION 3.8...3.15 FATAL_ERROR)
+# If you want CUDA support, you will need to have CMake 3.13 on Linux/OSX.
+cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
 project (VTKm)

-if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
-  cmake_minimum_required(VERSION 3.11...3.15 FATAL_ERROR)
-endif()
+# We only allow c++14
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# When using C++14 support make sure you use the standard C++ extensions rather
+# than compiler-specific versions of the extensions (to preserve portability).
+set(CMAKE_CXX_EXTENSIONS OFF)

 # Update module path
 set(VTKm_CMAKE_MODULE_PATH ${VTKm_SOURCE_DIR}/CMake)
@ -79,8 +81,9 @@ endmacro ()

 # Configurable Options
 vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
-vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
+vtkm_option(VTKm_ENABLE_KOKKOS "Enable Kokkos support" OFF)
 vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
+vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
 vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
 vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
 vtkm_option(VTKm_ENABLE_MPI "Enable MPI support" OFF)
@ -97,6 +100,11 @@ endif()
 vtkm_option(VTKm_USE_DOUBLE_PRECISION "Use double precision for floating point calculations" OFF)
 vtkm_option(VTKm_USE_64BIT_IDS "Use 64-bit indices." ON)

+vtkm_option(VTKm_ENABLE_HDF5_IO "Enable HDF5 support" OFF)
+if (VTKm_ENABLE_HDF5_IO)
+  find_package(HDF5 REQUIRED COMPONENTS HL)
+endif()
+
 # VTK-m will turn on logging by default, but will set the default
 # logging level to WARN.  This option should not be visible by default
 # in the GUI, as ERROR and WARN level logging should not interfere
@ -108,6 +116,17 @@ vtkm_option(VTKm_ENABLE_LOGGING "Enable VTKm Logging" ON)
 # performance.
 vtkm_option(VTKm_NO_ASSERT "Disable assertions in debugging builds." OFF)

+# The CUDA compiler (as of CUDA 11) takes a surprisingly long time to compile
+# kernels with assert in them. By default we turn off asserts when compiling
+# for CUDA devices.
+vtkm_option(VTKm_NO_ASSERT_CUDA "Disable assertions for CUDA devices." ON)
+
+# The HIP compiler (as of ROCm 3.7) takes a surprisingly long time to compile
+# kernels with assert in them; they generate `printf` calls, which are very
+# slow (and cause massive register spillage). By default we turn off asserts
+# when compiling for HIP devices.
+vtkm_option(VTKm_NO_ASSERT_HIP "Disable assertions for HIP devices." ON)
+
 # When VTK-m is embedded into larger projects that wish to make end user
 # applications they want to only install libraries and don't want CMake/headers
 # installed.
@ -118,7 +137,7 @@ vtkm_option(VTKm_INSTALL_ONLY_LIBRARIES "install only vtk-m libraries and no hea
 # rather than exporting all symbols. This flag is added so that consumers
 # which require static builds can force all symbols on, which is something
 # VTK does.
-vtkm_option(VTKm_USE_DEFAULT_SYMBOL_VISIBILITY "Don't explicitly hide symbols from libraries." OFF)
+vtkm_option(VTKm_HIDE_PRIVATE_SYMBOLS "Hide symbols from libraries." ON)

 vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" OFF)
 set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
@ -128,18 +147,30 @@ set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
 # and the warnings are too strict for the parent project.
 vtkm_option(VTKm_ENABLE_DEVELOPER_FLAGS "Enable compiler flags that are useful while developing VTK-m" ON)

+# By default VTK-m installs its README.md and LICENSE.txt.
+# Some applications might not need to install those, hence this option.
+vtkm_option(VTKm_NO_INSTALL_README_LICENSE "disable the installation of README and LICENSE files" OFF)
+
+# We are in the process of deprecating the use of virtual methods because they
+# are not well supported on many accelerators. Turn this option on to remove
+# the code entirely. Note that the deprecation of virtual methods is work in
+# progress, so not all use of virtual methods may be done. In VTK-m 2.0
+# virtual methods should be removed entirely and this option will be removed.
+vtkm_option(VTKm_NO_DEPRECATED_VIRTUAL "Do not compile support of deprecated virtual methods" OFF)
+
 mark_as_advanced(
  VTKm_ENABLE_LOGGING
  VTKm_NO_ASSERT
+  VTKm_NO_ASSERT_CUDA
+  VTKm_NO_ASSERT_HIP
  VTKm_INSTALL_ONLY_LIBRARIES
-  VTKm_USE_DEFAULT_SYMBOL_VISIBILITY
+  VTKm_HIDE_PRIVATE_SYMBOLS
  VTKm_ENABLE_DEVELOPER_FLAGS
+  VTKm_NO_INSTALL_README_LICENSE
+  VTKm_NO_DEPRECATED_VIRTUAL
  )

 #-----------------------------------------------------------------------------
-# When using C++11 support make sure you use the standard C++ extensions rather
-# than compiler-specific versions of the extensions (to preserve portability).
-set(CMAKE_CXX_EXTENSIONS Off)

 # Setup default build types
 include(VTKmBuildType)
@ -154,6 +185,18 @@ include(VTKmCompilerFlags)


 #-----------------------------------------------------------------------------
+# We need to check and see if git lfs is installed so that test data will
+# be available for use
+if (VTKm_ENABLE_TESTING)
+  # Read the first line of the sentinel file that ships with the test data.
+  # file(STRINGS) is a hard error when the file cannot be read, so guard it
+  # with EXISTS; a source tree without the data directory then falls through
+  # to the warning below instead of aborting the configure step.
+  set(sentinel_file "${VTKm_SOURCE_DIR}/data/data/sentinel-data")
+  set(sentinel_data "")
+  if (EXISTS "${sentinel_file}")
+    file(STRINGS "${sentinel_file}" sentinel_data LIMIT_COUNT 1)
+  endif()
+  # When git lfs has not fetched the real content, the first line does not
+  # match the expected marker, so testing is switched off with a warning.
+  if (NOT sentinel_data STREQUAL "-- DO NOT MODIFY THIS LINE --")
+    message(WARNING
+      "Testing is enabled, but the data is not available. Use git lfs in order "
+      "to obtain the testing data.")
+    set(VTKm_ENABLE_TESTING off)
+  endif()
+endif()
+
 # We include the wrappers unconditionally as VTK-m expects the function to
 # always exist (and early terminate when testing is disabled).
 include(testing/VTKmTestWrappers)
@ -175,18 +218,17 @@ if (VTKm_ENABLE_TESTING)

  #-----------------------------------------------------------------------------
  # Find the Python interpreter, which we will use during the build process
-  find_package(PythonInterp QUIET)
+  find_package(Python QUIET COMPONENTS Interpreter)

  #-----------------------------------------------------------------------------
  # Find Pyexpander in case somebody wants to update the auto generated
  # faux variadic template code
  find_package(Pyexpander QUIET)

-  #-----------------------------------------------------------------------------
  # Setup compiler flags for dynamic analysis if needed
-  include(VTKmCompilerDynamicAnalysisFlags)
-  vtkm_check_sanitizer_build()
-endif (VTKm_ENABLE_TESTING)
+  include(testing/VTKmCompilerDynamicAnalysisFlags)
+
+endif()

 #-----------------------------------------------------------------------------
 # Check basic type sizes.
@ -197,6 +239,7 @@ check_type_size("long long" VTKm_SIZE_LONG_LONG BUILTIN_TYPES_ONLY)

 #-----------------------------------------------------------------------------
 # Add subdirectories
+add_subdirectory(vtkmstd)
 add_subdirectory(vtkm)

 #-----------------------------------------------------------------------------
@ -226,6 +269,18 @@ write_basic_package_version_file(
  VERSION ${VTKm_VERSION}
  COMPATIBILITY ExactVersion )

+# Install the readme and license files.
+if (NOT VTKm_NO_INSTALL_README_LICENSE)
+install(FILES ${VTKm_SOURCE_DIR}/README.md
+  DESTINATION ${VTKm_INSTALL_SHARE_DIR}
+  RENAME VTKmREADME.md
+  )
+install(FILES ${VTKm_SOURCE_DIR}/LICENSE.txt
+  DESTINATION ${VTKm_INSTALL_SHARE_DIR}
+  RENAME VTKmLICENSE.txt
+  )
+endif()
+
 if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
  install(
    FILES
@ -234,24 +289,19 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
    DESTINATION ${VTKm_INSTALL_CONFIG_DIR}
    )

-  # Install the readme and license files.
-  install(FILES ${VTKm_SOURCE_DIR}/README.md
-    DESTINATION ${VTKm_INSTALL_SHARE_DIR}
-    RENAME VTKmREADME.md
-    )
-  install(FILES ${VTKm_SOURCE_DIR}/LICENSE.txt
-    DESTINATION ${VTKm_INSTALL_SHARE_DIR}
-    RENAME VTKmLICENSE.txt
-    )
-
  # Install helper configure files.
  install(
    FILES
+      ${VTKm_SOURCE_DIR}/CMake/VTKmCMakeBackports.cmake
      ${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
-      ${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
-      ${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
+      ${VTKm_SOURCE_DIR}/CMake/patches/FindMPI.cmake
    DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
    )
+  install(
+    FILES
+      ${VTKm_SOURCE_DIR}/CMake/patches/3.15/FindMPI.cmake
+    DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}/3.15
+    )

  # Install support files.
  install(
@ -259,8 +309,8 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
      ${VTKm_SOURCE_DIR}/CMake/VTKmCPUVectorization.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
      ${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
+      ${VTKm_SOURCE_DIR}/CMake/VTKmDIYUtils.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
-      ${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
    DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -294,7 +344,7 @@ endif ()
 #-----------------------------------------------------------------------------
 #add the benchmarking folder
 if(VTKm_ENABLE_BENCHMARKS)
-    add_subdirectory(benchmarking)
+  add_subdirectory(benchmarking)
 endif()

 #-----------------------------------------------------------------------------
@ -319,6 +369,8 @@ if (VTKm_ENABLE_TESTING)
  # installed version of VTK-m.
  include(testing/VTKmTestInstall)
  vtkm_test_install()
+else ()
+  set(CTEST_USE_LAUNCHERS off)
 endif()

 #-----------------------------------------------------------------------------
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -32,19 +32,19 @@ Before you begin, perform initial setup:
    This will prompt for your GitLab user name and configure a remote
    called `gitlab` to refer to it.

-5. (Optional but highly recommended.) 
+5. (Optional but highly recommended.)
    [Register with the VTK-m dashboard] on Kitware's CDash instance to
    better know how your code performs in regression tests. After
    registering and signing in, click on "All Dashboards" link in the upper
    left corner, scroll down and click "Subscribe to this project" on the
    right of VTK-m.

-6.  (Optional but highly recommended.) 
+6.  (Optional but highly recommended.)
    [Sign up for the VTK-m mailing list] to communicate with other
    developers and users.

 [GitLab Access]: https://gitlab.kitware.com/users/sign_in
-[Fork VTK-m]: https://gitlab.kitware.com/vtk/vtk-m/forks/new
+[Fork VTK-m]: https://gitlab.kitware.com/vtk/vtk-m/-/forks/new
 [Register with the VTK-m dashboard]: https://open.cdash.org/register.php
 [Sign up for the VTK-m mailing list]: http://vtk.org/mailman/listinfo/vtkm

@ -108,6 +108,9 @@ idea of the feature or fix to be developed given just the branch name.
        This is required as VTK-m uses Git-LFS to efficiently support data
        files.

+4.  If you are adding a new feature or making significant changes to the API,
+    make sure to add an entry to `docs/changelog`. This allows release
+    notes to properly capture all relevant changes.

 ### Guidelines for Commit Messages ###

@ -175,7 +178,7 @@ upper right.
 When you [pushed your topic branch](#share-a-topic), it will provide you
 with a url of the form

-    https://gitlab.kitware.com/<username>/vtk-m/merge_requests/new
+    https://gitlab.kitware.com/<username>/vtk-m/-/merge_requests/new

 You can copy/paste that into your web browser to create a new merge
 request. Alternately, you can visit your fork in GitLab, browse to the
@ -203,7 +206,7 @@ will be filled out for you.

 5.  In the "**Description**" field provide a high-level description of the
    change the topic makes and any relevant information about how to try
-    it. 
+    it.
    *   Use `@username` syntax to draw attention of specific developers.
        This syntax may be used anywhere outside literal text and code
        blocks.  Or, wait until the [next step](#review-a-merge-request)
@ -226,6 +229,10 @@ will be filled out for you.
 6.  The "**Assign to**", "**Milestone**", and "**Labels**" fields may be
    left blank.

+7.  Enable the "**Allow commits from members who can merge to the target branch.**" option,
+    so that reviewers can modify the merge request. This allows reviewers to change
+    minor style issues without overwhelming the author with change requests.
+
 7.  Use the "**Submit merge request**" button to create the merge request
    and visit its page.

@ -318,32 +325,49 @@ succeeds.

 ### Testing ###

-VTK-m has a [buildbot](http://buildbot.net) instance watching for merge
-requests to test. Each time a merge request is updated the buildbot user
-(@buildbot) will automatically trigger a new build on all VTK-m buildbot
-workers. The buildbot user (@buildbot) will respond with a comment linking
-to the CDash results when it schedules builds.
+Each time a merge request is created or updated, automated testing
+is triggered and shows up under the Pipeline tab.

-The buildbot user (@buildbot) will also respond to any comment with the
-form:
+Developers can track the status of the pipeline for a merge
+request by using the Pipeline tab on a merge request or by
+clicking on stage icons as shown below:
+
+![alt text](docs/build_stage.png "Pipeline")
+
+When trying to diagnose why a build or tests stage has failed it
+generally is easier to look at the pruned information reported
+on [VTK-m's CDash Dashboard](https://open.cdash.org/index.php?project=VTKM).
+To make it easier to see only the results for a given merge request,
+you can click the `cdash` link under the external stage (the rightmost
+pipeline stage icon).
+
+![alt text](docs/external_stage.png "CDash Link")
+
+In addition to the gitlab pipelines the buildbot user (@buildbot) will respond
+with a comment linking to the CDash results when it schedules builds.
+
+The builds for VTK-m that show up as part of the `external` stage of the
+gitlab pipeline are driven via buildbot, and have a different workflow.
+When you need to do things such as retry a build, you must issue commands
+via comments of the following form. The buildbot user (@buildbot) will
+respond to signify that the command has been executed.

    Do: test

 The `Do: test` command accepts the following arguments:

-  * `--oneshot` 
+  * `--oneshot`
        only build the *current* hash of the branch; updates will not be
        built using this command
-  * `--stop` 
+  * `--stop`
        clear the list of commands for the merge request
-  * `--superbuild` 
+  * `--superbuild`
        build the superbuilds related to the project
-  * `--clear` 
+  * `--clear`
        clear previous commands before adding this command
-  * `--regex-include <arg>` or `-i <arg>` 
+  * `--regex-include <arg>` or `-i <arg>`
        only build on builders matching `<arg>` (a Python regular
        expression)
-  * `--regex-exclude <arg>` or `-e <arg>` 
+  * `--regex-exclude <arg>` or `-e <arg>`
        excludes builds on builders matching `<arg>` (a Python regular
        expression)

@ -451,7 +475,7 @@ will stop running so that you can make changes. Make the changes you need,
 use `git add` to stage those changes, and then use

    $ git rebase --continue
-	
+
 to have git continue the rebase process. You can always run `git status` to
 get help about what to do next.

--- a/CTestCustom.cmake.in
+++ b/CTestCustom.cmake.in
@ -10,10 +10,16 @@

 list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
  ".*warning: ignoring loop annotation.*"
-  ".*warning: Included by graph for.*not generated, too many nodes. Consider increasing DOT_GRAPH_MAX_NODES."
+  ".*warning: Included by graph for.*not generated, too many nodes. Consider increasing DOT_GRAPH_MAX_NODES.*"
+
+  # disable doxygen warnings about no matching members caused by auto keyword
+  ".*warning: no matching file member found for.*"
+
+  # disable doxygen warning from VTKM_DEPRECATED usage
+  ".*warning: Found.*while parsing initializer list!.*"

  # disable doxygen warning about potential recursion.
-  ".*warning: Detected potential recursive class relation between class vtkm::exec::internal::ArrayPortalTransform"
+  ".*warning: Detected potential recursive class relation between .*"

  # disable doxygen warning about not generating graph
  ".*warning: Included by graph for"
@ -49,7 +55,7 @@ list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
  "nvlink warning : .*ArrayPortalVirtual.* has address taken but no possible call to it"
  "nvlink warning : .*CellLocatorBoundingIntervalHierarchyExec.* has address taken but no possible call to it"
  "nvlink warning : .*CellLocatorRectilinearGrid.* has address taken but no possible call to it"
-  "nvlink warning : .*CellLocatorUniformBins.* has address taken but no possible call to it"
+  "nvlink warning : .*CellLocatorTwoLevel.* has address taken but no possible call to it"
  "nvlink warning : .*CellLocatorUniformGrid.* has address taken but no possible call to it"

 )
--- a/README.md
+++ b/README.md
@ -28,6 +28,12 @@ You can find out more about the design of VTK-m on the [VTK-m Wiki].
      + "Part 4: Advanced Development" covers topics such as new worklet
        types and custom device adapters.

+  + A practical [VTK-m Tutorial] based on what users want to accomplish with
+    VTK-m:
+      + Building VTK-m and using existing VTK-m data structures and filters.
+      + Algorithm development with VTK-m.
+      + Writing new VTK-m filters.
+
  + Community discussion takes place on the [VTK-m users email list].

  + Doxygen-generated nightly reference documentation is available
@ -58,22 +64,20 @@ effort.
 VTK-m Requires:

  + C++11 Compiler. VTK-m has been confirmed to work with the following
-      + GCC 4.8+
+      + GCC 5.4+
      + Clang 5.0+
      + XCode 5.0+
      + MSVC 2015+
      + Intel 17.0.4+
  + [CMake](http://www.cmake.org/download/)
-      + CMake 3.8+
-      + CMake 3.11+ (for Visual Studio generator)
-      + CMake 3.12+ (for OpenMP support)
+      + CMake 3.12+
      + CMake 3.13+ (for CUDA support)

 Optional dependencies are:

  + CUDA Device Adapter
-      + [Cuda Toolkit 9.2+](https://developer.nvidia.com/cuda-toolkit)
-      + Note CUDA >= 10.1 is required on Windows
+      + [Cuda Toolkit 9.2, >= 10.2](https://developer.nvidia.com/cuda-toolkit)
+      + Note CUDA >= 10.2 is required on Windows
  + TBB Device Adapter
      + [TBB](https://www.threadingbuildingblocks.org/)
  + OpenMP Device Adapter
@ -99,18 +103,18 @@ Optional dependencies are:

 VTK-m has been tested on the following configurations:c
  + On Linux
-      + GCC 4.8.5, 5.4.0, 6.4.0, 7.3.0, Clang 5.0, 6.0, 7.0, Intel 17.0.4, Intel 19.0.0
-      + CMake 3.13.3, 3.14.1
-      + CUDA 9.2.148, 10.0.130, 10.1.105
+      + GCC 5.4.0, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4, 19.0.0
+      + CMake 3.12, 3.13, 3.16, 3.17
+      + CUDA 9.2, 10.2, 11.0, 11.1
      + TBB 4.4 U2, 2017 U7
  + On Windows
      + Visual Studio 2015, 2017
-      + CMake 3.8.2, 3.11.1, 3.12.4
-      + CUDA 10.1
+      + CMake 3.12, 3.17
+      + CUDA 10.2
      + TBB 2017 U3, 2018 U2
  + On MacOS
      + AppleClang 9.1
-      + CMake 3.12.3
+      + CMake 3.12
      + TBB 2018


@ -149,7 +153,20 @@ Below is a simple example of using VTK-m to load a VTK image file, run the
 Marching Cubes algorithm on it, and render the results to an image:

 ```cpp
-vtkm::io::reader::VTKDataSetReader reader("path/to/vtk_image_file");
+#include <vtkm/Bounds.h>
+#include <vtkm/Range.h>
+#include <vtkm/cont/ColorTable.h>
+#include <vtkm/filter/Contour.h>
+#include <vtkm/io/VTKDataSetReader.h>
+#include <vtkm/rendering/Actor.h>
+#include <vtkm/rendering/Camera.h>
+#include <vtkm/rendering/CanvasRayTracer.h>
+#include <vtkm/rendering/Color.h>
+#include <vtkm/rendering/MapperRayTracer.h>
+#include <vtkm/rendering/Scene.h>
+#include <vtkm/rendering/View3D.h>
+
+vtkm::io::VTKDataSetReader reader("path/to/vtk_image_file.vtk");
 vtkm::cont::DataSet inputData = reader.ReadDataSet();
 std::string fieldName = "scalars";

@ -165,21 +182,10 @@ vtkm::cont::DataSet outputData = filter.Execute(inputData);

 // compute the bounds and extends of the input data
 vtkm::Bounds coordsBounds = inputData.GetCoordinateSystem().GetBounds();
-vtkm::Vec<vtkm::Float64,3> totalExtent( coordsBounds.X.Length(),
-                                        coordsBounds.Y.Length(),
-                                        coordsBounds.Z.Length() );
-vtkm::Float64 mag = vtkm::Magnitude(totalExtent);
-vtkm::Normalize(totalExtent);

 // setup a camera and point it to towards the center of the input data
 vtkm::rendering::Camera camera;
 camera.ResetToBounds(coordsBounds);
-
-camera.SetLookAt(totalExtent*(mag * .5f));
-camera.SetViewUp(vtkm::make_Vec(0.f, 1.f, 0.f));
-camera.SetClippingRange(1.f, 100.f);
-camera.SetFieldOfView(60.f);
-camera.SetPosition(totalExtent*(mag * 2.f));
 vtkm::cont::ColorTable colorTable("inferno");

 // Create a mapper, canvas and view that will be used to render the scene
@ -194,11 +200,23 @@ scene.AddActor(vtkm::rendering::Actor(outputData.GetCellSet(),
                                      outputData.GetField(fieldName),
                                      colorTable));
 vtkm::rendering::View3D view(scene, mapper, canvas, camera, bg);
-view.Initialize();
 view.Paint();
-view.SaveAs("demo_output.pnm");
+view.SaveAs("demo_output.png");
 ```

+A minimal CMakeLists.txt such as the following one can be used to build this
+example.
+
+```CMake
+project(example)
+
+set(VTKm_DIR "/somepath/lib/cmake/vtkm-XYZ")
+
+find_package(VTKm REQUIRED)
+
+add_executable(example example.cxx)
+target_link_libraries(example vtkm_cont vtkm_rendering)
+```

 ## License ##

@ -211,9 +229,10 @@ See [LICENSE.txt](LICENSE.txt) for details.
 [VTK-m Doxygen]:            http://m.vtk.org/documentation/
 [VTK-m download page]:      http://m.vtk.org/index.php/VTK-m_Releases
 [VTK-m git repository]:     https://gitlab.kitware.com/vtk/vtk-m/
-[VTK-m Issue Tracker]:      https://gitlab.kitware.com/vtk/vtk-m/issues
+[VTK-m Issue Tracker]:      https://gitlab.kitware.com/vtk/vtk-m/-/issues
 [VTK-m Overview]:           http://m.vtk.org/images/2/29/VTKmVis2016.pptx
 [VTK-m Users Guide]:        http://m.vtk.org/images/c/c8/VTKmUsersGuide.pdf
 [VTK-m users email list]:   http://vtk.org/mailman/listinfo/vtkm
 [VTK-m Wiki]:               http://m.vtk.org/
+[VTK-m Tutorial]:           http://m.vtk.org/index.php/Tutorial
 [CONTRIBUTING.md]:          CONTRIBUTING.md
--- a/Utilities/CI/.gitignore
+++ b/Utilities/CI/.gitignore
@ -0,0 +1 @@
+env/
--- a/Utilities/CI/DeveloperSetup.md
+++ b/Utilities/CI/DeveloperSetup.md
@ -0,0 +1,37 @@
+# How to set up a machine to use the CI scripts #
+
+# OSX and Unix #
+
+
+# Requirements #
+
+- Docker
+- Python3
+  - PyYAML
+
+The CI scripts require python3 and the PyYAML package.
+
+Generally the best way to set up this environment is to create a Python
+virtual env so you don't pollute your system. This means getting pip,
+the Python package manager, and virtualenv, which allows for isolation
+of a project's Python dependencies.
+
+```
+sudo easy_install pip
+sudo pip install virtualenv
+```
+
+Next we need to create a new Python virtual env. I personally
+like to set this up in `vtkm/Utilities/CI/env`.
+
+```
+mkdir env
+virtualenv env
+```
+
+Now all we have to do is install the requirements:
+
+```
+./env/bin/pip install -r requirements.txt
+```
+
--- a/Utilities/CI/reproduce_ci_env.py
+++ b/Utilities/CI/reproduce_ci_env.py
@ -0,0 +1,329 @@
+#!/usr/bin/env python3
+
+#=============================================================================
+#
+#  Copyright (c) Kitware, Inc.
+#  All rights reserved.
+#  See LICENSE.txt for details.
+#
+#  This software is distributed WITHOUT ANY WARRANTY; without even
+#  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+#  PURPOSE.  See the above copyright notice for more information.
+#
+#===============
+
+import enum
+import os
+import tempfile
+import string
+import subprocess
+import sys
+import platform
+import re
+import yaml
+
+def get_root_dir():
+  dir_path = os.path.dirname(os.path.realpath(__file__))
+  #find the where .gitlab-ci.yml is located
+  try:
+    src_root = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'], cwd=dir_path)
+    src_root = str(src_root, 'utf-8')
+    src_root = src_root.rstrip('\n')
+    # Corrections in case the filename is a funny Cygwin path
+    src_root = re.sub(r'^/cygdrive/([a-z])/', r'\1:/', src_root)
+    return src_root
+  except subprocess.CalledProcessError:
+    return None
+
+def extract_stage_job_from_cmdline(*args):
+  if len(args) == 1:
+    stage_and_job = str(args[0]).split(':')
+    if len(stage_and_job) == 1:
+      stage_and_job = ['build', stage_and_job[0]]
+    return stage_and_job
+  return args
+
+def load_ci_file(ci_file_path):
+  ci_state = {}
+  if ci_file_path:
+    root_dir = os.path.dirname(ci_file_path)
+    ci_state = yaml.safe_load(open(ci_file_path))
+    if 'include' in ci_state:
+      for inc in ci_state['include']:
+        if 'local' in inc:
+          #the local paths can start with '/'
+          include_path = inc['local'].lstrip('/')
+          include_path = os.path.join(root_dir, include_path)
+          ci_state.update(yaml.safe_load(open(include_path)))
+  return ci_state
+
+def flattened_entry_copy(ci_state, name):
+  import copy
+  entry = copy.deepcopy(ci_state[name])
+
+  #Flatten 'extends' entries, only presume the first level of inheritance is
+  #important
+  if 'extends' in entry:
+    to_merge = []
+
+    if not isinstance(entry['extends'], list):
+      entry['extends'] = [ entry['extends'] ]
+
+    for e in entry['extends']:
+      entry.update(ci_state[e])
+    del entry['extends']
+  return entry
+
+def ci_stages_and_jobs(ci_state):
+  stages = ci_state['stages']
+  jobs = dict((s,[]) for s in stages)
+  for key in ci_state:
+    entry = flattened_entry_copy(ci_state, key)
+
+    is_job = False
+    if 'stage' in entry:
+      stage = entry['stage']
+      if stage in stages:
+        is_job = True
+
+    # if we have a job ( that isn't private )
+    if is_job and not key.startswith('.'):
+      # clean up the name
+      clean_name = key
+      if ':' in key:
+        clean_name = key.split(':')[1]
+      jobs[stage].append(clean_name)
+
+
+  return jobs
+
+def subset_yml(ci_state, stage, name):
+  #given a stage and name generate a new yaml
+  #file that only contains information for stage and name.
+  #Does basic extend merging so that recreating the env is easier
+  runner_yml = {}
+
+  if stage+":"+name in ci_state:
+    name = stage+":"+name
+
+  runner_yml[name] = flattened_entry_copy(ci_state, name)
+  return runner_yml
+
+class CallMode(enum.Enum):
+  """Selects how subprocess_call_docker runs the docker command."""
+  # run through subprocess.check_call
+  call = 1
+  # run through subprocess.check_output, returning the captured output
+  output = 2
+
+
+def subprocess_call_docker(cmd, cwd, mode=CallMode.call):
+  system = platform.system()
+  if (system == 'Windows') or (system == 'Darwin'):
+    # Windows and MacOS run Docker in a VM, so they don't need sudo
+    full_cmd = ['docker'] + cmd
+  else:
+    # Unix needs to run docker with root privileges
+    full_cmd = ['sudo', 'docker'] + cmd
+  print(" ".join(full_cmd), flush=True)
+
+  if mode is CallMode.call:
+    return subprocess.check_call(full_cmd, cwd=cwd)
+  if mode is CallMode.output:
+    return subprocess.check_output(full_cmd, cwd=cwd)
+
+###############################################################################
+#
+#     User Command: 'list'
+#
+###############################################################################
+def list_jobs(ci_file_path, *args):
+  ci_state = load_ci_file(ci_file_path)
+  jobs = ci_stages_and_jobs(ci_state)
+  for key,values in jobs.items():
+    print('Jobs for Stage:', key)
+    for v in values:
+      print('\t',v)
+    print('')
+
+
+###############################################################################
+#
+#     User Command: 'create' | 'setup'
+#
+###############################################################################
+def create_container(ci_file_path, *args):
+  ci_state = load_ci_file(ci_file_path)
+  ci_jobs = ci_stages_and_jobs(ci_state)
+  stage,name = extract_stage_job_from_cmdline(*args)
+
+  if not stage in ci_jobs:
+    print('Unable to find stage: ', stage)
+    print('Valid stages are:', list(ci_jobs.keys()))
+    exit(1)
+
+  if not name in ci_jobs[stage]:
+    print('Unable to find job: ', name)
+    print('Valid jobs are:', ci_jobs[stage])
+    exit(1)
+
+  #we now have the relevant subset of the yml
+  #fully expanded into a single definition
+  subset = subset_yml(ci_state, stage, name)
+
+  job_name = name
+  if stage+":"+name in subset:
+    job_name = stage+":"+name
+  runner_name = stage+":"+name
+
+  runner = subset[job_name]
+  src_dir = get_root_dir()
+  gitlab_env = [ k + '="' + v + '"' for k,v in runner['variables'].items()]
+
+  # propagate any https/http proxy info
+  if os.getenv('http_proxy'):
+    gitlab_env = [ 'http_proxy=' + os.getenv('http_proxy') ] + gitlab_env
+  if os.getenv('https_proxy'):
+    gitlab_env = [ 'https_proxy=' + os.getenv('https_proxy') ] + gitlab_env
+
+  # The script and before_script could be anywhere!
+  script_search_locations = [ci_state, subset, runner]
+  for loc in script_search_locations:
+    if 'before_script' in loc:
+      before_script = loc['before_script']
+    if 'script' in loc:
+      script = loc['script']
+
+  docker_template = string.Template('''
+FROM $image
+ENV GITLAB_CI=1 \
+    GITLAB_CI_EMULATION=1 \
+    CI_PROJECT_DIR=. \
+    CI_JOB_NAME=$job_name
+#Copy all of this project to the src directory
+COPY . /src
+ENV $gitlab_env
+WORKDIR /src
+RUN echo "$before_script || true" >> /setup-gitlab-env.sh && \
+    echo "$script || true" >> /run-gitlab-stage.sh && \
+    bash /setup-gitlab-env.sh
+''')
+
+  docker_content = docker_template.substitute(image=runner['image'],
+                    job_name='local-build'+runner_name,
+                    src_dir=src_dir,
+                    gitlab_env= " ".join(gitlab_env),
+                    before_script=" && ".join(before_script),
+                    script=" && ".join(script))
+
+  # Write out the file
+  docker_file = tempfile.NamedTemporaryFile(delete=False)
+  docker_file.write(bytes(docker_content, 'utf-8'))
+  docker_file.close()
+
+  # now we need to run docker and build this image with a name equal to the
+  # ci name, and the docker context to be the current git repo root dir so
+  # we can copy the current project src automagically
+  try:
+    subprocess_call_docker(['build', '-f', docker_file.name, '-t', runner_name, src_dir],
+                           cwd=src_dir)
+  except subprocess.CalledProcessError:
+    print('Unable to build the docker image for: ', runner_name)
+    exit(1)
+  finally:
+    # remove the temp file
+    os.remove(docker_file.name)
+
+###############################################################################
+#
+#     User Command: 'run' | 'exec'
+#
+###############################################################################
+def run_container(ci_file_path, *args):
+  # Exec/Run ( https://docs.docker.com/engine/reference/commandline/exec/#run-docker-exec-on-a-running-container )
+  src_dir = get_root_dir()
+  stage,name = extract_stage_job_from_cmdline(*args)
+  image_name = stage+':'+name
+
+  try:
+    cmd = ['run', '-itd', image_name]
+    container_id = subprocess_call_docker(cmd, cwd=src_dir, mode=CallMode.output)
+    container_id = str(container_id, 'utf-8')
+    container_id= container_id.rstrip('\n')
+  except subprocess.CalledProcessError:
+    print('Unable to run the docker image for: ', image_name)
+    exit(1)
+
+  try:
+    cmd = ['exec', '-it', container_id, 'bash']
+    subprocess_call_docker(cmd, cwd=src_dir)
+  except subprocess.CalledProcessError:
+    print('Unable to attach an iteractive shell to : ', container_id)
+  pass
+
+  try:
+    cmd = ['container', 'stop', container_id]
+    subprocess_call_docker(cmd, cwd=src_dir)
+  except subprocess.CalledProcessError:
+    print('Unable to stop container: ', container_id)
+  pass
+
+###############################################################################
+#
+#     User Command: 'help'
+#
+###############################################################################
+def help_usage(ci_file_path, *args):
+  print('Setup gitlab-ci docker environments/state locally')
+  print('Usage: reproduce_ci_env.py [command] [stage] <name>')
+  print('\n')
+  print('Commands:\n' + \
+  '\n'+\
+  '  list: List all stage and job names for gitlab-ci\n'+\
+  '  create: build a docker container for this gitlab-ci job.\n'+\
+  '        Will match the <stage> to docker repo, and <name> to the tag. \n' +\
+  '        If no explicit <stage> is provided will default to `build` stage. \n' +\
+  '  run: Launch an interactive shell inside the docker image\n' +\
+  '        for a given stage:name with the correct environment and will automatically\n' +\
+  '        run the associated stage script.\n'
+  '        If no explicit <stage> is provided will default to `build` stage. \n')
+  print('Example:\n' + \
+  '\n'+\
+  '  reproduce_ci_env create centos7\n'+\
+  '  reproduce_ci_env run build:centos7\n')
+
+###############################################################################
+def main(argv):
+  ci_file_path = os.path.join(get_root_dir(), '.gitlab-ci.yml')
+  if len(argv) == 0:
+    help_usage( ci_file_path  )
+    exit(1)
+  if len(argv) > 3:
+    help_usage( ci_file_path  )
+    exit(1)
+
+  #commands we want
+  # - list
+  # -- list all 'jobs'
+  # - create | setup
+  # -- create a docker image that represents a given stage:name
+  # - run | exec
+  # -- run the script for the stage:name inside the correct docker image
+  #    and provide an interactive shell
+  # -- help
+  #setup arg function table
+  commands = {
+    'list': list_jobs,
+    'create': create_container,
+    'setup': create_container,
+    'exec': run_container,
+    'run': run_container,
+    'help': help_usage
+    }
+  if argv[0] in commands:
+    #splat the subset of the vector so they are separate call parameters
+    commands[argv[0]]( ci_file_path, *argv[1:3] )
+  else:
+    commands['help']( ci_file_path )
+    exit(1)
+  exit(0)
+
+if __name__ == '__main__':
+  main(sys.argv[1:])
--- a/Utilities/CI/requirements.txt
+++ b/Utilities/CI/requirements.txt
@ -0,0 +1 @@
+PyYAML
--- a/Utilities/DynamicAnalysis/sanitizer_blacklist.txt.in
+++ b/Utilities/DynamicAnalysis/sanitizer_blacklist.txt.in
@ -1,2 +0,0 @@
-# Blacklist third party libraries from invoking sanitizer errors
-src:@VTKm_SOURCE_DIR@/vtkm/thirdparty/*
--- a/Utilities/GitSetup/git-gitlab-push
+++ b/Utilities/GitSetup/git-gitlab-push
@ -23,7 +23,8 @@ OPTIONS
    Show what would be pushed without actually updating the destination

 -f,--force
-    Force-push the topic HEAD to rewrite the destination branch
+    Force-push the topic HEAD to rewrite the destination branch (use twice
+    to ignore stale remote tracking branches)

 --no-default
    Do not push the default branch (e.g. master)
@ -73,7 +74,14 @@ set_upstream=true
 # Parse the command line options.
 while test $# != 0; do
 	case "$1" in
-		-f|--force)    force='+'; lease=true ;;
+		-f|--force)
+			if test -n "$force"; then
+				lease=false
+			else
+				lease=true
+			fi
+			force='+'
+			;;
 		--no-topic)    no_topic=1; set_upstream=false ;;
 		--dry-run)     dry_run=--dry-run ;;
 		--no-default)  no_default=1 ;;
--- a/Utilities/GitSetup/setup-lfs
+++ b/Utilities/GitSetup/setup-lfs
@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+#=============================================================================
+# Copyright 2017 Kitware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#=============================================================================
+
+# Run this script to set up the local Git repository to push LFS data to
+# remotes.
+
+# Print the given message on stderr and exit with status 1.
+die() {
+	echo 1>&2 "$@" ; exit 1
+}
+
+# Make sure we are inside the repository.
+# "${BASH_SOURCE%/*}" is the path of this script with its last component
+# removed. The trailing && chains into the `if` below, so nothing else runs
+# when the cd fails.
+cd "${BASH_SOURCE%/*}" &&
+
+# Set the LFS filter configuration up.
+# Each step only runs when the previous one succeeded.
+if git lfs version; then
+	git config filter.lfs.clean "git-lfs clean -- %f" &&
+	git config filter.lfs.smudge "git-lfs smudge -- %f" &&
+	git config filter.lfs.process "git-lfs filter-process" &&
+	git config filter.lfs.required true &&
+	git lfs fetch &&
+	git lfs checkout &&
+	echo 'LFS is now configured.'
+else
+	die 'Git LFS is not available. Please make it available on the PATH' \
+		'either by installing it through your system provider or installing it' \
+		'from <https://git-lfs.github.com>.'
+fi
--- a/Utilities/Scripts/benchCompare.py
+++ b/Utilities/Scripts/benchCompare.py
@ -1,157 +0,0 @@
-#!/usr/bin/env python3
-#
-# Compares the output from BenchmarkDeviceAdapter from the serial
-# device to a parallel device and prints a table containing the results.
-#
-# Example usage:
-#
-# $ BenchmarkDeviceAdapter_SERIAL > serial.out
-# $ BenchmarkDeviceAdapter_TBB > tbb.out
-# $ benchCompare.py serial.out tbb.out
-#
-#
-# The number of threads (optional -- only used to generate the "Warn" column)
-maxThreads = 4
-#
-# Print debugging output:
-doDebug = False
-#
-# End config options.
-
-import re
-import sys
-
-assert(len(sys.argv) == 3)
-
-def debug(str):
-  if (doDebug): print(str)
-
-# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
-typeParser = re.compile("\\*{3} ([^*]+) on device ([^*]+) \\*{15}")
-
-# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
-nameParser = re.compile("Benchmark '([^-]+)' results:")
-
-# Parses "mean = 0.0125s" --> 0.0125
-meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
-
-# Parses "std dev = 0.0125s" --> 0.0125
-stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
-
-serialFilename = sys.argv[1]
-parallelFilename = sys.argv[2]
-
-serialFile = open(serialFilename, 'r')
-parallelFile = open(parallelFilename, 'r')
-
-class BenchKey:
-  def __init__(self, name_, type_):
-    self.name = name_
-    self.type = type_
-
-  def __eq__(self, other):
-    return self.name == other.name and self.type == other.type
-
-  def __lt__(self, other):
-    if self.name < other.name: return True
-    elif self.name > other.name: return False
-    else: return self.type < other.type
-
-  def __hash__(self):
-    return (self.name + self.type).__hash__()
-
-class BenchData:
-  def __init__(self, mean_, stdDev_):
-    self.mean = mean_
-    self.stdDev = stdDev_
-
-def parseFile(f, benchmarks):
-  type = ""
-  bench = ""
-  mean = -1.
-  stdDev = -1.
-  for line in f:
-    debug("Line: {}".format(line))
-
-    typeRes = typeParser.match(line)
-    if typeRes:
-      type = typeRes.group(1)
-      debug("Found type: {}".format(type))
-      continue
-
-    nameRes = nameParser.match(line)
-    if nameRes:
-      name = nameRes.group(1)
-      debug("Found name: {}".format(name))
-      continue
-
-    meanRes = meanParser.match(line)
-    if meanRes:
-      mean = float(meanRes.group(1))
-      debug("Found mean: {}".format(mean))
-      continue
-
-    stdDevRes = stdDevParser.match(line)
-    if stdDevRes:
-      stdDev = float(stdDevRes.group(1))
-      debug("Found stddev: {}".format(stdDev))
-
-      # stdDev is always the last parse for a given benchmark, add entry now
-      benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
-      debug("{} records found.".format(len(benchmarks)))
-
-      mean = -1.
-      stdDev = -1.
-
-      continue
-
-serialBenchmarks = {}
-parallelBenchmarks = {}
-
-parseFile(serialFile, serialBenchmarks)
-parseFile(parallelFile, parallelBenchmarks)
-
-serialKeys = set(serialBenchmarks.keys())
-parallelKeys = set(parallelBenchmarks.keys())
-
-commonKeys = sorted(list(serialKeys.intersection(parallelKeys)))
-
-serialOnlyKeys = sorted(list(serialKeys.difference(parallelKeys)))
-parallelOnlyKeys = sorted(list(parallelKeys.difference(serialKeys)))
-
-debug("{} serial keys\n{} parallel keys\n{} common keys\n{} serialOnly keys\n{} parallelOnly keys.".format(
-        len(serialKeys), len(parallelKeys), len(commonKeys), len(serialOnlyKeys), len(parallelOnlyKeys)))
-
-if len(serialOnlyKeys) > 0:
-  print("Keys found only in serial:")
-  for k in serialOnlyKeys:
-    print("%s (%s)"%(k.name, k.type))
-  print("")
-
-if len(parallelOnlyKeys) > 0:
-  print("Keys found only in parallel:")
-  for k in parallelOnlyKeys:
-    print("%s (%s)"%(k.name, k.type))
-  print("")
-
-print("Comparison:")
-print("| %7s | %4s | %8s    %8s | %8s    %8s | %s (%s) |"%(
-        "Speedup", "Warn", "serial", "", "parallel", "", "Benchmark", "Type"))
-print("|-%7s-|-%4s-|-%8s----%8s-|-%8s----%8s-|-%s--%s--|"%(
-        "-"*7, "-"*4, "-"*8, "-"*8, "-"*8, "-"*8, "-"*9, "-"*4))
-for key in commonKeys:
-  sData = serialBenchmarks[key]
-  pData = parallelBenchmarks[key]
-  speedup = sData.mean / pData.mean if pData.mean != 0. else 0.
-  if speedup > maxThreads * .9:
-    flag = "    "
-  elif speedup > maxThreads * .75:
-    flag = "!   "
-  elif speedup > maxThreads * .5:
-    flag = "!!  "
-  elif speedup > maxThreads * .25:
-    flag = "!!! "
-  else:
-    flag = "!!!!"
-  print("| %7.3f | %4s | %08.6f +- %08.6f | %08.6f +- %08.6f | %s (%s) |"%(
-          speedup, flag, sData.mean, sData.stdDev, pData.mean, pData.stdDev, key.name, key.type))
--- a/Utilities/Scripts/benchSummary.py
+++ b/Utilities/Scripts/benchSummary.py
@ -1,111 +0,0 @@
-#!/usr/bin/env python
-#
-# Prints a concise summary of a benchmark output as a TSV blob.
-#
-# Example usage:
-#
-# $ BenchmarkXXX_DEVICE > bench.out
-# $ benchSummary.py bench.out
-#
-# Options SortByType, SortByName, or SortByMean may be passed after the
-# filename to sort the output by the indicated quantity. If no sort option
-# is provided, the output order matches the input. If multiple options are
-# specified, the list will be sorted repeatedly in the order requested.
-
-import re
-import sys
-
-assert(len(sys.argv) >= 2)
-
-# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
-typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")
-
-# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
-nameParser = re.compile("Benchmark '([^-]+)' results:")
-
-# Parses "mean = 0.0125s" --> 0.0125
-meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
-
-# Parses "std dev = 0.0125s" --> 0.0125
-stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
-
-filename = sys.argv[1]
-benchFile = open(filename, 'r')
-
-sortOpt = None
-if len(sys.argv) > 2:
-  sortOpt = sys.argv[2:]
-
-class BenchKey:
-  def __init__(self, name_, type_):
-    self.name = name_
-    self.type = type_
-
-  def __eq__(self, other):
-    return self.name == other.name and self.type == other.type
-
-  def __lt__(self, other):
-    if self.name < other.name: return True
-    elif self.name > other.name: return False
-    else: return self.type < other.type
-
-  def __hash__(self):
-    return (self.name + self.type).__hash__()
-
-class BenchData:
-  def __init__(self, mean_, stdDev_):
-    self.mean = mean_
-    self.stdDev = stdDev_
-
-def parseFile(f, benchmarks):
-  type = ""
-  bench = ""
-  mean = -1.
-  stdDev = -1.
-  for line in f:
-    typeRes = typeParser.match(line)
-    if typeRes:
-      type = typeRes.group(1)
-      continue
-
-    nameRes = nameParser.match(line)
-    if nameRes:
-      name = nameRes.group(1)
-      continue
-
-    meanRes = meanParser.match(line)
-    if meanRes:
-      mean = float(meanRes.group(1))
-      continue
-
-    stdDevRes = stdDevParser.match(line)
-    if stdDevRes:
-      stdDev = float(stdDevRes.group(1))
-
-      # stdDev is always the last parse for a given benchmark, add entry now
-      benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
-
-      mean = -1.
-      stdDev = -1.
-
-      continue
-
-benchmarks = {}
-parseFile(benchFile, benchmarks)
-
-# Sort keys by type:
-keys = benchmarks.keys()
-if sortOpt:
-  for opt in sortOpt:
-    if opt.lower() == "sortbytype":
-      keys = sorted(keys, key=lambda k: k.type)
-    elif opt.lower() == "sortbyname":
-      keys = sorted(keys, key=lambda k: k.name)
-    elif opt.lower() == "sortbymean":
-      keys = sorted(keys, key=lambda k: benchmarks[k].mean)
-
-print("# Summary: (%s)"%filename)
-print("%-9s\t%-9s\t%-9s\t%-s"%("Mean", "Stdev", "Stdev%", "Benchmark (type)"))
-for key in keys:
-  data = benchmarks[key]
-  print("%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.mean, data.stdDev, data.stdDev / data.mean * 100., key.name, key.type))
--- a/Utilities/Scripts/benchSummaryWithBaselines.py
+++ b/Utilities/Scripts/benchSummaryWithBaselines.py
@ -1,156 +0,0 @@
-#!/usr/bin/env python
-#
-# Prints a concise summary of a benchmark output as a TSV blob. Benchmarks are
-# expected to have "Baseline" in the name, and a matching benchmark with the
-# same name but Baseline replaced with something else. For example,
-#
-# Baseline benchmark name: "Some benchmark: Baseline, Size=4"
-# Test benchmark name:     "Some benchmark: Blahblah, Size=4"
-#
-# The output will print the baseline, test, and overhead times for the
-# benchmarks.
-#
-# Example usage:
-#
-# $ BenchmarkXXX_DEVICE > bench.out
-# $ benchSummaryWithBaselines.py bench.out
-#
-# Options SortByType, SortByName, SortByOverhead, or SortByRatio
-# (testtime/baseline) may be passed after the filename to sort the output by
-# the indicated quantity. If no sort option is provided, the output order
-# matches the input. If multiple options are specified, the list will be sorted
-# repeatedly in the order requested.
-
-import re
-import sys
-
-assert(len(sys.argv) >= 2)
-
-# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
-typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")
-
-# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
-nameParser = re.compile("Benchmark '([^-]+)' results:")
-
-# Parses "mean = 0.0125s" --> 0.0125
-meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
-
-# Parses "std dev = 0.0125s" --> 0.0125
-stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
-
-# Parses "SomeText Baseline Other Text" --> ("SomeText ", " Other Text")
-baselineParser = re.compile("(.*)Baseline(.*)")
-
-filename = sys.argv[1]
-benchFile = open(filename, 'r')
-
-sortOpt = None
-if len(sys.argv) > 2:
-  sortOpt = sys.argv[2:]
-
-class BenchKey:
-  def __init__(self, name_, type_):
-    self.name = name_
-    self.type = type_
-
-  def __eq__(self, other):
-    return self.name == other.name and self.type == other.type
-
-  def __lt__(self, other):
-    if self.name < other.name: return True
-    elif self.name > other.name: return False
-    else: return self.type < other.type
-
-  def __hash__(self):
-    return (self.name + self.type).__hash__()
-
-class BenchData:
-  def __init__(self, mean_, stdDev_):
-    self.mean = mean_
-    self.stdDev = stdDev_
-
-def parseFile(f, benchmarks):
-  type = ""
-  bench = ""
-  mean = -1.
-  stdDev = -1.
-  for line in f:
-    typeRes = typeParser.match(line)
-    if typeRes:
-      type = typeRes.group(1)
-      continue
-
-    nameRes = nameParser.match(line)
-    if nameRes:
-      name = nameRes.group(1)
-      continue
-
-    meanRes = meanParser.match(line)
-    if meanRes:
-      mean = float(meanRes.group(1))
-      continue
-
-    stdDevRes = stdDevParser.match(line)
-    if stdDevRes:
-      stdDev = float(stdDevRes.group(1))
-
-      # stdDev is always the last parse for a given benchmark, add entry now
-      benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
-
-      mean = -1.
-      stdDev = -1.
-
-      continue
-
-class BaselinedBenchData:
-  def __init__(self, baseline, test):
-    self.baseline = baseline.mean
-    self.test = test.mean
-    self.overhead = test.mean - baseline.mean
-
-def findBaselines(benchmarks):
-  result = {}
-
-  for baseKey in benchmarks.keys():
-    # Look for baseline entries
-    baselineRes = baselineParser.match(baseKey.name)
-    if baselineRes:
-      prefix = baselineRes.group(1)
-      suffix = baselineRes.group(2)
-
-      # Find the test entry matching the baseline:
-      for testKey in benchmarks.keys():
-        if baseKey.type != testKey.type: # Need same type
-          continue
-        if baseKey.name == testKey.name: # Skip the base key
-          continue
-        if testKey.name.startswith(prefix) and testKey.name.endswith(suffix):
-          newName = (prefix + suffix).replace(", ,", ",")
-          newKey = BenchKey(newName, testKey.type)
-          newVal = BaselinedBenchData(benchmarks[baseKey], benchmarks[testKey])
-          result[newKey] = newVal
-  return result
-
-benchmarks = {}
-parseFile(benchFile, benchmarks)
-benchmarks = findBaselines(benchmarks)
-
-# Sort keys by type:
-keys = benchmarks.keys()
-if sortOpt:
-  for opt in sortOpt:
-    if opt.lower() == "sortbytype":
-      keys = sorted(keys, key=lambda k: k.type)
-    elif opt.lower() == "sortbyname":
-      keys = sorted(keys, key=lambda k: k.name)
-    elif opt.lower() == "sortbyoverhead":
-      keys = sorted(keys, key=lambda k: benchmarks[k].overhead)
-    elif opt.lower() == "sortbyratio":
-      keys = sorted(keys, key=lambda k: benchmarks[k].overhead / benchmarks[k].baseline)
-
-print("# Summary: (%s)"%filename)
-print("%-9s\t%-9s\t%-9s\t%-9s\t%-s"%("Baseline", "TestTime", "Overhead", "Test/Base", "Benchmark (type)"))
-for key in keys:
-  data = benchmarks[key]
-  print("%9.6f\t%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.baseline, data.test,
-        data.overhead, data.test / data.baseline, key.name, key.type))
--- a/Utilities/Scripts/compare-benchmarks.py
+++ b/Utilities/Scripts/compare-benchmarks.py
@ -0,0 +1,101 @@
+#!/usr/bin/env python3
+"""
+compare-benchmarks.py - VTKm + Google Benchmarks compare.py
+"""
+
+import getopt
+import subprocess
+import sys
+import time
+import os
+
+CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
+COMPARE_PY_PATH = os.path.join(CURRENT_DIR, 'compare.py')
+COMPARE_PY = sys.executable + " " + COMPARE_PY_PATH
+
+class Bench():
+    def __init__(self):
+        self.__cmd = None
+
+    @property
+    def cmd(self):
+        return self.__cmd
+
+    @cmd.setter
+    def cmd(self, c):
+        self.__cmd = c
+
+    def launch(self):
+        output_file = "bench-%d.json" % time.time()
+        cmd_exec = "%s --benchmark_out=%s --benchmark_out_format=json" \
+                % (self.cmd, output_file)
+        print(cmd_exec)
+        subprocess.call(cmd_exec, shell=True)
+        return output_file
+
+def print_help(error_msg = None):
+    if error_msg != None:
+        print(error_msg)
+
+    print("usage: compare-benchmarks <opts>\n" \
+            " --benchmark1='<benchmark1> [arg1] [arg2] ...'"\
+            " [--filter1=<filter1>]\n"\
+            " --benchmark2='<benchmark2> [arg1] [arg2] ...'"\
+            " [--filter2=<filter2>]\n"\
+            " -- [-opt] benchmarks|filters|benchmarksfiltered\n\n" \
+            "compare.py help:")
+
+    subprocess.call(COMPARE_PY, shell=True)
+    sys.exit(0)
+
+# -----------------------------------------------------------------------------
+def main():
+    is_filters = False
+    filter1 = str()
+    filter2 = str()
+    bench1 = Bench()
+    bench2 = Bench()
+
+    options, remainder = getopt.gnu_getopt(sys.argv[1:], '',
+            ['help','benchmark1=', 'benchmark2=', 'filter1=', 'filter2='])
+
+    for opt, arg in options:
+        if opt == "--benchmark1":
+            bench1.cmd = arg
+
+        if opt == "--benchmark2":
+            bench2.cmd = arg
+
+        if opt == "--filter1":
+            filter1 = arg
+
+        if opt == "--filter2":
+            filter2 = arg
+
+        if opt == "--help":
+            print_help()
+
+    if bench1.cmd == None:
+        print_help("ERROR: no benchmarks chosen")
+
+    for arg in remainder:
+        if arg == "filters":
+           is_filters = True
+
+    if is_filters and bench2.cmd != None:
+        print_help("ERROR: filters option can only accept --benchmark1= and --filter1")
+
+    b1_output = bench1.launch()
+    b2_output = bench2.launch() if not is_filters else filter1 + " " + filter2
+
+    cmd = "%s %s %s %s" % (COMPARE_PY, " ".join(remainder), b1_output, b2_output)
+    print(cmd)
+    subprocess.call(cmd, shell=True)
+
+    os.remove(b1_output)
+
+    if not is_filters:
+        os.remove(b2_output)
+
+if  __name__ == '__main__':
+    main()
--- a/Utilities/Scripts/compare.py
+++ b/Utilities/Scripts/compare.py
@ -0,0 +1,408 @@
+#!/usr/bin/env python
+
+import unittest
+"""
+compare.py - versatile benchmark output compare tool
+"""
+
+import argparse
+from argparse import ArgumentParser
+import sys
+import gbench
+from gbench import util, report
+from gbench.util import *
+
+
+def check_inputs(in1, in2, flags):
+    """
+    Perform checking on the user provided inputs and diagnose any abnormalities
+    """
+    in1_kind, in1_err = classify_input_file(in1)
+    in2_kind, in2_err = classify_input_file(in2)
+    output_file = find_benchmark_flag('--benchmark_out=', flags)
+    output_type = find_benchmark_flag('--benchmark_out_format=', flags)
+    if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
+        print(("WARNING: '--benchmark_out=%s' will be passed to both "
+               "benchmarks causing it to be overwritten") % output_file)
+    if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
+        print("WARNING: passing optional flags has no effect since both "
+              "inputs are JSON")
+    if output_type is not None and output_type != 'json':
+        print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
+               " is not supported.") % output_type)
+        sys.exit(1)
+
+
+def create_parser():
+    """
+    Build the argparse parser for this tool.
+
+    The top-level parser carries the display and U test options; the chosen
+    sub-command is stored in 'mode' and is one of 'benchmarks', 'filters'
+    or 'benchmarksfiltered'. Every sub-command collects any trailing
+    arguments into 'benchmark_options'.
+    """
+    parser = ArgumentParser(
+        description='versatile benchmark output compare tool')
+
+    parser.add_argument(
+        '-a',
+        '--display_aggregates_only',
+        dest='display_aggregates_only',
+        action="store_true",
+        help="If there are repetitions, by default, we display everything - the"
+             " actual runs, and the aggregates computed. Sometimes, it is "
+             "desirable to only view the aggregates. E.g. when there are a lot "
+             "of repetitions. Do note that only the display is affected. "
+             "Internally, all the actual runs are still used, e.g. for U test.")
+
+    # U test options: enabled by default, '--no-utest' switches it off.
+    utest = parser.add_argument_group()
+    utest.add_argument(
+        '--no-utest',
+        dest='utest',
+        default=True,
+        action="store_false",
+        help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS))
+    alpha_default = 0.05
+    utest.add_argument(
+        "--alpha",
+        dest='utest_alpha',
+        default=alpha_default,
+        type=float,
+        help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") %
+        alpha_default)
+
+    # The selected sub-command name ends up in args.mode.
+    subparsers = parser.add_subparsers(
+        help='This tool has multiple modes of operation:',
+        dest='mode')
+
+    # Mode 'benchmarks': <test_baseline> <test_contender> [options...]
+    parser_a = subparsers.add_parser(
+        'benchmarks',
+        help='The most simple use-case, compare all the output of these two benchmarks')
+    baseline = parser_a.add_argument_group(
+        'baseline', 'The benchmark baseline')
+    baseline.add_argument(
+        'test_baseline',
+        metavar='test_baseline',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    contender = parser_a.add_argument_group(
+        'contender', 'The benchmark that will be compared against the baseline')
+    contender.add_argument(
+        'test_contender',
+        metavar='test_contender',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    parser_a.add_argument(
+        'benchmark_options',
+        metavar='benchmark_options',
+        nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables')
+
+    # Mode 'filters': <test> <filter_baseline> <filter_contender> [options...]
+    parser_b = subparsers.add_parser(
+        'filters', help='Compare filter one with the filter two of benchmark')
+    baseline = parser_b.add_argument_group(
+        'baseline', 'The benchmark baseline')
+    baseline.add_argument(
+        'test',
+        metavar='test',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    baseline.add_argument(
+        'filter_baseline',
+        metavar='filter_baseline',
+        type=str,
+        nargs=1,
+        help='The first filter, that will be used as baseline')
+    contender = parser_b.add_argument_group(
+        'contender', 'The benchmark that will be compared against the baseline')
+    contender.add_argument(
+        'filter_contender',
+        metavar='filter_contender',
+        type=str,
+        nargs=1,
+        help='The second filter, that will be compared against the baseline')
+    parser_b.add_argument(
+        'benchmark_options',
+        metavar='benchmark_options',
+        nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables')
+
+    # Mode 'benchmarksfiltered':
+    # <test_baseline> <filter_baseline> <test_contender> <filter_contender>
+    # [options...]
+    parser_c = subparsers.add_parser(
+        'benchmarksfiltered',
+        help='Compare filter one of first benchmark with filter two of the second benchmark')
+    baseline = parser_c.add_argument_group(
+        'baseline', 'The benchmark baseline')
+    baseline.add_argument(
+        'test_baseline',
+        metavar='test_baseline',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='A benchmark executable or JSON output file')
+    baseline.add_argument(
+        'filter_baseline',
+        metavar='filter_baseline',
+        type=str,
+        nargs=1,
+        help='The first filter, that will be used as baseline')
+    contender = parser_c.add_argument_group(
+        'contender', 'The benchmark that will be compared against the baseline')
+    contender.add_argument(
+        'test_contender',
+        metavar='test_contender',
+        type=argparse.FileType('r'),
+        nargs=1,
+        help='The second benchmark executable or JSON output file, that will be compared against the baseline')
+    contender.add_argument(
+        'filter_contender',
+        metavar='filter_contender',
+        type=str,
+        nargs=1,
+        help='The second filter, that will be compared against the baseline')
+    parser_c.add_argument(
+        'benchmark_options',
+        metavar='benchmark_options',
+        nargs=argparse.REMAINDER,
+        help='Arguments to pass when running benchmark executables')
+
+    return parser
+
+
+def main():
+    # Parse the command line flags
+    parser = create_parser()
+    args, unknown_args = parser.parse_known_args()
+    if args.mode is None:
+        parser.print_help()
+        exit(1)
+    assert not unknown_args
+    benchmark_options = args.benchmark_options
+
+    if args.mode == 'benchmarks':
+        test_baseline = args.test_baseline[0].name
+        test_contender = args.test_contender[0].name
+        filter_baseline = ''
+        filter_contender = ''
+
+        # NOTE: if test_baseline == test_contender, you are analyzing the stdev
+
+        description = 'Comparing %s to %s' % (test_baseline, test_contender)
+    elif args.mode == 'filters':
+        test_baseline = args.test[0].name
+        test_contender = args.test[0].name
+        filter_baseline = args.filter_baseline[0]
+        filter_contender = args.filter_contender[0]
+
+        # NOTE: if filter_baseline == filter_contender, you are analyzing the
+        # stdev
+
+        description = 'Comparing %s to %s (from %s)' % (
+            filter_baseline, filter_contender, args.test[0].name)
+    elif args.mode == 'benchmarksfiltered':
+        test_baseline = args.test_baseline[0].name
+        test_contender = args.test_contender[0].name
+        filter_baseline = args.filter_baseline[0]
+        filter_contender = args.filter_contender[0]
+
+        # NOTE: if test_baseline == test_contender and
+        # filter_baseline == filter_contender, you are analyzing the stdev
+
+        description = 'Comparing %s (from %s) to %s (from %s)' % (
+            filter_baseline, test_baseline, filter_contender, test_contender)
+    else:
+        # should never happen
+        print("Unrecognized mode of operation: '%s'" % args.mode)
+        parser.print_help()
+        exit(1)
+
+    check_inputs(test_baseline, test_contender, benchmark_options)
+
+    if args.display_aggregates_only:
+        benchmark_options += ['--benchmark_display_aggregates_only=true']
+
+    options_baseline = []
+    options_contender = []
+
+    if filter_baseline and filter_contender:
+        options_baseline = ['--benchmark_filter=%s' % filter_baseline]
+        options_contender = ['--benchmark_filter=%s' % filter_contender]
+
+    # Run the benchmarks and report the results
+    json1 = json1_orig = gbench.util.run_or_load_benchmark(
+        test_baseline, benchmark_options + options_baseline)
+    json2 = json2_orig = gbench.util.run_or_load_benchmark(
+        test_contender, benchmark_options + options_contender)
+
+    # Now, filter the benchmarks so that the difference report can work
+    if filter_baseline and filter_contender:
+        replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
+        json1 = gbench.report.filter_benchmark(
+            json1_orig, filter_baseline, replacement)
+        json2 = gbench.report.filter_benchmark(
+            json2_orig, filter_contender, replacement)
+
+    # Diff and output
+    output_lines = gbench.report.generate_difference_report(
+        json1, json2, args.display_aggregates_only,
+        args.utest, args.utest_alpha)
+    print(description)
+    for ln in output_lines:
+        print(ln)
+
+
+class TestParser(unittest.TestCase):
+    """
+    Unit tests for the parser built by create_parser(): each test parses one
+    argument list and checks the resulting namespace for every mode, with and
+    without trailing benchmark options.
+    """
+
+    def setUp(self):
+        # The positional inputs use argparse.FileType('r'), so the tests
+        # point at JSON fixtures under gbench/Inputs next to this file.
+        # NOTE(review): 'os' is not imported explicitly in this file's
+        # visible imports; presumably it arrives through
+        # 'from gbench.util import *' - confirm.
+        self.parser = create_parser()
+        testInputs = os.path.join(
+            os.path.dirname(
+                os.path.realpath(__file__)),
+            'gbench',
+            'Inputs')
+        self.testInput0 = os.path.join(testInputs, 'test1_run1.json')
+        self.testInput1 = os.path.join(testInputs, 'test1_run2.json')
+
+    def test_benchmarks_basic(self):
+        parsed = self.parser.parse_args(
+            ['benchmarks', self.testInput0, self.testInput1])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarks_basic_without_utest(self):
+        parsed = self.parser.parse_args(
+            ['--no-utest', 'benchmarks', self.testInput0, self.testInput1])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertFalse(parsed.utest)
+        self.assertEqual(parsed.utest_alpha, 0.05)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarks_basic_display_aggregates_only(self):
+        parsed = self.parser.parse_args(
+            ['-a', 'benchmarks', self.testInput0, self.testInput1])
+        self.assertTrue(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarks_basic_with_utest_alpha(self):
+        parsed = self.parser.parse_args(
+            ['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.utest_alpha, 0.314)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarks_basic_without_utest_with_utest_alpha(self):
+        parsed = self.parser.parse_args(
+            ['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertFalse(parsed.utest)
+        self.assertEqual(parsed.utest_alpha, 0.314)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarks_with_remainder(self):
+        parsed = self.parser.parse_args(
+            ['benchmarks', self.testInput0, self.testInput1, 'd'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertEqual(parsed.benchmark_options, ['d'])
+
+    def test_benchmarks_with_remainder_after_doubleminus(self):
+        parsed = self.parser.parse_args(
+            ['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarks')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertEqual(parsed.benchmark_options, ['e'])
+
+    def test_filters_basic(self):
+        parsed = self.parser.parse_args(
+            ['filters', self.testInput0, 'c', 'd'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'filters')
+        self.assertEqual(parsed.test[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.filter_contender[0], 'd')
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_filters_with_remainder(self):
+        parsed = self.parser.parse_args(
+            ['filters', self.testInput0, 'c', 'd', 'e'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'filters')
+        self.assertEqual(parsed.test[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.filter_contender[0], 'd')
+        self.assertEqual(parsed.benchmark_options, ['e'])
+
+    def test_filters_with_remainder_after_doubleminus(self):
+        parsed = self.parser.parse_args(
+            ['filters', self.testInput0, 'c', 'd', '--', 'f'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'filters')
+        self.assertEqual(parsed.test[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.filter_contender[0], 'd')
+        self.assertEqual(parsed.benchmark_options, ['f'])
+
+    def test_benchmarksfiltered_basic(self):
+        parsed = self.parser.parse_args(
+            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarksfiltered')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertEqual(parsed.filter_contender[0], 'e')
+        self.assertFalse(parsed.benchmark_options)
+
+    def test_benchmarksfiltered_with_remainder(self):
+        parsed = self.parser.parse_args(
+            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarksfiltered')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertEqual(parsed.filter_contender[0], 'e')
+        self.assertEqual(parsed.benchmark_options[0], 'f')
+
+    def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
+        parsed = self.parser.parse_args(
+            ['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g'])
+        self.assertFalse(parsed.display_aggregates_only)
+        self.assertTrue(parsed.utest)
+        self.assertEqual(parsed.mode, 'benchmarksfiltered')
+        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
+        self.assertEqual(parsed.filter_baseline[0], 'c')
+        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
+        self.assertEqual(parsed.filter_contender[0], 'e')
+        self.assertEqual(parsed.benchmark_options[0], 'g')
+
+
+if __name__ == '__main__':
+    # Swap the two lines below to run the TestParser unit tests instead of
+    # the tool itself.
+    # unittest.main()
+    main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
--- a/Utilities/Scripts/gbench/__init__.py
+++ b/Utilities/Scripts/gbench/__init__.py
@ -0,0 +1,8 @@
+"""Google Benchmark tooling"""
+
+__author__ = 'Eric Fiselier'
+__email__ = 'eric@efcs.ca'
+__versioninfo__ = (0, 5, 0)
+__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
+
+__all__ = []
--- a/Utilities/Scripts/gbench/report.py
+++ b/Utilities/Scripts/gbench/report.py
@ -0,0 +1,541 @@
+import unittest
+"""report.py - Utilities for reporting statistics about benchmark results
+"""
+import os
+import re
+import copy
+
+from scipy.stats import mannwhitneyu
+
+
+class BenchmarkColor(object):
+    def __init__(self, name, code):
+        self.name = name
+        self.code = code
+
+    def __repr__(self):
+        return '%s%r' % (self.__class__.__name__,
+                         (self.name, self.code))
+
+    def __format__(self, format):
+        return self.code
+
+
+# Benchmark Colors Enumeration
+# Each entry pairs a symbolic name with the terminal escape sequence that is
+# emitted when the object is formatted; BC_NONE formats to the empty string
+# and BC_ENDC carries the reset sequence.
+BC_NONE = BenchmarkColor('NONE', '')
+BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
+BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
+BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
+BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
+BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
+BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
+BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
+BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
+BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
+BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
+BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
+
+# Below this many repetitions per sample calc_utest() does not run the U test.
+UTEST_MIN_REPETITIONS = 2
+UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
+# Suffix appended to the benchmark name on the U test row of the report.
+UTEST_COL_NAME = "_pvalue"
+
+
+def color_format(use_color, fmt_str, *args, **kwargs):
+    """
+    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
+    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
+    is False then all color codes in 'args' and 'kwargs' are replaced with
+    the empty string.
+    """
+    assert use_color is True or use_color is False
+    if not use_color:
+        args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
+                for arg in args]
+        kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
+                  for key, arg in kwargs.items()}
+    return fmt_str.format(*args, **kwargs)
+
+
+def find_longest_name(benchmark_list):
+    """
+    Return the length of the longest benchmark name in a given list of
+    benchmark JSON objects
+    """
+    longest_name = 1
+    for bc in benchmark_list:
+        if len(bc['name']) > longest_name:
+            longest_name = len(bc['name'])
+    return longest_name
+
+
+def calculate_change(old_val, new_val):
+    """
+    Return a float representing the decimal change between old_val and new_val.
+    """
+    if old_val == 0 and new_val == 0:
+        return 0.0
+    if old_val == 0:
+        return float(new_val - old_val) / (float(old_val + new_val) / 2)
+    return float(new_val - old_val) / abs(old_val)
+
+
+def filter_benchmark(json_orig, family, replacement=""):
+    """
+    Apply a filter to the json, and only leave the 'family' of benchmarks.
+    """
+    regex = re.compile(family)
+    filtered = {}
+    filtered['benchmarks'] = []
+    for be in json_orig['benchmarks']:
+        if not regex.search(be['name']):
+            continue
+        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
+        filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
+        filtered['benchmarks'].append(filteredbench)
+    return filtered
+
+
+def get_unique_benchmark_names(json):
+    """
+    While *keeping* the order, give all the unique 'names' used for benchmarks.
+    """
+    seen = set()
+    uniqued = [x['name'] for x in json['benchmarks']
+               if x['name'] not in seen and
+               (seen.add(x['name']) or True)]
+    return uniqued
+
+
+def intersect(list1, list2):
+    """
+    Given two lists, get a new list consisting of the elements only contained
+    in *both of the input lists*, while preserving the ordering.
+    """
+    return [x for x in list1 if x in list2]
+
+
+def is_potentially_comparable_benchmark(x):
+    return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)
+
+
+def partition_benchmarks(json1, json2):
+    """
+    While preserving the ordering, find benchmarks with the same names in
+    both of the inputs, and group them.
+    (i.e. partition/filter into groups with common name)
+    """
+    json1_unique_names = get_unique_benchmark_names(json1)
+    json2_unique_names = get_unique_benchmark_names(json2)
+    names = intersect(json1_unique_names, json2_unique_names)
+    partitions = []
+    for name in names:
+        time_unit = None
+        # Pick the time unit from the first entry of the lhs benchmark.
+        # We should be careful not to crash with unexpected input.
+        for x in json1['benchmarks']:
+            if (x['name'] == name and is_potentially_comparable_benchmark(x)):
+                time_unit = x['time_unit']
+                break
+        if time_unit is None:
+            continue
+        # Filter by name and time unit.
+        # All the repetitions are assumed to be comparable.
+        lhs = [x for x in json1['benchmarks'] if x['name'] == name and
+               x['time_unit'] == time_unit]
+        rhs = [x for x in json2['benchmarks'] if x['name'] == name and
+               x['time_unit'] == time_unit]
+        partitions.append([lhs, rhs])
+    return partitions
+
+
+def extract_field(partition, field_name):
+    # The count of elements may be different. We want *all* of them.
+    lhs = [x[field_name] for x in partition[0]]
+    rhs = [x[field_name] for x in partition[1]]
+    return [lhs, rhs]
+
+def calc_utest(timings_cpu, timings_time):
+    min_rep_cnt = min(len(timings_time[0]),
+                      len(timings_time[1]),
+                      len(timings_cpu[0]),
+                      len(timings_cpu[1]))
+
+    # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions?
+    if min_rep_cnt < UTEST_MIN_REPETITIONS:
+        return False, None, None
+
+    time_pvalue = mannwhitneyu(
+        timings_time[0], timings_time[1], alternative='two-sided').pvalue
+    cpu_pvalue = mannwhitneyu(
+        timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue
+
+    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
+
+def print_utest(partition, utest_alpha, first_col_width, use_color=True):
+    def get_utest_color(pval):
+        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN
+
+    timings_time = extract_field(partition, 'real_time')
+    timings_cpu = extract_field(partition, 'cpu_time')
+    have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
+
+    # Check if we failed miserably with minimum required repetitions for utest
+    if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None:
+        return []
+
+    dsc = "U Test, Repetitions: {} vs {}".format(
+        len(timings_cpu[0]), len(timings_cpu[1]))
+    dsc_color = BC_OKGREEN
+
+    # We still got some results to show but issue a warning about it.
+    if not have_optimal_repetitions:
+        dsc_color = BC_WARNING
+        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
+            UTEST_OPTIMAL_REPETITIONS)
+
+    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{}      {}"
+
+    last_name = partition[0][0]['name']
+    return [color_format(use_color,
+                         special_str,
+                         BC_HEADER,
+                         "{}{}".format(last_name, UTEST_COL_NAME),
+                         first_col_width,
+                         get_utest_color(time_pvalue), time_pvalue,
+                         get_utest_color(cpu_pvalue), cpu_pvalue,
+                         dsc_color, dsc,
+                         endc=BC_ENDC)]
+
+
+def generate_difference_report(
+        json1,
+        json2,
+        display_aggregates_only=False,
+        utest=False,
+        utest_alpha=0.05,
+        use_color=True):
+    """
+    Calculate and report the difference between each test of two benchmarks
+    runs specified as 'json1' and 'json2'.
+    """
+    assert utest is True or utest is False
+    first_col_width = find_longest_name(json1['benchmarks'])
+
+    def find_test(name):
+        for b in json2['benchmarks']:
+            if b['name'] == name:
+                return b
+        return None
+
+    first_col_width = max(
+        first_col_width,
+        len('Benchmark'))
+    first_col_width += len(UTEST_COL_NAME)
+    first_line = "{:<{}s}Time             CPU      Time Old      Time New       CPU Old       CPU New".format(
+        'Benchmark', 12 + first_col_width)
+    output_strs = [first_line, '-' * len(first_line)]
+
+    partitions = partition_benchmarks(json1, json2)
+    for partition in partitions:
+        # Careful, we may have different repetition count.
+        for i in range(min(len(partition[0]), len(partition[1]))):
+            bn = partition[0][i]
+            other_bench = partition[1][i]
+
+            # *If* we were asked to only display aggregates,
+            # and if it is non-aggregate, then skip it.
+            if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench:
+                assert bn['run_type'] == other_bench['run_type']
+                if bn['run_type'] != 'aggregate':
+                    continue
+
+            fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
+
+            def get_color(res):
+                if res > 0.05:
+                    return BC_FAIL
+                elif res > -0.07:
+                    return BC_WHITE
+                else:
+                    return BC_CYAN
+
+            tres = calculate_change(bn['real_time'], other_bench['real_time'])
+            cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
+            output_strs += [color_format(use_color,
+                                         fmt_str,
+                                         BC_HEADER,
+                                         bn['name'],
+                                         first_col_width,
+                                         get_color(tres),
+                                         tres,
+                                         get_color(cpures),
+                                         cpures,
+                                         bn['real_time'],
+                                         other_bench['real_time'],
+                                         bn['cpu_time'],
+                                         other_bench['cpu_time'],
+                                         endc=BC_ENDC)]
+
+        # After processing the whole partition, if requested, do the U test.
+        if utest:
+            output_strs += print_utest(partition,
+                                       utest_alpha=utest_alpha,
+                                       first_col_width=first_col_width,
+                                       use_color=use_color)
+
+    return output_strs
+
+
+###############################################################################
+# Unit tests
+
+
+class TestGetUniqueBenchmarkNames(unittest.TestCase):
+    def load_results(self):
+        import json
+        testInputs = os.path.join(
+            os.path.dirname(
+                os.path.realpath(__file__)),
+            'Inputs')
+        testOutput = os.path.join(testInputs, 'test3_run0.json')
+        with open(testOutput, 'r') as f:
+            json = json.load(f)
+        return json
+
+    def test_basic(self):
+        expect_lines = [
+            'BM_One',
+            'BM_Two',
+            'short',  # These two are not sorted
+            'medium',  # These two are not sorted
+        ]
+        json = self.load_results()
+        output_lines = get_unique_benchmark_names(json)
+        print("\n")
+        print("\n".join(output_lines))
+        self.assertEqual(len(output_lines), len(expect_lines))
+        for i in range(0, len(output_lines)):
+            self.assertEqual(expect_lines[i], output_lines[i])
+
+
+class TestReportDifference(unittest.TestCase):
+    """Diffs two runs of the same benchmarks and checks every report row."""
+
+    def load_results(self):
+        """Return the parsed 'test1_run1.json' and 'test1_run2.json' fixtures
+        found in the 'Inputs' directory next to this file."""
+        import json
+        inputs_dir = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)), 'Inputs')
+        loaded = []
+        for fixture in ('test1_run1.json', 'test1_run2.json'):
+            with open(os.path.join(inputs_dir, fixture), 'r') as f:
+                loaded.append(json.load(f))
+        return loaded[0], loaded[1]
+
+    def test_basic(self):
+        """Every report row must split into the seven expected columns."""
+        expect_lines = [
+            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
+            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
+            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
+            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
+            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
+            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
+            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
+            ['BM_100xSlower', '+99.0000', '+99.0000',
+                '100', '10000', '100', '10000'],
+            ['BM_100xFaster', '-0.9900', '-0.9900',
+                '10000', '100', '10000', '100'],
+            ['BM_10PercentCPUToTime', '+0.1000',
+                '-0.1000', '100', '110', '100', '90'],
+            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
+            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
+        ]
+        run1, run2 = self.load_results()
+        report = generate_difference_report(run1, run2, use_color=False)
+        # The first two report lines are the header and are not compared.
+        rows = report[2:]
+        print("\n")
+        print("\n".join(report))
+        self.assertEqual(len(rows), len(expect_lines))
+        for expected, row in zip(expect_lines, rows):
+            columns = [x for x in row.split(' ') if x]
+            self.assertEqual(len(columns), 7)
+            self.assertEqual(expected, columns)
+
+
+class TestReportDifferenceBetweenFamilies(unittest.TestCase):
+    """Diffs two benchmark families filtered out of a single result file."""
+
+    def load_result(self):
+        """Return the parsed 'test2_run.json' fixture found in the 'Inputs'
+        directory next to this file."""
+        import json
+        fixture = os.path.join(
+            os.path.dirname(os.path.realpath(__file__)),
+            'Inputs',
+            'test2_run.json')
+        with open(fixture, 'r') as f:
+            return json.load(f)
+
+    def test_basic(self):
+        """Rows produced from the two filtered families must match exactly."""
+        expect_lines = [
+            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
+            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
+            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
+            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
+        ]
+        results = self.load_result()
+        family1 = filter_benchmark(results, "BM_Z.ro", ".")
+        family2 = filter_benchmark(results, "BM_O.e", ".")
+        report = generate_difference_report(family1, family2, use_color=False)
+        # The first two report lines are the header and are not compared.
+        rows = report[2:]
+        print("\n")
+        print("\n".join(report))
+        self.assertEqual(len(rows), len(expect_lines))
+        for expected, row in zip(expect_lines, rows):
+            columns = [x for x in row.split(' ') if x]
+            self.assertEqual(len(columns), 7)
+            self.assertEqual(expected, columns)
+
+
+class TestReportDifferenceWithUTest(unittest.TestCase):
+    """Checks the difference report when the U test is enabled."""
+
+    def load_results(self):
+        """Return the parsed 'test3_run0.json' and 'test3_run1.json' fixtures
+        found in the 'Inputs' directory next to this file."""
+        import json
+        testInputs = os.path.join(
+            os.path.dirname(
+                os.path.realpath(__file__)),
+            'Inputs')
+        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
+        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
+        with open(testOutput1, 'r') as f:
+            json1 = json.load(f)
+        with open(testOutput2, 'r') as f:
+            json2 = json.load(f)
+        return json1, json2
+
+    def test_utest(self):
+        """Report rows, including the '*_pvalue' rows, must match exactly.
+
+        Rows are split on spaces, so the trailing U-test message appears as
+        one list element per word.
+        """
+        expect_lines = [
+            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
+            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
+            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
+            ['BM_Two_pvalue',
+             '0.6985',
+             '0.6985',
+             'U',
+             'Test,',
+             'Repetitions:',
+             '2',
+             'vs',
+             '2.',
+             'WARNING:',
+             'Results',
+             'unreliable!',
+             '9+',
+             'repetitions',
+             'recommended.'],
+            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
+            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
+            ['short_pvalue',
+             '0.7671',
+             '0.1489',
+             'U',
+             'Test,',
+             'Repetitions:',
+             '2',
+             'vs',
+             '3.',
+             'WARNING:',
+             'Results',
+             'unreliable!',
+             '9+',
+             'repetitions',
+             'recommended.'],
+            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
+        ]
+        json1, json2 = self.load_results()
+        output_lines_with_header = generate_difference_report(
+            json1, json2, utest=True, utest_alpha=0.05, use_color=False)
+        # The first two report lines are the header and are not compared.
+        output_lines = output_lines_with_header[2:]
+        print("\n")
+        print("\n".join(output_lines_with_header))
+        self.assertEqual(len(output_lines), len(expect_lines))
+        for expected, line in zip(expect_lines, output_lines):
+            parts = [x for x in line.split(' ') if x]
+            self.assertEqual(expected, parts)
+
+
+class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
+        unittest.TestCase):
+    """Checks the U-test report when 'display_aggregates_only' is set."""
+
+    def load_results(self):
+        """Return the parsed 'test3_run0.json' and 'test3_run1.json' fixtures
+        found in the 'Inputs' directory next to this file."""
+        import json
+        testInputs = os.path.join(
+            os.path.dirname(
+                os.path.realpath(__file__)),
+            'Inputs')
+        testOutput1 = os.path.join(testInputs, 'test3_run0.json')
+        testOutput2 = os.path.join(testInputs, 'test3_run1.json')
+        with open(testOutput1, 'r') as f:
+            json1 = json.load(f)
+        with open(testOutput2, 'r') as f:
+            json2 = json.load(f)
+        return json1, json2
+
+    def test_utest(self):
+        """Report rows, including the '*_pvalue' rows, must match exactly.
+
+        Rows are split on spaces, so the trailing U-test message appears as
+        one list element per word.
+        """
+        expect_lines = [
+            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
+            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
+            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
+            ['BM_Two_pvalue',
+             '0.6985',
+             '0.6985',
+             'U',
+             'Test,',
+             'Repetitions:',
+             '2',
+             'vs',
+             '2.',
+             'WARNING:',
+             'Results',
+             'unreliable!',
+             '9+',
+             'repetitions',
+             'recommended.'],
+            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
+            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
+            ['short_pvalue',
+             '0.7671',
+             '0.1489',
+             'U',
+             'Test,',
+             'Repetitions:',
+             '2',
+             'vs',
+             '3.',
+             'WARNING:',
+             'Results',
+             'unreliable!',
+             '9+',
+             'repetitions',
+             'recommended.'],
+        ]
+        json1, json2 = self.load_results()
+        output_lines_with_header = generate_difference_report(
+            json1, json2, display_aggregates_only=True,
+            utest=True, utest_alpha=0.05, use_color=False)
+        # The first two report lines are the header and are not compared.
+        output_lines = output_lines_with_header[2:]
+        print("\n")
+        print("\n".join(output_lines_with_header))
+        self.assertEqual(len(output_lines), len(expect_lines))
+        for expected, line in zip(expect_lines, output_lines):
+            parts = [x for x in line.split(' ') if x]
+            self.assertEqual(expected, parts)
+
+
+# Run the test cases defined in this module when it is executed as a script.
+if __name__ == '__main__':
+    unittest.main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
--- a/Utilities/Scripts/gbench/util.py
+++ b/Utilities/Scripts/gbench/util.py
@ -0,0 +1,164 @@
+"""util.py - General utilities for running, loading, and processing benchmarks
+"""
+import json
+import os
+import tempfile
+import subprocess
+import sys
+
+# Input file type enumeration
+IT_Invalid = 0
+IT_JSON = 1
+IT_Executable = 2
+
+# Number of leading bytes is_executable_file() reads from a candidate file:
+# 2 on Windows (compared against b'MZ'), 4 elsewhere (compared against the
+# 4-byte Mach-O and ELF magic values).
+_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
+
+
+def is_executable_file(filename):
+    """
+    Report whether 'filename' is an existing regular file that looks like a
+    native executable for the current platform. The decision is based only
+    on the file's leading magic bytes: Mach-O on macOS, 'MZ' on Windows and
+    ELF everywhere else.
+    """
+    if not os.path.isfile(filename):
+        return False
+    with open(filename, mode='rb') as f:
+        magic_bytes = f.read(_num_magic_bytes)
+    platform = sys.platform
+    if platform == 'darwin':
+        mach_o_magics = (
+            b'\xfe\xed\xfa\xce',  # MH_MAGIC
+            b'\xce\xfa\xed\xfe',  # MH_CIGAM
+            b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
+            b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
+            b'\xca\xfe\xba\xbe',  # FAT_MAGIC
+            b'\xbe\xba\xfe\xca',  # FAT_CIGAM
+        )
+        return magic_bytes in mach_o_magics
+    if platform.startswith('win'):
+        return magic_bytes == b'MZ'
+    return magic_bytes == b'\x7FELF'
+
+
+def is_json_file(filename):
+    """
+    Returns 'True' if 'filename' names a valid JSON output file.
+    'False' otherwise.
+
+    Any ordinary error while opening or parsing the file (missing file,
+    permission problem, malformed JSON, undecodable bytes, ...) yields
+    'False'.
+    """
+    try:
+        with open(filename, 'r') as f:
+            json.load(f)
+    except Exception:
+        # Catch 'Exception' rather than 'BaseException' so that
+        # KeyboardInterrupt and SystemExit still propagate to the caller
+        # instead of being reported as "not a JSON file".
+        return False
+    return True
+
+
+def classify_input_file(filename):
+    """
+    Classify 'filename' and return a tuple (type, msg). 'type' is one of the
+    IT_* constants. 'msg' is None unless 'type' is 'IT_Invalid', in which
+    case it is a human readable string representing the error.
+    """
+    if not os.path.exists(filename):
+        return IT_Invalid, "'%s' does not exist" % filename
+    if not os.path.isfile(filename):
+        return IT_Invalid, "'%s' does not name a file" % filename
+    # The executable check runs before the JSON check.
+    if is_executable_file(filename):
+        return IT_Executable, None
+    if is_json_file(filename):
+        return IT_JSON, None
+    return IT_Invalid, \
+        "'%s' does not name a valid benchmark executable or JSON file" % filename
+
+
+def check_input_file(filename):
+    """
+    Return the IT_* classification of the file named by 'filename'.
+    When the classification is 'IT_Invalid', print the reason and terminate
+    the program with exit status 1 instead of returning.
+    """
+    kind, reason = classify_input_file(filename)
+    if kind == IT_Invalid:
+        print("Invalid input file: %s" % reason)
+        sys.exit(1)
+    return kind
+
+
+def find_benchmark_flag(prefix, benchmark_flags):
+    """
+    Look through 'benchmark_flags' for flags of the form `<prefix><arg>` and
+    return the `<arg>` part of the last one. Return None when no flag starts
+    with 'prefix'. 'prefix' must start with '--' and end with '='.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    values = [flag[len(prefix):]
+              for flag in benchmark_flags
+              if flag.startswith(prefix)]
+    # Later occurrences override earlier ones.
+    return values[-1] if values else None
+
+
+def remove_benchmark_flags(prefix, benchmark_flags):
+    """
+    Build and return a new list holding every entry of 'benchmark_flags'
+    that does not start with 'prefix'. 'prefix' must start with '--' and
+    end with '='.
+    """
+    assert prefix.startswith('--') and prefix.endswith('=')
+    kept = []
+    for flag in benchmark_flags:
+        if not flag.startswith(prefix):
+            kept.append(flag)
+    return kept
+
+
+def load_benchmark_results(fname):
+    """
+    Parse the file named 'fname' as JSON and return the resulting object.
+    REQUIRES: 'fname' names a file containing JSON benchmark output.
+    """
+    with open(fname, 'r') as results_file:
+        results = json.load(results_file)
+    return results
+
+
+def run_benchmark(exe_name, benchmark_flags):
+    """
+    Run a benchmark specified by 'exe_name' with the specified
+    'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
+    real time console output.
+
+    If the flags contain no '--benchmark_out=<file>' entry, a temporary
+    output file is created, passed to the benchmark and removed again before
+    this function returns or exits. A non-zero exit code from the benchmark
+    terminates the program with that exit code.
+
+    RETURNS: A JSON object representing the benchmark output
+    """
+    # Work on a private list: the caller's sequence is never modified and
+    # any iterable of flags (e.g. a tuple) is accepted.
+    benchmark_flags = list(benchmark_flags)
+    output_name = find_benchmark_flag('--benchmark_out=',
+                                      benchmark_flags)
+    is_temp_output = output_name is None
+    if is_temp_output:
+        thandle, output_name = tempfile.mkstemp()
+        # Only the name is needed; the benchmark reopens the file itself.
+        os.close(thandle)
+        benchmark_flags.append('--benchmark_out=%s' % output_name)
+
+    try:
+        cmd = [exe_name] + benchmark_flags
+        print("RUNNING: %s" % ' '.join(cmd))
+        exitCode = subprocess.call(cmd)
+        if exitCode != 0:
+            print('TEST FAILED...')
+            sys.exit(exitCode)
+        return load_benchmark_results(output_name)
+    finally:
+        # Also reached via sys.exit() above and when loading the results
+        # raises, so the temporary file is not left behind on failure.
+        if is_temp_output:
+            os.unlink(output_name)
+
+
+def run_or_load_benchmark(filename, benchmark_flags):
+    """
+    Return the results for the benchmark named by 'filename'. An executable
+    is run with 'benchmark_flags' to produce the results; a JSON output file
+    is simply loaded. check_input_file() is used for the classification, so
+    any other kind of input terminates the program.
+    """
+    ftype = check_input_file(filename)
+    if ftype == IT_Executable:
+        return run_benchmark(filename, benchmark_flags)
+    if ftype == IT_JSON:
+        return load_benchmark_results(filename)
+    assert False  # This branch is unreachable
--- a/Utilities/Scripts/strip_asm.py
+++ b/Utilities/Scripts/strip_asm.py
@ -0,0 +1,151 @@
+#!/usr/bin/env python
+
+"""
+strip_asm.py - Cleanup ASM output for the specified file
+"""
+
+from argparse import ArgumentParser
+import sys
+import os
+import re
+
+def find_used_labels(asm):
+    """
+    Return the set of '.L<name>' labels that are the target of a jump
+    instruction in 'asm'. A line counts as a jump when, after optional
+    leading whitespace, it starts with 'j' followed by lowercase letters
+    (jmp, jne, ...) and then a '.L' label.
+    """
+    found = set()
+    # Raw string: '\s' and '\.' are regex escapes, not Python string escapes.
+    label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
+    for line in asm.splitlines():
+        m = label_re.match(line)
+        if m:
+            found.add('.L%s' % m.group(1))
+    return found
+
+
+def normalize_labels(asm):
+    """
+    Return 'asm' with local labels rewritten to carry a leading '.'.
+
+    Label declarations are lines starting with 'L<name>:' or '.L<name>:'.
+    If there are none, or the declaration inspected already starts with
+    '.', 'asm' is returned unchanged. Otherwise every declared label is
+    prefixed with '.' wherever it appears at the start of the text or after
+    whitespace and is followed by ':' or whitespace.
+    """
+    decls = set()
+    label_decl = re.compile(r"^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
+    for line in asm.splitlines():
+        m = label_decl.match(line)
+        if m:
+            decls.add(m.group(0))
+    if len(decls) == 0:
+        return asm
+    # NOTE(review): only one arbitrary declaration is inspected, so this
+    # assumes all declarations in a file use the same style -- confirm.
+    needs_dot = next(iter(decls))[0] != '.'
+    if not needs_dot:
+        return asm
+    for ld in decls:
+        # Raw strings keep '\s' and '\1' as regex syntax.
+        asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", r'\1.' + ld, asm)
+    return asm
+
+
+def transform_labels(asm):
+    """
+    Normalize the labels in 'asm' and drop every '.L<name>:' declaration
+    line whose label is not the target of any jump. All other lines are
+    kept, and each kept line is terminated with a newline.
+    """
+    asm = normalize_labels(asm)
+    used_decls = find_used_labels(asm)
+    # Raw string: '\.' is a regex escape, not a Python string escape.
+    label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
+    kept = []
+    for line in asm.splitlines():
+        m = label_decl.match(line)
+        if not m or m.group(0) in used_decls:
+            kept.append(line + '\n')
+    return ''.join(kept)
+
+
+def is_identifier(tk):
+    """
+    Return True when 'tk' is non-empty, starts with a letter or '_', and
+    every following character is alphanumeric or '_'.
+    """
+    if not tk:
+        return False
+    head, tail = tk[0], tk[1:]
+    if not (head.isalpha() or head == '_'):
+        return False
+    return all(c.isalnum() or c == '_' for c in tail)
+
+def process_identifiers(l):
+    """
+    process_identifiers - rewrite the identifiers in line 'l' so that they
+    have consistent names across all platforms; specifically across ELF and
+    MachO. For example, MachO inserts an additional underscore at the
+    beginning of names. This function removes that: one leading '_' is
+    dropped from identifiers starting with '__Z', and from identifiers whose
+    '_' is followed by a letter other than 'Z'.
+    """
+    pieces = []
+    for tk in re.split(r'([a-zA-Z0-9_]+)', l):
+        if is_identifier(tk):
+            has_extra_underscore = tk.startswith('__Z') or (
+                tk.startswith('_') and len(tk) > 1 and
+                tk[1].isalpha() and tk[1] != 'Z')
+            if has_extra_underscore:
+                tk = tk[1:]
+        pieces.append(tk)
+    return ''.join(pieces)
+
+
+def process_asm(asm):
+    """
+    Strip the ASM of unwanted directives and lines
+
+    Labels are first passed through transform_labels(). Each remaining line
+    has '@GOTPCREL' removed, is dropped if it matches one of the discard
+    patterns (unless a keep pattern also matches), and otherwise has its
+    identifiers normalized by process_identifiers(). A blank line is
+    inserted before every function label except at the very start of the
+    output.
+    """
+    asm = transform_labels(asm)
+
+    # TODO: Add more things we want to remove
+    # Raw strings: '\s' and '\.' are regex escapes, not Python string escapes.
+    discard_regexes = [
+        re.compile(r"\s+\..*$"),  # directive
+        re.compile(r"\s*#(NO_APP|APP)$"),  # inline ASM
+        re.compile(r"\s*#.*$"),  # comment line
+        re.compile(r"\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"),  # global directive
+        re.compile(r"\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
+    ]
+    # Patterns that force a line to be kept even if a discard pattern
+    # matched; currently none.
+    keep_regexes = [
+
+    ]
+    fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
+    out = []
+    for line in asm.splitlines():
+        # Remove Mach-O attribute
+        line = line.replace('@GOTPCREL', '')
+        add_line = not any(reg.match(line) for reg in discard_regexes)
+        if any(reg.match(line) for reg in keep_regexes):
+            add_line = True
+        if add_line:
+            # Separate functions with a blank line. The label test must run
+            # on the line before its identifiers are rewritten.
+            if fn_label_def.match(line) and out:
+                out.append('\n')
+            out.append(process_identifiers(line) + '\n')
+    return ''.join(out)
+
+def main():
+    """
+    Command-line entry point: read the assembly file given as the first
+    positional argument, strip it with process_asm() and write the result
+    to the file given as the second positional argument. Exits with status
+    1 when the input file does not exist.
+    """
+    parser = ArgumentParser(
+        description='generate a stripped assembly file')
+    parser.add_argument(
+        'input', metavar='input', type=str, nargs=1,
+        help='An input assembly file')
+    parser.add_argument(
+        'out', metavar='output', type=str, nargs=1,
+        help='The output file')
+    # parse_known_args() accepts unrecognized arguments; they are ignored.
+    args, _ = parser.parse_known_args()
+    src_path = args.input[0]
+    dst_path = args.out[0]
+    if not os.path.isfile(src_path):
+        print(("ERROR: input file '%s' does not exist") % src_path)
+        sys.exit(1)
+    with open(src_path, 'r') as f:
+        original = f.read()
+    stripped = process_asm(original)
+    with open(dst_path, 'w') as f:
+        f.write(stripped)
+
+
+# Run the command-line entry point when this file is executed as a script.
+if __name__ == '__main__':
+    main()
+
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
+# kate: indent-mode python; remove-trailing-spaces modified;
--- a/Utilities/SetupForDevelopment.sh
+++ b/Utilities/SetupForDevelopment.sh
@ -3,6 +3,7 @@
 cd "${BASH_SOURCE%/*}/.." &&
 Utilities/GitSetup/setup-user && echo &&
 Utilities/GitSetup/setup-hooks && echo &&
+Utilities/GitSetup/setup-lfs && echo &&
 (Utilities/GitSetup/setup-upstream ||
 echo 'Failed to setup origin.  Run this again to retry.') && echo &&
 (Utilities/GitSetup/setup-gitlab ||
@ -27,3 +28,6 @@ echo "Set up git gitlab-push" &&
 git config alias.gitlab-sync '!bash Utilities/GitSetup/git-gitlab-sync' &&
 echo "Set up git gitlab-sync" &&
 true
+
+# Record the version of this setup in the local git config. The pre-commit
+# hook extracts the 'SetupForDevelopment_VERSION=' line below from this
+# script and compares it against the recorded value, so the variable name
+# must match exactly and the assignment must start at column 0.
+SetupForDevelopment_VERSION=1
+git config hooks.SetupForDevelopment ${SetupForDevelopment_VERSION}
--- a/Utilities/hooks/pre-commit
+++ b/Utilities/hooks/pre-commit
@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+# Print a 'pre-commit hook failure' banner followed by the given message
+# arguments to stderr, then exit with status 1.
+die() {
+  echo 'pre-commit hook failure' 1>&2
+  echo '-----------------------' 1>&2
+  echo '' 1>&2
+  echo "$@" 1>&2
+  exit 1
+}
+
+#-----------------------------------------------------------------------------
+
+# Check that development setup is up-to-date.
+# Version stored in the git config key hooks.SetupForDevelopment; 0 if unset.
+lastSetupForDevelopment=$(git config --get hooks.SetupForDevelopment || echo 0)
+# Extract the 'SetupForDevelopment_VERSION=...' assignment from the setup
+# script and evaluate it here to learn the current version.
+eval $(grep '^SetupForDevelopment_VERSION=' "${BASH_SOURCE%/*}/../SetupForDevelopment.sh")
+# Fall back to 0 when the setup script declares no version.
+test -n "$SetupForDevelopment_VERSION" || SetupForDevelopment_VERSION=0
+# Refuse the commit when the stored version is older than the current one.
+if test $lastSetupForDevelopment -lt $SetupForDevelopment_VERSION; then
+  die 'Developer setup in this work tree is out of date.  Please re-run
+
+  Utilities/SetupForDevelopment.sh
+'
+fi
--- a/Utilities/hooks/pre-push
+++ b/Utilities/hooks/pre-push
@ -0,0 +1,3 @@
+#!/bin/sh
+# Abort the push with exit status 2 when 'git-lfs' is not found on the path.
+command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path.\n"; exit 2; }
+# Delegate to git-lfs, forwarding the arguments this hook received.
+git lfs pre-push "$@"
--- a/Utilities/update-gitsetup.bash
+++ b/Utilities/update-gitsetup.bash
@ -18,6 +18,7 @@ README
 git-gitlab-push
 setup-gitlab
 setup-hooks
+setup-lfs
 setup-ssh
 setup-upstream
 setup-user
--- a/benchmarking/BenchmarkArrayTransfer.cxx
+++ b/benchmarking/BenchmarkArrayTransfer.cxx
--- a/benchmarking/BenchmarkAtomicArray.cxx
+++ b/benchmarking/BenchmarkAtomicArray.cxx
--- a/benchmarking/BenchmarkCopySpeeds.cxx
+++ b/benchmarking/BenchmarkCopySpeeds.cxx
@ -8,23 +8,19 @@
 //  PURPOSE.  See the above copyright notice for more information.
 //============================================================================

-#include <vtkm/TypeTraits.h>
-
 #include "Benchmarker.h"

+#include <vtkm/cont/Algorithm.h>
 #include <vtkm/cont/DeviceAdapter.h>
-#include <vtkm/cont/DeviceAdapterAlgorithm.h>
-#include <vtkm/cont/ErrorBadAllocation.h>
+#include <vtkm/cont/RuntimeDeviceTracker.h>
 #include <vtkm/cont/Timer.h>

-#include <vtkm/cont/serial/DeviceAdapterSerial.h>
-
 #include <vtkm/internal/Configure.h>

 #include <vtkm/testing/Testing.h>

-#include <iomanip>
-#include <iostream>
+#include <vtkm/List.h>
+
 #include <sstream>

 #ifdef VTKM_ENABLE_TBB
@ -34,145 +30,90 @@
 // For the TBB implementation, the number of threads can be customized using a
 // "NumThreads [numThreads]" argument.

-namespace vtkm
-{
-namespace benchmarking
-{
-
-const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
-const vtkm::UInt64 COPY_SIZE_MAX = (1 << 29); // 512 MiB
-const vtkm::UInt64 COPY_SIZE_INC = 1;         // Used as 'size <<= INC'
-
-const size_t COL_WIDTH = 32;
-
-template <typename ValueType, typename DeviceAdapter>
-struct MeasureCopySpeed
-{
-  using Algo = vtkm::cont::Algorithm;
-
-  vtkm::cont::ArrayHandle<ValueType> Source;
-  vtkm::cont::ArrayHandle<ValueType> Destination;
-  vtkm::UInt64 NumBytes;
-
-  VTKM_CONT
-  MeasureCopySpeed(vtkm::UInt64 bytes)
-    : NumBytes(bytes)
-  {
-    vtkm::Id numValues = static_cast<vtkm::Id>(bytes / sizeof(ValueType));
-    this->Source.Allocate(numValues);
-  }
-
-  VTKM_CONT vtkm::Float64 operator()()
-  {
-    vtkm::cont::Timer timer{ DeviceAdapter() };
-    timer.Start();
-    Algo::Copy(this->Source, this->Destination);
-
-    return timer.GetElapsedTime();
-  }
-
-  VTKM_CONT std::string Description() const
-  {
-    vtkm::UInt64 actualSize = sizeof(ValueType);
-    actualSize *= static_cast<vtkm::UInt64>(this->Source.GetNumberOfValues());
-    std::ostringstream out;
-    out << "Copying " << vtkm::cont::GetHumanReadableSize(this->NumBytes)
-        << " (actual=" << vtkm::cont::GetHumanReadableSize(actualSize) << ") of "
-        << vtkm::testing::TypeName<ValueType>::Name() << "\n";
-    return out.str();
-  }
-};
-
-void PrintRow(std::ostream& out, const std::string& label, const std::string& data)
-{
-  out << "| " << std::setw(COL_WIDTH) << label << " | " << std::setw(COL_WIDTH) << data << " |"
-      << std::endl;
-}
-
-void PrintDivider(std::ostream& out)
-{
-  const std::string fillStr(COL_WIDTH, '-');
-
-  out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl;
-}
-
-template <typename ValueType, typename DeviceAdapter>
-void BenchmarkValueType(vtkm::cont::DeviceAdapterId id)
-{
-  PrintRow(std::cout, vtkm::testing::TypeName<ValueType>::Name(), id.GetName());
-
-  PrintDivider(std::cout);
-
-  Benchmarker bench(15, 100);
-  for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC)
-  {
-    MeasureCopySpeed<ValueType, DeviceAdapter> functor(size);
-    bench.Reset();
-
-    std::string speedStr;
-
-    try
-    {
-      bench.GatherSamples(functor);
-      vtkm::Float64 speed = static_cast<Float64>(size) / stats::Mean(bench.GetSamples());
-      speedStr = vtkm::cont::GetHumanReadableSize(static_cast<UInt64>(speed)) + std::string("/s");
-    }
-    catch (vtkm::cont::ErrorBadAllocation&)
-    {
-      speedStr = "[allocation too large]";
-    }
-
-    PrintRow(std::cout, vtkm::cont::GetHumanReadableSize(size), speedStr);
-  }
-
-  std::cout << "\n";
-}
-}
-} // end namespace vtkm::benchmarking
-
 namespace
 {
-using namespace vtkm::benchmarking;

-struct BenchmarkValueTypeFunctor
+// Make this global so benchmarks can access the current device id:
+vtkm::cont::InitializeResult Config;
+
+const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB
+const vtkm::UInt64 COPY_SIZE_MAX = (1 << 30); // 1 GiB
+
+using TypeList = vtkm::List<vtkm::UInt8,
+                            vtkm::Vec2ui_8,
+                            vtkm::Vec3ui_8,
+                            vtkm::Vec4ui_8,
+                            vtkm::UInt32,
+                            vtkm::Vec2ui_32,
+                            vtkm::UInt64,
+                            vtkm::Vec2ui_64,
+                            vtkm::Float32,
+                            vtkm::Vec2f_32,
+                            vtkm::Float64,
+                            vtkm::Vec2f_64,
+                            vtkm::Pair<vtkm::UInt32, vtkm::Float32>,
+                            vtkm::Pair<vtkm::UInt32, vtkm::Float64>,
+                            vtkm::Pair<vtkm::UInt64, vtkm::Float32>,
+                            vtkm::Pair<vtkm::UInt64, vtkm::Float64>>;
+
+template <typename ValueType>
+void CopySpeed(benchmark::State& state)
 {
-  template <typename DeviceAdapter>
-  bool operator()(DeviceAdapter id)
+  const vtkm::cont::DeviceAdapterId device = Config.Device;
+  const vtkm::UInt64 numBytes = static_cast<vtkm::UInt64>(state.range(0));
+  const vtkm::Id numValues = static_cast<vtkm::Id>(numBytes / sizeof(ValueType));
+
+  state.SetLabel(vtkm::cont::GetHumanReadableSize(numBytes));
+
+  vtkm::cont::ArrayHandle<ValueType> src;
+  vtkm::cont::ArrayHandle<ValueType> dst;
+  src.Allocate(numValues);
+  dst.Allocate(numValues);
+
+  vtkm::cont::Timer timer(device);
+  for (auto _ : state)
  {
-    BenchmarkValueType<vtkm::UInt8, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec2ui_8, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec3ui_8, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec4ui_8, DeviceAdapter>(id);
+    (void)_;
+    timer.Start();
+    vtkm::cont::Algorithm::Copy(device, src, dst);
+    timer.Stop();

-    BenchmarkValueType<vtkm::UInt32, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec2ui_32, DeviceAdapter>(id);
-
-    BenchmarkValueType<vtkm::UInt64, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec2ui_64, DeviceAdapter>(id);
-
-    BenchmarkValueType<vtkm::Float32, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec2f_32, DeviceAdapter>(id);
-
-    BenchmarkValueType<vtkm::Float64, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Vec2f_64, DeviceAdapter>(id);
-
-    BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float32>, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Pair<vtkm::UInt32, vtkm::Float64>, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float32>, DeviceAdapter>(id);
-    BenchmarkValueType<vtkm::Pair<vtkm::UInt64, vtkm::Float64>, DeviceAdapter>(id);
-
-    return true;
+    state.SetIterationTime(timer.GetElapsedTime());
  }
-};
+
+  const int64_t iterations = static_cast<int64_t>(state.iterations());
+  state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
+  state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
 }
+VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed,
+                                ->Range(COPY_SIZE_MIN, COPY_SIZE_MAX)
+                                ->ArgName("Bytes"),
+                              TypeList);
+
+} // end anon namespace

 int main(int argc, char* argv[])
 {
-  auto opts = vtkm::cont::InitializeOptions::RequireDevice |
-    vtkm::cont::InitializeOptions::ErrorOnBadOption | vtkm::cont::InitializeOptions::AddHelp;
-  auto config = vtkm::cont::Initialize(argc, argv, opts);
+  // Parse VTK-m options:
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);

+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
+  // This occurs when it is help
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }
+
+// Handle NumThreads command-line arg:
 #ifdef VTKM_ENABLE_TBB
  int numThreads = tbb::task_scheduler_init::automatic;
 #endif // TBB
@ -196,6 +137,6 @@ int main(int argc, char* argv[])
  tbb::task_scheduler_init init(numThreads);
 #endif // TBB

-  BenchmarkValueTypeFunctor functor;
-  vtkm::cont::TryExecuteOnDevice(config.Device, functor);
+  // handle benchmarking related args and run benchmarks:
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
--- a/benchmarking/BenchmarkDeviceAdapter.cxx
+++ b/benchmarking/BenchmarkDeviceAdapter.cxx
--- a/benchmarking/BenchmarkFieldAlgorithms.cxx
+++ b/benchmarking/BenchmarkFieldAlgorithms.cxx
--- a/benchmarking/BenchmarkFilters.cxx
+++ b/benchmarking/BenchmarkFilters.cxx
--- a/benchmarking/BenchmarkODEIntegrators.cxx
+++ b/benchmarking/BenchmarkODEIntegrators.cxx
@ -0,0 +1,97 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+
+#include "Benchmarker.h"
+
+#include <vtkm/cont/DataSet.h>
+#include <vtkm/cont/DataSetBuilderUniform.h>
+#include <vtkm/cont/ErrorInternal.h>
+#include <vtkm/cont/Logging.h>
+#include <vtkm/cont/RuntimeDeviceTracker.h>
+#include <vtkm/cont/Timer.h>
+#include <vtkm/cont/internal/OptionParser.h>
+#include <vtkm/filter/ParticleAdvection.h>
+#include <vtkm/worklet/particleadvection/EulerIntegrator.h>
+#include <vtkm/worklet/particleadvection/RK4Integrator.h>
+#ifdef VTKM_ENABLE_TBB
+#include <tbb/task_scheduler_init.h>
+#endif
+#ifdef VTKM_ENABLE_OPENMP
+#include <omp.h>
+#endif
+
+
+namespace
+{
+// Hold configuration state (e.g. active device):
+// Assigned in main() from vtkm::cont::Initialize() before benchmarks run.
+vtkm::cont::InitializeResult Config;
+
+// Wrapper around RK4:
+// Benchmarks vtkm::filter::ParticleAdvection::Execute() on a 5x5x5 uniform
+// data set whose point field "vector" is the constant (1, 0, 0), using three
+// seed particles. The benchmark argument state.range(0) ("Steps") is used
+// both as the number of steps and as the divisor of the step size (1 / Steps).
+// NOTE(review): which integrator the filter uses is not visible in this
+// file -- confirm that it is RK4 as the comment above says.
+void BenchParticleAdvection(::benchmark::State& state)
+{
+  const vtkm::cont::DeviceAdapterId device = Config.Device;
+  const vtkm::Id3 dims(5, 5, 5);
+  const vtkm::Vec3f vecX(1, 0, 0);
+
+  vtkm::Id numPoints = dims[0] * dims[1] * dims[2];
+
+  // Fill the vector field with the same unit-X vector at every point.
+  std::vector<vtkm::Vec3f> vectorField(static_cast<std::size_t>(numPoints));
+  for (std::size_t i = 0; i < static_cast<std::size_t>(numPoints); i++)
+    vectorField[i] = vecX;
+
+  vtkm::cont::DataSetBuilderUniform dataSetBuilder;
+
+  vtkm::cont::DataSet ds = dataSetBuilder.Create(dims);
+  ds.AddPointField("vector", vectorField);
+
+  // Three seeds with ids 0, 1, 2 that differ only in their second coordinate.
+  vtkm::cont::ArrayHandle<vtkm::Particle> seedArray =
+    vtkm::cont::make_ArrayHandle({ vtkm::Particle(vtkm::Vec3f(.2f, 1.0f, .2f), 0),
+                                   vtkm::Particle(vtkm::Vec3f(.2f, 2.0f, .2f), 1),
+                                   vtkm::Particle(vtkm::Vec3f(.2f, 3.0f, .2f), 2) });
+
+  vtkm::filter::ParticleAdvection particleAdvection;
+
+  particleAdvection.SetStepSize(vtkm::FloatDefault(1) / state.range(0));
+  particleAdvection.SetNumberOfSteps(static_cast<vtkm::Id>(state.range(0)));
+  particleAdvection.SetSeeds(seedArray);
+  particleAdvection.SetActiveField("vector");
+  vtkm::cont::Timer timer{ device };
+  for (auto _ : state)
+  {
+    (void)_;
+    // Time each iteration with the VTK-m timer and report it to the
+    // benchmark library through SetIterationTime().
+    timer.Start();
+    auto output = particleAdvection.Execute(ds);
+    ::benchmark::DoNotOptimize(output);
+    timer.Stop();
+
+    state.SetIterationTime(timer.GetElapsedTime());
+  }
+  // Complexity is reported as a function of the number of steps.
+  state.SetComplexityN(state.range(0));
+}
+// Register the benchmark for Steps from 32 to 4096 with a range multiplier
+// of 2, and request complexity reporting.
+VTKM_BENCHMARK_OPTS(BenchParticleAdvection,
+                      ->RangeMultiplier(2)
+                      ->Range(32, 4096)
+                      ->ArgName("Steps")
+                      ->Complexity());
+
+} // end anon namespace
+
+// Entry point: initialize VTK-m from the command line, store the result in
+// Config, force the selected device, then run the registered benchmarks.
+int main(int argc, char* argv[])
+{
+  auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
+  // Work on a copy of argv; InitializeArgs() receives argc, the copy and opts.
+  // NOTE(review): InitializeArgs() is defined elsewhere; presumably it can
+  // change opts (the check below expects None to be possible) -- confirm.
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+  // Only force a device when opts was not reset to None.
+  if (opts != vtkm::cont::InitializeOptions::None)
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
+}
--- a/benchmarking/BenchmarkRayTracing.cxx
+++ b/benchmarking/BenchmarkRayTracing.cxx
@ -14,6 +14,7 @@

 #include <vtkm/cont/ArrayHandle.h>
 #include <vtkm/cont/DeviceAdapterAlgorithm.h>
+#include <vtkm/cont/Initialize.h>
 #include <vtkm/cont/Timer.h>
 #include <vtkm/cont/testing/MakeTestDataSet.h>

@ -25,120 +26,112 @@

 #include <vtkm/exec/FunctorBase.h>

-#include <vtkm/cont/ColorTable.hxx>
-
 #include <sstream>
 #include <string>
 #include <vector>

-using namespace vtkm::benchmarking;
-namespace vtkm
-{
-namespace benchmarking
+namespace
 {

-template <typename Precision, typename DeviceAdapter>
-struct BenchRayTracing
+// Hold configuration state (e.g. active device)
+vtkm::cont::InitializeResult Config;
+
+void BenchRayTracing(::benchmark::State& state)
 {
-  vtkm::rendering::raytracing::RayTracer Tracer;
-  vtkm::rendering::raytracing::Camera RayCamera;
-  vtkm::cont::ArrayHandle<vtkm::Id4> Indices;
-  vtkm::rendering::raytracing::Ray<Precision> Rays;
-  vtkm::cont::CoordinateSystem Coords;
-  vtkm::cont::DataSet Data;
+  const vtkm::Id3 dims(128, 128, 128);

-  VTKM_CONT ~BenchRayTracing() {}
+  vtkm::cont::testing::MakeTestDataSet maker;
+  auto dataset = maker.Make3DUniformDataSet3(dims);
+  auto coords = dataset.GetCoordinateSystem();

-  VTKM_CONT BenchRayTracing()
+  vtkm::rendering::Camera camera;
+  vtkm::Bounds bounds = dataset.GetCoordinateSystem().GetBounds();
+  camera.ResetToBounds(bounds);
+
+  vtkm::cont::DynamicCellSet cellset = dataset.GetCellSet();
+
+  vtkm::rendering::raytracing::TriangleExtractor triExtractor;
+  triExtractor.ExtractCells(cellset);
+
+  auto triIntersector = std::make_shared<vtkm::rendering::raytracing::TriangleIntersector>(
+    vtkm::rendering::raytracing::TriangleIntersector());
+
+  vtkm::rendering::raytracing::RayTracer tracer;
+  triIntersector->SetData(coords, triExtractor.GetTriangles());
+  tracer.AddShapeIntersector(triIntersector);
+
+  vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
+  vtkm::rendering::raytracing::Camera rayCamera;
+  rayCamera.SetParameters(camera, vtkm::Int32(canvas.GetWidth()), vtkm::Int32(canvas.GetHeight()));
+  vtkm::rendering::raytracing::Ray<vtkm::Float32> rays;
+  rayCamera.CreateRays(rays, coords.GetBounds());
+
+  rays.Buffers.at(0).InitConst(0.f);
+
+  vtkm::cont::Field field = dataset.GetField("pointvar");
+  vtkm::Range range = field.GetRange().ReadPortal().Get(0);
+
+  tracer.SetField(field, range);
+
+  vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> temp;
+  vtkm::cont::ColorTable table("cool to warm");
+  table.Sample(100, temp);
+
+  vtkm::cont::ArrayHandle<vtkm::Vec4f_32> colors;
+  colors.Allocate(100);
+  auto portal = colors.WritePortal();
+  auto colorPortal = temp.ReadPortal();
+  constexpr vtkm::Float32 conversionToFloatSpace = (1.0f / 255.0f);
+  for (vtkm::Id i = 0; i < 100; ++i)
  {
-    vtkm::Id3 dims(128, 128, 128);
-    vtkm::cont::testing::MakeTestDataSet maker;
-    Data = maker.Make3DUniformDataSet3(dims);
-    Coords = Data.GetCoordinateSystem();
-
-    vtkm::rendering::Camera camera;
-    vtkm::Bounds bounds = Data.GetCoordinateSystem().GetBounds();
-    camera.ResetToBounds(bounds);
-
-    vtkm::cont::DynamicCellSet cellset = Data.GetCellSet();
-
-    vtkm::rendering::raytracing::TriangleExtractor triExtractor;
-    triExtractor.ExtractCells(cellset);
-
-    auto triIntersector = std::make_shared<vtkm::rendering::raytracing::TriangleIntersector>(
-      vtkm::rendering::raytracing::TriangleIntersector());
-
-    triIntersector->SetData(Coords, triExtractor.GetTriangles());
-    Tracer.AddShapeIntersector(triIntersector);
-
-    vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
-    RayCamera.SetParameters(camera, canvas);
-    RayCamera.CreateRays(Rays, Coords.GetBounds());
-
-    Rays.Buffers.at(0).InitConst(0.f);
-
-    vtkm::cont::Field field = Data.GetField("pointvar");
-    vtkm::Range range = field.GetRange().GetPortalConstControl().Get(0);
-
-    Tracer.SetField(field, range);
-
-    vtkm::cont::ArrayHandle<vtkm::Vec4ui_8> temp;
-    vtkm::cont::ColorTable table("cool to warm");
-    table.Sample(100, temp);
-
-    vtkm::cont::ArrayHandle<vtkm::Vec4f_32> colors;
-    colors.Allocate(100);
-    auto portal = colors.GetPortalControl();
-    auto colorPortal = temp.GetPortalConstControl();
-    constexpr vtkm::Float32 conversionToFloatSpace = (1.0f / 255.0f);
-    for (vtkm::Id i = 0; i < 100; ++i)
-    {
-      auto color = colorPortal.Get(i);
-      vtkm::Vec4f_32 t(color[0] * conversionToFloatSpace,
-                       color[1] * conversionToFloatSpace,
-                       color[2] * conversionToFloatSpace,
-                       color[3] * conversionToFloatSpace);
-      portal.Set(i, t);
-    }
-
-    Tracer.SetColorMap(colors);
-    Tracer.Render(Rays);
+    auto color = colorPortal.Get(i);
+    vtkm::Vec4f_32 t(color[0] * conversionToFloatSpace,
+                     color[1] * conversionToFloatSpace,
+                     color[2] * conversionToFloatSpace,
+                     color[3] * conversionToFloatSpace);
+    portal.Set(i, t);
  }

-  VTKM_CONT
-  vtkm::Float64 operator()()
+  tracer.SetColorMap(colors);
+  tracer.Render(rays);
+
+  vtkm::cont::Timer timer{ Config.Device };
+  for (auto _ : state)
  {
-    vtkm::cont::Timer timer{ DeviceAdapter() };
+    (void)_;
    timer.Start();
+    rayCamera.CreateRays(rays, coords.GetBounds());
+    tracer.Render(rays);
+    timer.Stop();

-    RayCamera.CreateRays(Rays, Coords.GetBounds());
-    try
-    {
-      Tracer.Render(Rays);
-    }
-    catch (vtkm::cont::ErrorBadValue& e)
-    {
-      std::cout << "exception " << e.what() << "\n";
-    }
-
-    return timer.GetElapsedTime();
+    state.SetIterationTime(timer.GetElapsedTime());
  }
-
-  VTKM_CONT
-  std::string Description() const { return "A ray tracing benchmark"; }
-};
-
-VTKM_MAKE_BENCHMARK(RayTracing, BenchRayTracing);
 }
-} // end namespace vtkm::benchmarking

+VTKM_BENCHMARK(BenchRayTracing);
+
+} // end anon namespace

 int main(int argc, char* argv[])
 {
-  auto opts =
-    vtkm::cont::InitializeOptions::DefaultAnyDevice | vtkm::cont::InitializeOptions::Strict;
-  auto config = vtkm::cont::Initialize(argc, argv, opts);
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  VTKM_RUN_BENCHMARK(RayTracing, vtkm::ListTagBase<vtkm::Float32>(), config.Device);
-  return 0;
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
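+  // opts == None indicates that help was requested (opts is presumably reset by
+  // InitializeArgs above), so print the usage text instead of forcing a device.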
+  if (opts == vtkm::cont::InitializeOptions::None)
+  {
+    std::cout << Config.Usage << std::endl;
+  }
+  else
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
+  }
+
+  // handle benchmarking related args and run benchmarks:
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
--- a/benchmarking/BenchmarkTopologyAlgorithms.cxx
+++ b/benchmarking/BenchmarkTopologyAlgorithms.cxx
@ -7,40 +7,37 @@
 //  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 //  PURPOSE.  See the above copyright notice for more information.
 //============================================================================
+
+#include "Benchmarker.h"
+
 #include <vtkm/Math.h>
 #include <vtkm/VectorAnalysis.h>

 #include <vtkm/cont/ArrayHandle.h>
 #include <vtkm/cont/CellSetStructured.h>
+#include <vtkm/cont/Invoker.h>
 #include <vtkm/cont/Timer.h>

-#include <vtkm/worklet/DispatcherMapField.h>
-#include <vtkm/worklet/DispatcherMapTopology.h>
 #include <vtkm/worklet/WorkletMapField.h>
 #include <vtkm/worklet/WorkletMapTopology.h>

-#include "Benchmarker.h"
 #include <vtkm/cont/testing/Testing.h>

 #include <cctype>
 #include <random>
 #include <string>

-namespace vtkm
-{
-namespace benchmarking
+namespace
 {

 #define CUBE_SIZE 256
-static const std::string DIVIDER(40, '-');

-enum BenchmarkName
-{
-  CELL_TO_POINT = 1 << 1,
-  POINT_TO_CELL = 1 << 2,
-  MC_CLASSIFY = 1 << 3,
-  ALL = CELL_TO_POINT | POINT_TO_CELL | MC_CLASSIFY
-};
+using ValueTypes = vtkm::List<vtkm::UInt32, vtkm::Int32, vtkm::Int64, vtkm::Float32, vtkm::Float64>;
+
+using ValueVariantHandle = vtkm::cont::VariantArrayHandleBase<ValueTypes>;
+
+// Hold configuration state (e.g. active device)
+vtkm::cont::InitializeResult Config;

 class AveragePointToCell : public vtkm::worklet::WorkletVisitCellsWithPoints
 {
@ -118,376 +115,289 @@ public:
  }
 };

-struct ValueTypes
-  : vtkm::ListTagBase<vtkm::UInt32, vtkm::Int32, vtkm::Int64, vtkm::Float32, vtkm::Float64>
+template <typename T, typename Enable = void>
+struct NumberGenerator
 {
 };

-/// This class runs a series of micro-benchmarks to measure
-/// performance of different field operations
-class BenchmarkTopologyAlgorithms
+template <typename T>
+struct NumberGenerator<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
 {
-  using StorageTag = vtkm::cont::StorageTagBasic;
-
-  using Timer = vtkm::cont::Timer;
-
-  using ValueVariantHandle = vtkm::cont::VariantArrayHandleBase<ValueTypes>;
-
-private:
-  template <typename T, typename Enable = void>
-  struct NumberGenerator
+  std::mt19937 rng;
+  std::uniform_real_distribution<T> distribution;
+  NumberGenerator(T low, T high)
+    : rng()
+    , distribution(low, high)
  {
-  };
+  }
+  T next() { return distribution(rng); }
+};

-  template <typename T>
-  struct NumberGenerator<T, typename std::enable_if<std::is_floating_point<T>::value>::type>
+template <typename T>
+struct NumberGenerator<T, typename std::enable_if<!std::is_floating_point<T>::value>::type>
+{
+  std::mt19937 rng;
+  std::uniform_int_distribution<T> distribution;
+
+  NumberGenerator(T low, T high)
+    : rng()
+    , distribution(low, high)
  {
-    std::mt19937 rng;
-    std::uniform_real_distribution<T> distribution;
-    NumberGenerator(T low, T high)
-      : rng()
-      , distribution(low, high)
-    {
-    }
-    T next() { return distribution(rng); }
-  };
+  }
+  T next() { return distribution(rng); }
+};

-  template <typename T>
-  struct NumberGenerator<T, typename std::enable_if<!std::is_floating_point<T>::value>::type>
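+// Allocates `array` to `size` entries and fills each entry with a random value between
+// `min` and `max` (the bounds are cast to the array's ValueType). Returns one additional
+// value drawn from the same generator, i.e. one more random number beyond those stored.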
+template <typename ArrayT>
+VTKM_CONT typename ArrayT::ValueType FillRandomValues(ArrayT& array,
+                                                      vtkm::Id size,
+                                                      vtkm::Float64 min,
+                                                      vtkm::Float64 max)
+{
+  using ValueType = typename ArrayT::ValueType;
+
+  NumberGenerator<ValueType> generator{ static_cast<ValueType>(min), static_cast<ValueType>(max) };
+  array.Allocate(size);
+  auto portal = array.WritePortal();
+  for (vtkm::Id i = 0; i < size; ++i)
  {
-    std::mt19937 rng;
-    std::uniform_int_distribution<T> distribution;
+    portal.Set(i, generator.next());
+  }
+  return generator.next();
+}

-    NumberGenerator(T low, T high)
-      : rng()
-      , distribution(low, high)
-    {
-    }
-    T next() { return distribution(rng); }
-  };
+template <typename Value>
+struct BenchCellToPointAvgImpl
+{
+  vtkm::cont::ArrayHandle<Value> Input;

-  template <typename Value, typename DeviceAdapter>
-  struct BenchCellToPointAvg
+  ::benchmark::State& State;
+  vtkm::Id CubeSize;
+  vtkm::Id NumCells;
+
+  vtkm::cont::Timer Timer;
+  vtkm::cont::Invoker Invoker;
+
+  VTKM_CONT
+  BenchCellToPointAvgImpl(::benchmark::State& state)
+    : State{ state }
+    , CubeSize{ CUBE_SIZE }
+    , NumCells{ (this->CubeSize - 1) * (this->CubeSize - 1) * (this->CubeSize - 1) }
+    , Timer{ Config.Device }
+    , Invoker{ Config.Device }
  {
-    std::vector<Value> input;
-    vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
-    std::size_t DomainSize;
+    FillRandomValues(this->Input, this->NumCells, 1., 100.);

-    VTKM_CONT
-    BenchCellToPointAvg()
-    {
-      NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
-      //cube size is points in each dim
-      this->DomainSize = (CUBE_SIZE - 1) * (CUBE_SIZE - 1) * (CUBE_SIZE - 1);
-      this->input.resize(DomainSize);
-      for (std::size_t i = 0; i < DomainSize; ++i)
-      {
-        this->input[i] = generator.next();
-      }
-      this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
+    { // Configure label:
+      std::ostringstream desc;
+      desc << "CubeSize:" << this->CubeSize;
+      this->State.SetLabel(desc.str());
    }
+  }

-    VTKM_CONT
-    vtkm::Float64 operator()()
-    {
-      vtkm::cont::CellSetStructured<3> cellSet;
-      cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
-      vtkm::cont::ArrayHandle<Value, StorageTag> result;
-
-      Timer timer{ DeviceAdapter() };
-      timer.Start();
-
-      vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
-      dispatcher.Invoke(this->InputHandle, cellSet, result);
-
-      return timer.GetElapsedTime();
-    }
-
-    virtual std::string Type() const { return std::string("Static"); }
-
-    VTKM_CONT
-    std::string Description() const
-    {
-
-      std::stringstream description;
-      description << "Computing Cell To Point Average "
-                  << "[" << this->Type() << "] "
-                  << "with a domain size of: " << this->DomainSize;
-      return description.str();
-    }
-  };
-
-  template <typename Value, typename DeviceAdapter>
-  struct BenchCellToPointAvgDynamic : public BenchCellToPointAvg<Value, DeviceAdapter>
+  template <typename BenchArrayType>
+  VTKM_CONT void Run(const BenchArrayType& input)
  {
+    vtkm::cont::CellSetStructured<3> cellSet;
+    cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize });
+    vtkm::cont::ArrayHandle<Value> result;

-    VTKM_CONT
-    vtkm::Float64 operator()()
+    for (auto _ : this->State)
    {
-      vtkm::cont::CellSetStructured<3> cellSet;
-      cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
+      (void)_;
+      this->Timer.Start();
+      this->Invoker(AverageCellToPoint{}, input, cellSet, result);
+      this->Timer.Stop();

-      ValueVariantHandle dinput(this->InputHandle);
-      vtkm::cont::ArrayHandle<Value, StorageTag> result;
-
-      Timer timer{ DeviceAdapter() };
-      timer.Start();
-
-      vtkm::worklet::DispatcherMapTopology<AverageCellToPoint> dispatcher;
-      dispatcher.Invoke(dinput, cellSet, result);
-
-      return timer.GetElapsedTime();
+      this->State.SetIterationTime(this->Timer.GetElapsedTime());
    }

-    virtual std::string Type() const { return std::string("Dynamic"); }
-  };
-
-  VTKM_MAKE_BENCHMARK(CellToPointAvg, BenchCellToPointAvg);
-  VTKM_MAKE_BENCHMARK(CellToPointAvgDynamic, BenchCellToPointAvgDynamic);
-
-  template <typename Value, typename DeviceAdapter>
-  struct BenchPointToCellAvg
-  {
-    std::vector<Value> input;
-    vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
-    std::size_t DomainSize;
-
-    VTKM_CONT
-    BenchPointToCellAvg()
-    {
-      NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
-
-      this->DomainSize = (CUBE_SIZE) * (CUBE_SIZE) * (CUBE_SIZE);
-      this->input.resize(DomainSize);
-      for (std::size_t i = 0; i < DomainSize; ++i)
-      {
-        this->input[i] = generator.next();
-      }
-      this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
-    }
-
-    VTKM_CONT
-    vtkm::Float64 operator()()
-    {
-      vtkm::cont::CellSetStructured<3> cellSet;
-      cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
-      vtkm::cont::ArrayHandle<Value, StorageTag> result;
-
-      Timer timer{ DeviceAdapter() };
-      timer.Start();
-
-      vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
-      dispatcher.Invoke(this->InputHandle, cellSet, result);
-
-      return timer.GetElapsedTime();
-    }
-
-    virtual std::string Type() const { return std::string("Static"); }
-
-    VTKM_CONT
-    std::string Description() const
-    {
-
-      std::stringstream description;
-      description << "Computing Point To Cell Average "
-                  << "[" << this->Type() << "] "
-                  << "with a domain size of: " << this->DomainSize;
-      return description.str();
-    }
-  };
-
-  template <typename Value, typename DeviceAdapter>
-  struct BenchPointToCellAvgDynamic : public BenchPointToCellAvg<Value, DeviceAdapter>
-  {
-
-    VTKM_CONT
-    vtkm::Float64 operator()()
-    {
-      vtkm::cont::CellSetStructured<3> cellSet;
-      cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
-
-      ValueVariantHandle dinput(this->InputHandle);
-      vtkm::cont::ArrayHandle<Value, StorageTag> result;
-
-      Timer timer{ DeviceAdapter() };
-      timer.Start();
-
-      vtkm::worklet::DispatcherMapTopology<AveragePointToCell> dispatcher;
-      dispatcher.Invoke(dinput, cellSet, result);
-
-      return timer.GetElapsedTime();
-    }
-
-    virtual std::string Type() const { return std::string("Dynamic"); }
-  };
-
-  VTKM_MAKE_BENCHMARK(PointToCellAvg, BenchPointToCellAvg);
-  VTKM_MAKE_BENCHMARK(PointToCellAvgDynamic, BenchPointToCellAvgDynamic);
-
-  template <typename Value, typename DeviceAdapter>
-  struct BenchClassification
-  {
-    std::vector<Value> input;
-    vtkm::cont::ArrayHandle<Value, StorageTag> InputHandle;
-    Value IsoValue;
-    size_t DomainSize;
-
-    VTKM_CONT
-    BenchClassification()
-    {
-      NumberGenerator<Value> generator(static_cast<Value>(1.0), static_cast<Value>(100.0));
-
-      this->DomainSize = (CUBE_SIZE) * (CUBE_SIZE) * (CUBE_SIZE);
-      this->input.resize(DomainSize);
-      for (std::size_t i = 0; i < DomainSize; ++i)
-      {
-        this->input[i] = generator.next();
-      }
-      this->InputHandle = vtkm::cont::make_ArrayHandle(this->input);
-      this->IsoValue = generator.next();
-    }
-
-    VTKM_CONT
-    vtkm::Float64 operator()()
-    {
-      vtkm::cont::CellSetStructured<3> cellSet;
-      cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
-      vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
-
-      ValueVariantHandle dinput(this->InputHandle);
-
-      Timer timer{ DeviceAdapter() };
-      timer.Start();
-
-      Classification<Value> worklet(this->IsoValue);
-      vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
-      dispatcher.Invoke(dinput, cellSet, result);
-
-      return timer.GetElapsedTime();
-    }
-
-    virtual std::string Type() const { return std::string("Static"); }
-
-    VTKM_CONT
-    std::string Description() const
-    {
-
-      std::stringstream description;
-      description << "Computing Marching Cubes Classification "
-                  << "[" << this->Type() << "] "
-                  << "with a domain size of: " << this->DomainSize;
-      return description.str();
-    }
-  };
-
-  template <typename Value, typename DeviceAdapter>
-  struct BenchClassificationDynamic : public BenchClassification<Value, DeviceAdapter>
-  {
-    VTKM_CONT
-    vtkm::Float64 operator()()
-    {
-      vtkm::cont::CellSetStructured<3> cellSet;
-      cellSet.SetPointDimensions(vtkm::Id3(CUBE_SIZE, CUBE_SIZE, CUBE_SIZE));
-      vtkm::cont::ArrayHandle<vtkm::IdComponent, StorageTag> result;
-
-      Timer timer{ DeviceAdapter() };
-      timer.Start();
-
-      Classification<Value> worklet(this->IsoValue);
-      vtkm::worklet::DispatcherMapTopology<Classification<Value>> dispatcher(worklet);
-      dispatcher.Invoke(this->InputHandle, cellSet, result);
-
-      timer.Stop();
-      return timer.GetElapsedTime();
-    }
-
-    virtual std::string Type() const { return std::string("Dynamic"); }
-  };
-
-  VTKM_MAKE_BENCHMARK(Classification, BenchClassification);
-  VTKM_MAKE_BENCHMARK(ClassificationDynamic, BenchClassificationDynamic);
-
-public:
-  static VTKM_CONT int Run(int benchmarks, vtkm::cont::DeviceAdapterId id)
-  {
-    std::cout << DIVIDER << "\nRunning Topology Algorithm benchmarks\n";
-
-    if (benchmarks & CELL_TO_POINT)
-    {
-      std::cout << DIVIDER << "\nBenchmarking Cell To Point Average\n";
-      VTKM_RUN_BENCHMARK(CellToPointAvg, ValueTypes(), id);
-      VTKM_RUN_BENCHMARK(CellToPointAvgDynamic, ValueTypes(), id);
-    }
-
-    if (benchmarks & POINT_TO_CELL)
-    {
-      std::cout << DIVIDER << "\nBenchmarking Point to Cell Average\n";
-      VTKM_RUN_BENCHMARK(PointToCellAvg, ValueTypes(), id);
-      VTKM_RUN_BENCHMARK(PointToCellAvgDynamic, ValueTypes(), id);
-    }
-
-    if (benchmarks & MC_CLASSIFY)
-    {
-      std::cout << DIVIDER << "\nBenchmarking Hex/Voxel MC Classification\n";
-      VTKM_RUN_BENCHMARK(Classification, ValueTypes(), id);
-      VTKM_RUN_BENCHMARK(ClassificationDynamic, ValueTypes(), id);
-    }
-
-    return 0;
+    // #items = #points
+    const int64_t iterations = static_cast<int64_t>(this->State.iterations());
+    this->State.SetItemsProcessed(static_cast<int64_t>(cellSet.GetNumberOfPoints()) * iterations);
  }
 };

-#undef ARRAY_SIZE
-}
-} // namespace vtkm::benchmarking
+// Runs the cell-to-point average benchmark with the input passed as a statically typed
+// vtkm::cont::ArrayHandle<ValueType>. Registered for every type in ValueTypes.
+template <typename ValueType>
+void BenchCellToPointAvgStatic(::benchmark::State& state)
+{
+  BenchCellToPointAvgImpl<ValueType> impl{ state };
+  impl.Run(impl.Input);
+}
+VTKM_BENCHMARK_TEMPLATES(BenchCellToPointAvgStatic, ValueTypes);
+
+// Runs the cell-to-point average benchmark with the input wrapped in a
+// ValueVariantHandle, so the value type is resolved at run time by the invoker.
+// Registered for every type in ValueTypes.
+template <typename ValueType>
+void BenchCellToPointAvgDynamic(::benchmark::State& state)
+{
+  BenchCellToPointAvgImpl<ValueType> impl{ state };
+  impl.Run(ValueVariantHandle{ impl.Input });
+}
+VTKM_BENCHMARK_TEMPLATES(BenchCellToPointAvgDynamic, ValueTypes);
+
+// Shared setup and timing loop for the point-to-cell average benchmarks.
+// The constructor builds the random input; Run() times the worklet invocation on
+// whatever array type the caller passes in (static handle or variant handle).
+template <typename Value>
+struct BenchPointToCellAvgImpl
+{
+  // Random input values, one per point of the CubeSize^3 point grid.
+  vtkm::cont::ArrayHandle<Value> Input;
+
+  ::benchmark::State& State;
+  vtkm::Id CubeSize;
+  vtkm::Id NumPoints;
+
+  vtkm::cont::Timer Timer;
+  vtkm::cont::Invoker Invoker;
+
+  VTKM_CONT
+  BenchPointToCellAvgImpl(::benchmark::State& state)
+    : State{ state }
+    , CubeSize{ CUBE_SIZE }
+    , NumPoints{ this->CubeSize * this->CubeSize * this->CubeSize }
+    , Timer{ Config.Device }
+    , Invoker{ Config.Device }
+  {
+    FillRandomValues(this->Input, this->NumPoints, 1., 100.);
+
+    // Label the run with the cube edge length.
+    std::ostringstream label;
+    label << "CubeSize:" << this->CubeSize;
+    this->State.SetLabel(label.str());
+  }
+
+  // Times AveragePointToCell over a structured CubeSize^3 point grid using `input`.
+  template <typename BenchArrayType>
+  VTKM_CONT void Run(const BenchArrayType& input)
+  {
+    const vtkm::Id3 pointDims{ this->CubeSize, this->CubeSize, this->CubeSize };
+    vtkm::cont::CellSetStructured<3> cells;
+    cells.SetPointDimensions(pointDims);
+    vtkm::cont::ArrayHandle<Value> averaged;
+
+    for (auto _ : this->State)
+    {
+      (void)_;
+      this->Timer.Start();
+      this->Invoker(AveragePointToCell{}, input, cells, averaged);
+      this->Timer.Stop();
+
+      this->State.SetIterationTime(this->Timer.GetElapsedTime());
+    }
+
+    // Items processed = number of cells, once per iteration.
+    const auto numIterations = static_cast<int64_t>(this->State.iterations());
+    const auto cellsPerIteration = static_cast<int64_t>(cells.GetNumberOfCells());
+    this->State.SetItemsProcessed(cellsPerIteration * numIterations);
+  }
+};
+
+// Runs the point-to-cell average benchmark with the input passed as a statically typed
+// vtkm::cont::ArrayHandle<ValueType>. Registered for every type in ValueTypes.
+template <typename ValueType>
+void BenchPointToCellAvgStatic(::benchmark::State& state)
+{
+  BenchPointToCellAvgImpl<ValueType> impl{ state };
+  impl.Run(impl.Input);
+}
+VTKM_BENCHMARK_TEMPLATES(BenchPointToCellAvgStatic, ValueTypes);
+
+// Runs the point-to-cell average benchmark with the input wrapped in a
+// ValueVariantHandle, so the value type is resolved at run time by the invoker.
+// Registered for every type in ValueTypes.
+template <typename ValueType>
+void BenchPointToCellAvgDynamic(::benchmark::State& state)
+{
+  BenchPointToCellAvgImpl<ValueType> impl{ state };
+  impl.Run(ValueVariantHandle{ impl.Input });
+}
+VTKM_BENCHMARK_TEMPLATES(BenchPointToCellAvgDynamic, ValueTypes);
+
+// Shared setup and timing loop for the classification benchmarks.
+// The constructor builds the random input and picks the iso value; Run() times the
+// Classification worklet on whatever array type the caller passes in.
+template <typename Value>
+struct BenchClassificationImpl
+{
+  // Random input values, one per point of the CubeSize^3 point grid.
+  vtkm::cont::ArrayHandle<Value> Input;
+
+  ::benchmark::State& State;
+  vtkm::Id CubeSize;
+  vtkm::Id DomainSize;
+  Value IsoValue;
+
+  vtkm::cont::Timer Timer;
+  vtkm::cont::Invoker Invoker;
+
+  VTKM_CONT
+  BenchClassificationImpl(::benchmark::State& state)
+    : State{ state }
+    , CubeSize{ CUBE_SIZE }
+    , DomainSize{ this->CubeSize * this->CubeSize * this->CubeSize }
+    , Timer{ Config.Device }
+    , Invoker{ Config.Device }
+  {
+    // FillRandomValues populates Input and returns one more random value, which is
+    // used as the iso value for the classification.
+    this->IsoValue = FillRandomValues(this->Input, this->DomainSize, 1., 100.);
+
+    // Label the run with the cube edge length.
+    std::ostringstream label;
+    label << "CubeSize:" << this->CubeSize;
+    this->State.SetLabel(label.str());
+  }
+
+  // Times Classification<Value> over a structured CubeSize^3 point grid using `input`.
+  template <typename BenchArrayType>
+  VTKM_CONT void Run(const BenchArrayType& input)
+  {
+    const vtkm::Id3 pointDims{ this->CubeSize, this->CubeSize, this->CubeSize };
+    vtkm::cont::CellSetStructured<3> cells;
+    cells.SetPointDimensions(pointDims);
+    vtkm::cont::ArrayHandle<vtkm::IdComponent> caseIds;
+
+    // Build the worklet once, outside the timed region.
+    Classification<Value> classifier(this->IsoValue);
+
+    for (auto _ : this->State)
+    {
+      (void)_;
+      this->Timer.Start();
+      this->Invoker(classifier, input, cells, caseIds);
+      this->Timer.Stop();
+
+      this->State.SetIterationTime(this->Timer.GetElapsedTime());
+    }
+
+    // Items processed = number of cells, once per iteration.
+    const auto numIterations = static_cast<int64_t>(this->State.iterations());
+    const auto cellsPerIteration = static_cast<int64_t>(cells.GetNumberOfCells());
+    this->State.SetItemsProcessed(cellsPerIteration * numIterations);
+  }
+};
+
+// Runs the classification benchmark with the input passed as a statically typed
+// vtkm::cont::ArrayHandle<ValueType>. Registered for every type in ValueTypes.
+template <typename ValueType>
+void BenchClassificationStatic(::benchmark::State& state)
+{
+  BenchClassificationImpl<ValueType> impl{ state };
+  impl.Run(impl.Input);
+}
+VTKM_BENCHMARK_TEMPLATES(BenchClassificationStatic, ValueTypes);
+
+// Runs the classification benchmark with the input wrapped in a ValueVariantHandle,
+// so the value type is resolved at run time by the invoker.
+// Registered for every type in ValueTypes.
+template <typename ValueType>
+void BenchClassificationDynamic(::benchmark::State& state)
+{
+  BenchClassificationImpl<ValueType> impl{ state };
+  impl.Run(ValueVariantHandle{ impl.Input });
+}
+VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes);
+
+} // end anon namespace

 int main(int argc, char* argv[])
 {
-  auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
-  auto config = vtkm::cont::Initialize(argc, argv, opts);
+  // VTK-m initialization options (may be modified by InitializeArgs below):
+  auto opts = vtkm::cont::InitializeOptions::RequireDevice;

-  int benchmarks = 0;
-  if (argc <= 1)
+  std::vector<char*> args(argv, argv + argc);
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts);
+
+  // Parse VTK-m options:
+  Config = vtkm::cont::Initialize(argc, args.data(), opts);
+
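+  // opts == None indicates that help was requested (opts is presumably reset by
+  // InitializeArgs above), so print the usage text instead of forcing a device.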
+  if (opts == vtkm::cont::InitializeOptions::None)
  {
-    benchmarks = vtkm::benchmarking::ALL;
+    std::cout << Config.Usage << std::endl;
  }
  else
  {
-    for (int i = 1; i < argc; ++i)
-    {
-      std::string arg = argv[i];
-      std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) {
-        return static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
-      });
-      if (arg == "celltopoint")
-      {
-        benchmarks |= vtkm::benchmarking::CELL_TO_POINT;
-      }
-      else if (arg == "pointtocell")
-      {
-        benchmarks |= vtkm::benchmarking::POINT_TO_CELL;
-      }
-      else if (arg == "classify")
-      {
-        benchmarks |= vtkm::benchmarking::MC_CLASSIFY;
-      }
-      else
-      {
-        std::cerr << "Unrecognized benchmark: " << argv[i] << std::endl;
-        std::cerr << "USAGE: " << argv[0] << " [options] [<benchmarks>]" << std::endl;
-        std::cerr << "Options are: " << std::endl;
-        std::cerr << config.Usage << std::endl;
-        std::cerr << "Benchmarks are one or more of the following:" << std::endl;
-        std::cerr << "  CellToPoint\tFind average of point data on each cell" << std::endl;
-        std::cerr << "  PointToCell\tFind average of cell data on each point" << std::endl;
-        std::cerr << "  Classify\tFind Marching Cube case of each cell" << std::endl;
-        std::cerr << "If no benchmarks are specified, all are run." << std::endl;
-        return 1;
-      }
-    }
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
  }

-  //now actually execute the benchmarks
-
-  return vtkm::benchmarking::BenchmarkTopologyAlgorithms::Run(benchmarks, config.Device);
+  // handle benchmarking related args and run benchmarks:
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
 }
--- a/benchmarking/Benchmarker.h
+++ b/benchmarking/Benchmarker.h
@ -11,331 +11,418 @@
 #ifndef vtk_m_benchmarking_Benchmarker_h
 #define vtk_m_benchmarking_Benchmarker_h

-#include <vtkm/ListTag.h>
-#include <vtkm/Math.h>
-#include <vtkm/cont/DeviceAdapterTag.h>
-#include <vtkm/cont/TryExecute.h>
+#include <vtkm/cont/RuntimeDeviceTracker.h>
+#include <vtkm/cont/Timer.h>
+
 #include <vtkm/cont/testing/Testing.h>

-#include <algorithm>
-#include <iostream>
-#include <vector>
+#include <vtkm/internal/brigand.hpp>

-/*
- * Writing a Benchmark
- * -------------------
- * To write a benchmark you must provide a functor that will run the operations
- * you want to time and return the run time of those operations using the timer
- * for the device. The benchmark should also be templated on the value type being
- * operated on. Then use VTKM_MAKE_BENCHMARK to generate a maker functor and
- * VTKM_RUN_BENCHMARK to run the benchmark on a list of types.
- *
- * For Example:
- *
- * template<typename Value>
- * struct BenchSilly {
- *   // Setup anything that doesn't need to change per run in the constructor
- *   VTKM_CONT BenchSilly(){}
- *
- *   // The overloaded call operator will run the operations being timed and
- *   // return the execution time
- *   VTKM_CONT
- *   vtkm::Float64 operator()(){
- *     return 0.05;
- *   }
- *
- *   // The benchmark must also provide a method describing itself, this is
- *   // used when printing out run time statistics
- *   VTKM_CONT
- *   std::string Description() const {
- *     return "A silly benchmark";
- *   }
- * };
- *
- * // Now use the VTKM_MAKE_BENCHMARK macro to generate a maker functor for
- * // your benchmark. This lets us generate the benchmark functor for each type
- * // we want to test
- * VTKM_MAKE_BENCHMARK(Silly, BenchSilly);
- *
- * // You can also optionally pass arguments to the constructor like so:
- * // VTKM_MAKE_BENCHMARK(Blah, BenchBlah, 1, 2, 3);
- * // Note that benchmark names (the first argument) must be unique so different
- * // parameters to the constructor should have different names
- *
- * // We can now run our benchmark using VTKM_RUN_BENCHMARK, passing the
- * // benchmark name and type list to run on
- * int main(int, char**){
- *   VTKM_RUN_BENCHMARK(Silly, vtkm::ListTagBase<vtkm::Float32>());
- *   return 0;
- * }
- *
- * Check out vtkm/benchmarking/BenchmarkDeviceAdapter.h for some example usage
- */
+#include <benchmark/benchmark.h>

-/*
- * Use the VTKM_MAKE_BENCHMARK macro to define a maker functor for your benchmark.
- * This is used to allow you to template the benchmark functor on the type being benchmarked
- * and the device adapter so you can write init code in the constructor. Then the maker will
- * return a constructed instance of your benchmark for the type being benchmarked.
- * The VA_ARGS are used to pass any extra arguments needed by your benchmark
- */
-#define VTKM_MAKE_BENCHMARK(Name, Bench, ...)                                                      \
-  struct MakeBench##Name                                                                           \
-  {                                                                                                \
-    template <typename Value, typename DeviceAdapter>                                              \
-    VTKM_CONT Bench<Value, DeviceAdapter> operator()(const Value vtkmNotUsed(v),                   \
-                                                     DeviceAdapter vtkmNotUsed(id)) const          \
-    {                                                                                              \
-      return Bench<Value, DeviceAdapter>(__VA_ARGS__);                                             \
-    }                                                                                              \
-  }
+#include <ostream>

-/*
- * Use the VTKM_RUN_BENCHMARK macro to run your benchmark on the type list passed.
- * You must have previously defined a maker functor with VTKM_MAKE_BENCHMARK that this
- * macro will look for and use
- */
-#define VTKM_RUN_BENCHMARK(Name, Types, Id)                                                        \
-  vtkm::benchmarking::BenchmarkTypes(MakeBench##Name(), (Types), (Id))
+/// \file Benchmarker.h
+/// \brief Benchmarking utilities
+///
+/// VTK-m's benchmarking framework is built on top of Google Benchmark.
+///
+/// A benchmark is now a single function, which is passed to a macro:
+///
+/// ```
+/// void MyBenchmark(::benchmark::State& state)
+/// {
+///   MyClass someClass;
+///
+///   // Optional: Add a descriptive label with additional benchmark details:
+///   state.SetLabel("Blah blah blah.");
+///
+///   // Must use a vtkm timer to properly capture eg. CUDA execution times.
+///   vtkm::cont::Timer timer;
+///   for (auto _ : state)
+///   {
+///     someClass.Reset();
+///
+///     timer.Start();
+///     someClass.DoWork();
+///     timer.Stop();
+///
+///     state.SetIterationTime(timer.GetElapsedTime());
+///   }
+///
+///   // Optional: Report items and/or bytes processed per iteration in output:
+///   state.SetItemsProcessed(state.iterations() * someClass.GetNumberOfItems());
+///   state.SetBytesProcessed(state.iterations() * someClass.GetNumberOfBytes());
+/// }
+///
+/// VTKM_BENCHMARK(MyBenchmark);
+/// ```
+///
+/// Google benchmark also makes it easy to implement parameter sweep benchmarks:
+///
+/// ```
+/// void MyParameterSweep(::benchmark::State& state)
+/// {
+///   // The current value in the sweep:
+///   const vtkm::Id currentValue = state.range(0);
+///
+///   MyClass someClass;
+///   someClass.SetSomeParameter(currentValue);
+///
+///   vtkm::cont::Timer timer;
+///   for (auto _ : state)
+///   {
+///     someClass.Reset();
+///
+///     timer.Start();
+///     someClass.DoWork();
+///     timer.Stop();
+///
+///     state.SetIterationTime(timer.GetElapsedTime());
+///   }
+/// }
+/// VTKM_BENCHMARK_OPTS(MyParameterSweep, ->ArgName("Param")->Range(32, 1024 * 1024));
+/// ```
+///
+/// will generate and launch several benchmarks, exploring the parameter space of
+/// `SetSomeParameter` between the values of 32 and (1024*1024). The chain of
+/// function calls in the second argument is applied to an instance of
+/// ::benchmark::internal::Benchmark. See Google Benchmark's documentation for
+/// more details.
+///
+/// For more complex benchmark configurations, the VTKM_BENCHMARK_APPLY macro
+/// accepts a function with the signature
+/// `void Func(::benchmark::internal::Benchmark*)` that may be used to generate
+/// more complex configurations.
+///
+/// To instantiate a templated benchmark across a list of types, the
+/// VTKM_BENCHMARK_TEMPLATES* macros take a vtkm::List of types as an additional
+/// parameter. The templated benchmark function will be instantiated and called
+/// for each type in the list:
+///
+/// ```
+/// template <typename T>
+/// void MyBenchmark(::benchmark::State& state)
+/// {
+///   MyClass<T> someClass;
+///
+///   // Must use a vtkm timer to properly capture eg. CUDA execution times.
+///   vtkm::cont::Timer timer;
+///   for (auto _ : state)
+///   {
+///     someClass.Reset();
+///
+///     timer.Start();
+///     someClass.DoWork();
+///     timer.Stop();
+///
+///     state.SetIterationTime(timer.GetElapsedTime());
+///   }
+/// }
+///
+/// VTKM_BENCHMARK_TEMPLATES(MyBenchmark, vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
+/// ```
+///
+/// The benchmarks are executed by calling the `VTKM_EXECUTE_BENCHMARKS(argc, argv)`
+/// macro from `main`. There is also a `VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, some_string)`
+/// macro that appends the contents of `some_string` to the Google Benchmark preamble.
+///
+/// If a benchmark is not compatible with some configuration, it may call
+/// `state.SkipWithError("Error message");` on the `::benchmark::State` object and return. This is
+/// useful, for instance in the filter tests when the input is not compatible with the filter.
+///
+/// When launching a benchmark executable, the following options are supported by Google Benchmark:
+///
+/// - `--benchmark_list_tests`: List all available tests.
+/// - `--benchmark_filter="[regex]"`: Only run benchmarks with names that match `[regex]`.
+/// - `--benchmark_filter="-[regex]"`: Only run benchmarks with names that DON'T match `[regex]`.
+/// - `--benchmark_min_time=[float]`: Make sure each benchmark repetition gathers `[float]` seconds
+///   of data.
+/// - `--benchmark_repetitions=[int]`: Run each benchmark `[int]` times and report aggregate statistics
+///   (mean, stdev, etc). A "repetition" refers to a single execution of the benchmark function, not
+///   an "iteration", which is a loop of the `for(auto _:state){...}` section.
+/// - `--benchmark_report_aggregates_only="true|false"`: If true, only the aggregate statistics are
+///   reported (affects both console and file output). Requires `--benchmark_repetitions` to be useful.
+/// - `--benchmark_display_aggregates_only="true|false"`: If true, only the aggregate statistics are
+///   printed to the terminal. Any file output will still contain all repetition info.
+/// - `--benchmark_format="console|json|csv"`: Specify terminal output format: human readable
+///   (`console`) or `csv`/`json` formats.
+/// - `--benchmark_out_format="console|json|csv"`: Specify file output format: human readable
+///   (`console`) or `csv`/`json` formats.
+/// - `--benchmark_out=[filename]`: Specify output file.
+/// - `--benchmark_color="true|false"`: Toggle color output in terminal when using `console` output.
+/// - `--benchmark_counters_tabular="true|false"`: Print counter information (e.g. bytes/sec, items/sec)
+///   in the table, rather than appending them as a label.
+///
+/// For more information and examples of practical usage, take a look at the existing benchmarks in
+/// vtk-m/benchmarking/.
+
+/// \def VTKM_EXECUTE_BENCHMARKS(argc, argv)
+///
+/// Run the benchmarks defined in the current file. Benchmarks may be filtered
+/// and modified using the passed arguments; see the Google Benchmark documentation
+/// for more details.
+#define VTKM_EXECUTE_BENCHMARKS(argc, argv) vtkm::bench::detail::ExecuteBenchmarks(argc, argv)
+
+/// \def VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)
+///
+/// Run the benchmarks defined in the current file. Benchmarks may be filtered
+/// and modified using the passed arguments; see the Google Benchmark documentation
+/// for more details. The `preamble` string may be used to supply additional
+/// information that will be appended to the output's preamble.
+#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
+  vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
+
+/// \def VTKM_BENCHMARK(BenchFunc)
+///
+/// Define a simple benchmark. A single benchmark will be generated that executes
+/// `BenchFunc`. `BenchFunc` must have the signature:
+///
+/// ```
+/// void BenchFunc(::benchmark::State& state)
+/// ```
+#define VTKM_BENCHMARK(BenchFunc) \
+  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
+
+/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
+///
+/// Similar to `VTKM_BENCHMARK`, but allows additional options to be specified
+/// on the `::benchmark::internal::Benchmark` object. Example usage:
+///
+/// ```
+/// VTKM_BENCHMARK_OPTS(MyBenchmark, ->ArgName("MyParam")->Range(32, 1024*1024));
+/// ```
+///
+/// Note the similarity to the raw Google Benchmark usage of
+/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
+/// the Google Benchmark documentation for more details on the available options.
+#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
+  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options
+
+/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
+///
+/// Similar to `VTKM_BENCHMARK`, but allows advanced benchmark configuration
+/// via a supplied ConfigFunc, similar to Google Benchmark's
+/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
+/// signature:
+///
+/// ```
+/// void ConfigFunc(::benchmark::internal::Benchmark*);
+/// ```
+///
+/// See the Google Benchmark documentation for more details on the available options.
+#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
+  BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)
+
+/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
+///
+/// Define a family of benchmarks that vary by template argument. A single
+/// benchmark will be generated for each type in `TypeList` (a vtkm::List of
+/// types) that executes `BenchFunc<T>`. `BenchFunc` must have the signature:
+///
+/// ```
+/// template <typename T>
+/// void BenchFunc(::benchmark::State& state)
+/// ```
+#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
+  VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
+
+/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
+///
+/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows additional options to be specified
+/// on the `::benchmark::internal::Benchmark` object. Example usage:
+///
+/// ```
+/// VTKM_BENCHMARK_TEMPLATES_OPTS(MyBenchmark,
+///                               ->ArgName("MyParam")->Range(32, 1024*1024),
+///                               vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
+/// ```
+#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList)                          \
+  VTKM_BENCHMARK_TEMPLATES_APPLY(                                                            \
+    BenchFunc,                                                                               \
+    [](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
+    TypeList)
+
+/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
+///
+/// Similar to `VTKM_BENCHMARK_TEMPLATES`, but allows advanced benchmark configuration
+/// via a supplied ConfigFunc, similar to Google Benchmark's
+/// `BENCHMARK(BenchFunc)->Apply(ConfigFunc)`. `ConfigFunc` must have the
+/// signature:
+///
+/// ```
+/// void ConfigFunc(::benchmark::internal::Benchmark*);
+/// ```
+///
+/// See the Google Benchmark documentation for more details on the available options.
+#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList)                            \
+  namespace                                                                                          \
+  { /* A template function cannot be used as a template parameter, so wrap the function with       \
+     * a template struct to get it into the GenerateTemplateBenchmarks class. */ \
+  template <typename... Ts>                                                                          \
+  struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)                                                      \
+  {                                                                                                  \
+    static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; }               \
+  };                                                                                                 \
+  } /* end anon namespace */                                                                         \
+  int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks<           \
+    brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>,                                           \
+    TypeList>::Register(#BenchFunc, ApplyFunctor)
+
+// Internal use only:
+#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
+  BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)

 namespace vtkm
 {
-namespace benchmarking
+namespace bench
 {
-namespace stats
+namespace detail
 {
-// Checks that the sequence is sorted, returns true if it's sorted, false
-// otherwise
-template <typename ForwardIt>
-bool is_sorted(ForwardIt first, ForwardIt last)
+
+static inline void NullApply(::benchmark::internal::Benchmark*) {}
+
+/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
+/// instead.
+// TypeLists could be expanded to compute cross products if we ever have that
+// need.
+template <typename BoundBench, typename TypeLists>
+struct GenerateTemplateBenchmarks;
+
+template <template <typename...> class BenchType, typename TypeList>
+struct GenerateTemplateBenchmarks<brigand::bind<BenchType>, TypeList>
 {
-  ForwardIt next = first;
-  ++next;
-  for (; next != last; ++next, ++first)
+private:
+  template <typename T>
+  using MakeBenchType = BenchType<T>;
+
+  using Benchmarks = brigand::transform<TypeList, brigand::bind<MakeBenchType, brigand::_1>>;
+
+  template <typename ApplyFunctor>
+  struct RegisterImpl
  {
-    if (*first > *next)
+    std::string BenchName;
+    ApplyFunctor Apply;
+
+    template <typename P>
+    void operator()(brigand::type_<BenchType<P>>) const
+    {
+      std::ostringstream name;
+      name << this->BenchName << "<" << vtkm::testing::TypeName<P>::Name() << ">";
+      auto bm = ::benchmark::internal::RegisterBenchmarkInternal(
+        new ::benchmark::internal::FunctionBenchmark(name.str().c_str(),
+                                                     BenchType<P>::GetFunction()));
+      this->Apply(bm);
+
+      // Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
+      bm->UseManualTime()->Unit(benchmark::kMillisecond);
+    }
+  };
+
+public:
+  template <typename ApplyFunctor>
+  static int Register(const std::string& benchName, ApplyFunctor&& apply)
+  {
+    brigand::for_each<Benchmarks>(
+      RegisterImpl<ApplyFunctor>{ benchName, std::forward<ApplyFunctor>(apply) });
+    return 0;
+  }
+};
+
+class VTKmConsoleReporter : public ::benchmark::ConsoleReporter
+{
+  std::string UserPreamble;
+
+public:
+  VTKmConsoleReporter() = default;
+
+  explicit VTKmConsoleReporter(const std::string& preamble)
+    : UserPreamble{ preamble }
+  {
+  }
+
+  bool ReportContext(const Context& context) override
+  {
+    if (!::benchmark::ConsoleReporter::ReportContext(context))
    {
      return false;
    }
-  }
-  return true;
-}

-// Get the value representing the `percent` percentile of the
-// sorted samples using linear interpolation
-vtkm::Float64 PercentileValue(const std::vector<vtkm::Float64>& samples,
-                              const vtkm::Float64 percent)
-{
-  VTKM_ASSERT(!samples.empty());
-  if (samples.size() == 1)
-  {
-    return samples.front();
-  }
-  VTKM_ASSERT(percent >= 0.0);
-  VTKM_ASSERT(percent <= 100.0);
-  VTKM_ASSERT(vtkm::benchmarking::stats::is_sorted(samples.begin(), samples.end()));
-  if (percent == 100.0)
-  {
-    return samples.back();
-  }
-  // Find the two nearest percentile values and linearly
-  // interpolate between them
-  const vtkm::Float64 rank = percent / 100.0 * (static_cast<vtkm::Float64>(samples.size()) - 1.0);
-  const vtkm::Float64 low_rank = vtkm::Floor(rank);
-  const vtkm::Float64 dist = rank - low_rank;
-  const size_t k = static_cast<size_t>(low_rank);
-  const vtkm::Float64 low = samples[k];
-  const vtkm::Float64 high = samples[k + 1];
-  return low + (high - low) * dist;
-}
-// Winsorize the samples to clean up any very extreme outliers
-// Will replace all samples below `percent` and above 100 - `percent` percentiles
-// with the value at the percentile
-// NOTE: Assumes the samples have been sorted, as we make use of PercentileValue
-void Winsorize(std::vector<vtkm::Float64>& samples, const vtkm::Float64 percent)
-{
-  const vtkm::Float64 low_percentile = PercentileValue(samples, percent);
-  const vtkm::Float64 high_percentile = PercentileValue(samples, 100.0 - percent);
-  for (std::vector<vtkm::Float64>::iterator it = samples.begin(); it != samples.end(); ++it)
-  {
-    if (*it < low_percentile)
+    // The rest of the preamble is printed to the error stream, so be consistent:
+    auto& out = this->GetErrorStream();
+
+    // Print list of devices:
+    out << "VTK-m Device State:\n";
+    vtkm::cont::GetRuntimeDeviceTracker().PrintSummary(out);
+    if (!this->UserPreamble.empty())
    {
-      *it = low_percentile;
+      out << this->UserPreamble << "\n";
    }
-    else if (*it > high_percentile)
-    {
-      *it = high_percentile;
-    }
-  }
-}
-// Compute the mean value of the dataset
-vtkm::Float64 Mean(const std::vector<vtkm::Float64>& samples)
-{
-  vtkm::Float64 mean = 0;
-  for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
-  {
-    mean += *it;
-  }
-  return mean / static_cast<vtkm::Float64>(samples.size());
-}
-// Compute the sample variance of the samples
-vtkm::Float64 Variance(const std::vector<vtkm::Float64>& samples)
-{
-  vtkm::Float64 mean = Mean(samples);
-  vtkm::Float64 square_deviations = 0;
-  for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
-  {
-    square_deviations += vtkm::Pow(*it - mean, 2.0);
-  }
-  return square_deviations / (static_cast<vtkm::Float64>(samples.size()) - 1.0);
-}
-// Compute the standard deviation of the samples
-vtkm::Float64 StandardDeviation(const std::vector<vtkm::Float64>& samples)
-{
-  return vtkm::Sqrt(Variance(samples));
-}
-// Compute the median absolute deviation of the dataset
-vtkm::Float64 MedianAbsDeviation(const std::vector<vtkm::Float64>& samples)
-{
-  std::vector<vtkm::Float64> abs_deviations;
-  abs_deviations.reserve(samples.size());
-  const vtkm::Float64 median = PercentileValue(samples, 50.0);
-  for (std::vector<vtkm::Float64>::const_iterator it = samples.begin(); it != samples.end(); ++it)
-  {
-    abs_deviations.push_back(vtkm::Abs(*it - median));
-  }
-  std::sort(abs_deviations.begin(), abs_deviations.end());
-  return PercentileValue(abs_deviations, 50.0);
-}
-} // stats
+    out.flush();

-/*
- * The benchmarker takes a functor to benchmark and runs it multiple times,
- * printing out statistics of the run time at the end.
- * The functor passed should return the run time of the thing being benchmarked
- * in seconds, this lets us avoid including any per-run setup time in the benchmark.
- * However any one-time setup should be done in the functor's constructor
- */
-struct Benchmarker
-{
-  std::vector<vtkm::Float64> Samples;
-  std::string BenchmarkName;
-
-  const vtkm::Float64 MaxRuntime;
-  const size_t MaxIterations;
-
-public:
-  VTKM_CONT
-  Benchmarker(vtkm::Float64 maxRuntime = 30, std::size_t maxIterations = 100)
-    : MaxRuntime(maxRuntime)
-    , MaxIterations(maxIterations)
-  {
-  }
-
-  template <typename Functor>
-  VTKM_CONT void GatherSamples(Functor func)
-  {
-    this->Samples.clear();
-    this->BenchmarkName = func.Description();
-
-    // Do a warm-up run. If the benchmark allocates any additional memory
-    // eg. storage for output results, this will let it do that and
-    // allow us to avoid measuring the allocation time in the actual benchmark run
-    func();
-
-    this->Samples.reserve(this->MaxIterations);
-
-    // Run each benchmark for MAX_RUNTIME seconds or MAX_ITERATIONS iterations, whichever
-    // takes less time. This kind of assumes that running for 500 iterations or 30s will give
-    // good statistics, but if median abs dev and/or std dev are too high both these limits
-    // could be increased
-    size_t iter = 0;
-    for (vtkm::Float64 elapsed = 0.0; elapsed < this->MaxRuntime && iter < this->MaxIterations;
-         elapsed += this->Samples.back(), ++iter)
-    {
-      this->Samples.push_back(func());
-    }
-
-    std::sort(this->Samples.begin(), this->Samples.end());
-    stats::Winsorize(this->Samples, 5.0);
-  }
-
-  VTKM_CONT void PrintSummary(std::ostream& out = std::cout)
-  {
-    out << "Benchmark \'" << this->BenchmarkName << "\' results:\n";
-
-    if (this->Samples.empty())
-    {
-      out << "\tNo samples gathered!\n";
-      return;
-    }
-
-    out << "\tnumSamples = " << this->Samples.size() << "\n"
-        << "\tmedian = " << stats::PercentileValue(this->Samples, 50.0) << "s\n"
-        << "\tmedian abs dev = " << stats::MedianAbsDeviation(this->Samples) << "s\n"
-        << "\tmean = " << stats::Mean(this->Samples) << "s\n"
-        << "\tstd dev = " << stats::StandardDeviation(this->Samples) << "s\n"
-        << "\tmin = " << this->Samples.front() << "s\n"
-        << "\tmax = " << this->Samples.back() << "s\n";
-  }
-
-  template <typename DeviceAdapter, typename MakerFunctor, typename T>
-  VTKM_CONT bool operator()(DeviceAdapter id, MakerFunctor&& makerFunctor, T t)
-  {
-    auto func = makerFunctor(t, id);
-    std::cout << "Running '" << func.Description() << "'" << std::endl;
-    this->GatherSamples(func);
-    this->PrintSummary();
    return true;
  }
-
-  VTKM_CONT const std::vector<vtkm::Float64>& GetSamples() const { return this->Samples; }
-
-  VTKM_CONT void Reset()
-  {
-    this->Samples.clear();
-    this->BenchmarkName.clear();
-  }
 };

-template <typename MakerFunctor>
-class InternalPrintTypeAndBench
+// Returns the number of executed benchmarks, or 1 if unrecognized arguments were reported:
+static inline vtkm::Id ExecuteBenchmarks(int& argc,
+                                         char* argv[],
+                                         const std::string& preamble = std::string{})
 {
-  MakerFunctor Maker;
-
-public:
-  VTKM_CONT
-  InternalPrintTypeAndBench(MakerFunctor maker)
-    : Maker(maker)
+  ::benchmark::Initialize(&argc, argv);
+  if (::benchmark::ReportUnrecognizedArguments(argc, argv))
  {
+    return 1;
  }

-  template <typename T>
-  VTKM_CONT void operator()(T t, vtkm::cont::DeviceAdapterId id) const
+  VTKmConsoleReporter reporter{ preamble };
+
+  vtkm::cont::Timer timer;
+  timer.Start();
+  std::size_t num = ::benchmark::RunSpecifiedBenchmarks(&reporter);
+  timer.Stop();
+
+  reporter.GetOutputStream().flush();
+  reporter.GetErrorStream().flush();
+
+  reporter.GetErrorStream() << "Ran " << num << " benchmarks in " << timer.GetElapsedTime()
+                            << " seconds." << std::endl;
+
+  return static_cast<vtkm::Id>(num);
+}
+
+void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
+{
+  bool isHelp = false;
+
+  // Inject --help
+  if (*argc == 1)
  {
-    std::cout << "*** " << vtkm::testing::TypeName<T>::Name() << " on device " << id.GetName()
-              << " ***************" << std::endl;
-    Benchmarker bench;
-    try
+    const char* help = "--help"; // We want it to be static
+    args.push_back(const_cast<char*>(help));
+    *argc = *argc + 1;
+  }
+
+  args.push_back(nullptr);
+
+  for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
+  {
+    auto opt_s = std::string(args[i]);
+    if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
    {
-      vtkm::cont::TryExecuteOnDevice(id, bench, Maker, t);
-    }
-    catch (std::exception& e)
-    {
-      std::cout << "\n"
-                << "An exception occurring during a benchmark:\n\t" << e.what() << "\n"
-                << "Attempting to continue with remaining benchmarks...\n\n";
+      isHelp = true;
    }
  }
-};

-template <class MakerFunctor, class TypeList>
-VTKM_CONT void BenchmarkTypes(MakerFunctor&& maker, TypeList, vtkm::cont::DeviceAdapterId id)
-{
-  vtkm::ListForEach(
-    InternalPrintTypeAndBench<MakerFunctor>(std::forward<MakerFunctor>(maker)), TypeList(), id);
+  if (!isHelp)
+  {
+    return;
+  }
+
+  opts = vtkm::cont::InitializeOptions::None;
 }
 }
 }
+} // end namespace vtkm::bench::detail

 #endif
--- a/benchmarking/CMakeLists.txt
+++ b/benchmarking/CMakeLists.txt
@ -7,18 +7,24 @@
 ##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ##  PURPOSE.  See the above copyright notice for more information.
 ##============================================================================
+
+# Find Google Benchmark. Note that benchmark_DIR must be pointed at an
+# installation, not a build directory.
+find_package(benchmark REQUIRED)
+
 function(add_benchmark)
  set(options)
  set(oneValueArgs NAME FILE)
  set(multiValueArgs LIBS)
  cmake_parse_arguments(VTKm_AB
-    "${options}" "${oneValueArgs}" "${multiValueArgs}"
-    ${ARGN}
-    )
+          "${options}" "${oneValueArgs}" "${multiValueArgs}"
+          ${ARGN}
+          )
  set(exe_name ${VTKm_AB_NAME})

  add_executable(${exe_name} ${VTKm_AB_FILE})
  target_link_libraries(${exe_name} PRIVATE ${VTKm_AB_LIBS})
+  target_link_libraries(${exe_name} PRIVATE benchmark::benchmark)
  vtkm_add_drop_unused_function_flags(${exe_name})
  vtkm_add_target_information(${exe_name})

@ -38,13 +44,21 @@ set(benchmarks
  BenchmarkDeviceAdapter
  BenchmarkFieldAlgorithms
  BenchmarkFilters
+  BenchmarkODEIntegrators
  BenchmarkTopologyAlgorithms
  )

+set(VTKm_BENCHS_RANGE_LOWER_BOUNDARY 4096 CACHE STRING "Smallest sample for input size bench for BenchmarkDeviceAdapter")
+set(VTKm_BENCHS_RANGE_UPPER_BOUNDARY 134217728 CACHE STRING "Biggest sample for input size bench for BenchmarkDeviceAdapter")
+mark_as_advanced(VTKm_BENCHS_RANGE_LOWER_BOUNDARY VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
+
 foreach (benchmark ${benchmarks})
-  add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter)
+  add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter vtkm_io)
 endforeach ()

+target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY})
+target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY})
+
 if(TARGET vtkm_rendering)
  add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering)
 endif()
--- a/benchmarking/README.md
+++ b/benchmarking/README.md
@ -0,0 +1,120 @@
+# BENCHMARKING VTK-m
+
+## TL;DR
+
+When configuring _VTK-m_ with _CMake_, pass the flag
+`-DVTKm_ENABLE_BENCHMARKS=1`. In the build directory you will see the following binaries:
+
+    $ ls bin/Benchmark*
+    bin/BenchmarkArrayTransfer*  bin/BenchmarkCopySpeeds* bin/BenchmarkFieldAlgorithms*
+    bin/BenchmarkRayTracing* bin/BenchmarkAtomicArray*    bin/BenchmarkDeviceAdapter*
+    bin/BenchmarkFilters* bin/BenchmarkTopologyAlgorithms*
+
+Taking as an example `BenchmarkArrayTransfer`, we can run it as:
+
+    $ bin/BenchmarkArrayTransfer -d Any
+
+---
+
+## Parts of this Document
+
+0. [TL;DR](#tldr)
+1. [Devices](#choosing-devices)
+2. [Filters](#run-a-subset-of-your-benchmarks)
+3. [Compare with baseline](#compare-with-baseline)
+4. [Installing compare-benchmarks.py](#installing-compare-benchmarkspy)
+
+---
+
+## Choosing devices
+
+Taking `BenchmarkArrayTransfer` as an example, we can determine on which
+devices it can run by simply running it without arguments:
+
+    $ bin/BenchmarkArrayTransfer
+    ...
+    Valid devices: "Any" "Serial"
+    ...
+
+From the list of _Valid devices_ you can choose the device on which to run the benchmark:
+
+    $ bin/BenchmarkArrayTransfer -d Serial
+
+
+## Run a subset of your benchmarks
+
+_VTK-m_ benchmarks use [Google Benchmarks], which allows you to choose a subset
+of benchmarks by using the flag `--benchmark_filter=REGEX`
+
+For instance, if you want to run all the benchmarks that write something you
+would run:
+
+    $ bin/BenchmarkArrayTransfer -d Serial --benchmark_filter='Write'
+
+Note you can list all of the available benchmarks with the option:
+`--benchmark_list_tests`.
+
+## Compare with baseline
+
+_VTK-m_ ships with a helper script named `compare-benchmarks.py`, based on
+`compare.py` from [Google Benchmarks], which lets you compare benchmarks using
+different devices, filters, and binaries. After building _VTK-m_ it should
+appear in the `bin` directory within your `build` directory.
+
+When running `compare-benchmarks.py`:
+ - You can specify the baseline benchmark binary path and its arguments in 
+   `--benchmark1=`
+ - The contender benchmark binary path and its arguments in `--benchmark2=`
+ - Extra options to be passed to `compare.py` must come after `--`
+
+### Compare between filters
+
+When comparing filters, we can only use one benchmark binary with a single device
+as shown in the following example:
+
+```sh
+$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Any
+--benchmark_filter=1024' --filter1='Read' --filter2=Write -- filters
+
+# It will output something like this:
+
+Benchmark                                                                          Time             CPU      Time Old      Time New       CPU Old       CPU New
+---------------------------------------------------------------------------------------------------------------------------------------------------------------
+BenchContToExec[Read vs. Write]<F32>/Bytes:1024/manual_time                     +0.2694         +0.2655         18521         23511         18766         23749
+BenchExecToCont[Read vs. Write]<F32>/Bytes:1024/manual_time                     +0.0212         +0.0209         25910         26460         26152         26698
+```
+
+### Compare between devices
+
+When comparing two benchmarks using two devices use the _option_ `benchmarks`
+after `--` and call `./compare-benchmarks.py` as follows:
+
+```sh
+$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Serial
+--benchmark_filter=1024' --benchmark2='./BenchmarkArrayTransfer -d Cuda
+--benchmark_filter=1024' -- benchmarks
+
+
+# It will output something like this:
+
+Benchmark                                                              Time             CPU      Time Old      Time New       CPU Old       CPU New
+---------------------------------------------------------------------------------------------------------------------------------------------------
+BenchContToExecRead<F32>/Bytes:1024/manual_time                     +0.0127         +0.0120         18388         18622         18632         18856
+BenchContToExecWrite<F32>/Bytes:1024/manual_time                    +0.0010         +0.0006         23471         23496         23712         23726
+BenchContToExecReadWrite<F32>/Bytes:1024/manual_time                -0.0034         -0.0041         26363         26274         26611         26502
+BenchRoundTripRead<F32>/Bytes:1024/manual_time                      +0.0055         +0.0056         20635         20748         21172         21291
+BenchRoundTripReadWrite<F32>/Bytes:1024/manual_time                 +0.0084         +0.0082         29288         29535         29662         29905
+BenchExecToContRead<F32>/Bytes:1024/manual_time                     +0.0025         +0.0021         25883         25947         26122         26178
+BenchExecToContWrite<F32>/Bytes:1024/manual_time                    -0.0027         -0.0038         26375         26305         26622         26522
+BenchExecToContReadWrite<F32>/Bytes:1024/manual_time                +0.0041         +0.0039         25639         25745         25871         25972
+```
+
+## Installing compare-benchmarks.py
+
+`compare-benchmarks.py` relies on `compare.py` from Google Benchmarks, which in
+turn relies on `SciPy`. You can find instructions for installing it
+[here][SciPy].
+
+[Google Benchmarks]: https://github.com/google/benchmark
+[Compare.py]:        https://github.com/google/benchmark/blob/master/tools/compare.py
+[SciPy]:             https://www.scipy.org/install.html
--- a/data/README.md
+++ b/data/README.md
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30a14a308f64c6fc2969e2b959d79dacdc5affda1d1c0e24f8e176304147146
+size 643
--- a/data/baseline/5x6_7_MC_Rank0_Block0_Round1_CombinedMesh.ctm
+++ b/data/baseline/5x6_7_MC_Rank0_Block0_Round1_CombinedMesh.ctm
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5706bddc644b5b120ffbd424b3073ce989735272726de711ca8dac19b4a30ee1
+size 2653
--- a/data/baseline/contour-tangle.png
+++ b/data/baseline/contour-tangle.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785051d9773c4a0ced2701de3499f9cd948da2a4c846a5187e30dfb5cb0783cb
+size 10830
--- a/data/baseline/contour-uniform.png
+++ b/data/baseline/contour-uniform.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d990b5f0e9ef27e4e5f87f4c62c4f9974992506521f32bd5901ac6670e71bfa
+size 9656
--- a/data/baseline/contour-wedge.png
+++ b/data/baseline/contour-wedge.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54e09a09c97a20627e54c835d2d488bc9f692ef1315122ab60241c006ab78813
+size 19742
--- a/data/baseline/point-transform.png
+++ b/data/baseline/point-transform.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1472e6002ca4ad4012e0c9f067f8254290fabe93c82713a4994ad97a7fdbdfc
+size 31218
--- a/data/baseline/split-sharp-edges.png
+++ b/data/baseline/split-sharp-edges.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ff6d72bd325ffe0fb3b22bfdc294b6d674384afd662290424bb77634202b4ef
+size 71150
--- a/data/baseline/streamline.png
+++ b/data/baseline/streamline.png
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24c71e8846fe62e6f6eefdb72c9729639061af80bf9d3453d35c8c6838de9174
+size 37162
--- a/Show More
+++ b/Show More