Merge branch 'master' into add/hyperstructstats

Oliver Ruebel 2020-05-28 00:38:29 -07:00
commit 8a27ff5109
598 changed files with 18937 additions and 10028 deletions

@ -1,2 +1,2 @@
Thanks for trying to contribute to VTK-m. The GitHub repository is a mirror provided
for convenience, as VTK-m uses https://gitlab.kitware.com/vtk/vtk-m/issues for issue tracking.
for convenience, as VTK-m uses https://gitlab.kitware.com/vtk/vtk-m/-/issues for issue tracking.

@ -2,4 +2,4 @@ Thanks for trying to contribute to VTK-m. The GitHub repository
is a mirror provided for convenience, as VTK-m uses gitlab.kitware.com
for all pull requests. Our contribution instructions are located at:
https://gitlab.kitware.com/vtk/vtk-m/tree/master/CONTRIBUTING.md
https://gitlab.kitware.com/vtk/vtk-m/-/tree/master/CONTRIBUTING.md

.gitlab-ci-ecp.yml (new file, 125 lines)

@ -0,0 +1,125 @@
.slurm_p9_cuda: &slurm_p9_cuda
tags:
- nmc
- slurm
- nmc-xxfe1-sched-001
- xx-fe1
variables:
NMC_FE1_SLURM_PARAMETERS: " -N1 -p ecp-p9-4v100 --extra-node-info=*:*:* -t 1:30:00 "
CC: "gcc"
CXX: "g++"
CUDAHOSTCXX: "g++"
before_script:
# We need gcc-4.8.5, which is the system default compiler but not a compiler
# listed under the module system.
#
# That means to get this to work properly we explicitly do not request
# any compiler.
- module load cuda cmake/3.14.5
.slurm_p9_openmp: &slurm_p9_openmp
tags:
- nmc
- slurm
- nmc-xxfe1-sched-001
- xx-fe1
variables:
NMC_FE1_SLURM_PARAMETERS: " -N1 -p ecp-p9-4v100 --extra-node-info=*:*:* -t 1:30:00 "
before_script:
- module load gcc/8.3.0 openmpi/3.1.4 cmake/3.14.5
.cmake_build_artifacts: &cmake_build_artifacts
artifacts:
expire_in: 24 hours
when: always
paths:
# The artifacts of the build.
- vtkm-build/bin/
- vtkm-build/include/
# CTest files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
- vtkm-build/CTestCustom*.cmake
- vtkm-build/CTestTestfile.cmake
- vtkm-build/*/CTestTestfile.cmake
- vtkm-build/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/*/*/CTestTestfile.cmake
- vtkm-build/Testing/
# CDash files.
- vtkm-build/DartConfiguration.tcl
.cmake_build_p9_cuda: &cmake_build_p9_cuda
stage: build
script:
- srun env | grep SLURM_JOB_NAME
- mkdir vtkm-build
- pushd vtkm-build
- cmake -DCMAKE_BUILD_TYPE=Release -DVTKm_ENABLE_CUDA=ON -S ../
- cmake --build . -j20
- popd
.cmake_build_p9_openmp: &cmake_build_p9_openmp
stage: build
script:
- srun env | grep SLURM_JOB_NAME
- mkdir vtkm-build
- pushd vtkm-build
- cmake -DCMAKE_BUILD_TYPE=Release -DVTKm_ENABLE_OPENMP=ON -S ../
- cmake --build . -j20
- popd
.cmake_test_p9: &cmake_test_p9
stage: test
script:
- echo "running the test using artifacts of the build"
- pushd vtkm-build
# We need to exclude the following tests
# - CopyrightStatement
# - TestInstallSetup
# - SourceInInstall
# Which we can do by using an exclude regex
- ctest -E "Install|CopyrightStatement"
- popd
stages:
- build
- test
build:p9_openmp:
extends:
- .slurm_p9_openmp
- .cmake_build_artifacts
- .cmake_build_p9_openmp
test:p9_openmp:
extends:
- .slurm_p9_openmp
- .cmake_test_p9
dependencies:
- build:p9_openmp
needs:
- build:p9_openmp
build:p9_cuda:
extends:
- .slurm_p9_cuda
- .cmake_build_artifacts
- .cmake_build_p9_cuda
test:p9_cuda:
extends:
- .slurm_p9_cuda
- .cmake_test_p9
dependencies:
- build:p9_cuda
needs:
- build:p9_cuda

.gitlab-ci.yml (new file, 171 lines)

@ -0,0 +1,171 @@
# Docker Images:
#
# * .gitlab/ci/docker/centos7/cuda10.2/
# - cuda
# - gcc 4.8.5
# * .gitlab/ci/docker/centos8/base/
# - gcc 8.3.1
# - clang 8.0.1
# - openmp
# - asan, ubsan
# * .gitlab/ci/docker/rhel8/cuda10.2/
# - cuda
# - gcc 8.2.1
# * .gitlab/ci/docker/ubuntu1604/base/
# - gcc 4.8
# - clang 3.8
# - clang 5.0
# - tbb
# - openmpi
# * .gitlab/ci/docker/ubuntu1604/cuda9.2/
# - cuda
# - gcc 5.4
# - tbb
# - openmp
# - openmpi
# * .gitlab/ci/docker/ubuntu1804/base/
# - gcc 6.5
# - gcc 7.4
# - gcc 9
# - clang 8
# - tbb
# - openmp
# - mpich2
# * .gitlab/ci/docker/ubuntu1804/cuda10.1/
# - cuda
# - gcc 7.4
# - tbb
# - openmp
# - mpich2
.docker_image: &docker_image
variables:
GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci
.centos7: &centos7
image: "kitware/vtkm:ci-centos7_cuda10.2-20200410"
extends:
- .docker_image
.centos8: &centos8
image: "kitware/vtkm:ci-centos8-20200410"
extends:
- .docker_image
.rhel8: &rhel8
image: "kitware/vtkm:ci-rhel8_cuda10.2-20200410"
extends:
- .docker_image
.ubuntu1604: &ubuntu1604
image: "kitware/vtkm:ci-ubuntu1604-20200410"
extends:
- .docker_image
.ubuntu1604_cuda: &ubuntu1604_cuda
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200410"
extends:
- .docker_image
.ubuntu1804: &ubuntu1804
image: "kitware/vtkm:ci-ubuntu1804-20200410"
extends:
- .docker_image
.ubuntu1804_cuda: &ubuntu1804_cuda
image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200410"
extends:
- .docker_image
.only-default: &only-default
only:
- master
- merge_requests
- tags
.only-master: &only-master
only:
- master
# General Longer Term Tasks:
# - setup asan, and ubsan as sub-pipeline
# - setup clang tidy as sub-pipeline
#
# Current Tasks:
# - Determine if we can get sccache to work with CUDA
# - Setup a machine to replicate the issue in https://gitlab.kitware.com/vtk/vtk-m/-/issues/447
# Note: Centos7 doesn't work as they ship separate standard library
# headers for each version. We will have to figure out something else
# like using spack or building llvm/clang from source
stages:
- build
- test
.cmake_build_linux: &cmake_build_linux
stage: build
timeout: 2 hours
interruptible: true
before_script:
- .gitlab/ci/config/sccache.sh
- export PATH=$PWD/.gitlab:$PATH
- SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
- sccache --show-stats
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
- "ctest -VV -S .gitlab/ci/ctest_build.cmake"
- sccache --show-stats
artifacts:
expire_in: 24 hours
when: always
paths:
# The artifacts of the build.
- build/bin/
- build/include/
- build/lib/
# CTest and CMake install files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
#
# Note: this also captures our CIState.cmake file
- build/CMakeCache.txt
- build/*.cmake
- build/*/*.cmake
- build/*/*/*.cmake
- build/*/*/*/*.cmake
- build/*/*/*/*/*.cmake
- build/*/*/*/*/*/*.cmake
- build/Testing/
# CDash files.
- build/DartConfiguration.tcl
.cmake_test_linux: &cmake_test_linux
stage: test
timeout: 50 minutes
interruptible: true
script:
#Need to use our custom ctest-latest symlink
#This will allow us to use 3.17+ which has support
#for running failed tests multiple times so failures
#due to system load are not reported
- "ctest-latest -VV -S .gitlab/ci/ctest_test.cmake"
.cmake_memcheck_linux: &cmake_memcheck_linux
stage: test
timeout: 2 hours
interruptible: true
script:
- "ctest-latest -VV -S .gitlab/ci/ctest_memcheck.cmake"
include:
- local: '/.gitlab/ci/centos7.yml'
- local: '/.gitlab/ci/centos8.yml'
- local: '/.gitlab/ci/rhel8.yml'
- local: '/.gitlab/ci/ubuntu1604.yml'
- local: '/.gitlab/ci/ubuntu1804.yml'
- local: '/.gitlab/ci/windows10.yml'

.gitlab/ci/centos7.yml (new file, 54 lines)

@ -0,0 +1,54 @@
# Build on centos7 with CUDA and test on rhel8 and centos7
# gcc 4.8
build:centos7_gcc48:
tags:
- build
- vtkm
- docker
- linux
- large-memory
extends:
- .centos7
- .cmake_build_linux
- .only-default
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "cuda+turing+32bit_ids"
test:centos7_gcc48:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
extends:
- .centos7
- .cmake_test_linux
- .only-default
dependencies:
- build:centos7_gcc48
needs:
- build:centos7_gcc48
test:rhel8_test_centos7:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
extends:
- .rhel8
- .cmake_test_linux
- .only-default
variables:
CTEST_EXCLUSIONS: "built_against_test_install"
dependencies:
- build:centos7_gcc48
needs:
- build:centos7_gcc48

.gitlab/ci/centos8.yml (new file, 36 lines)

@ -0,0 +1,36 @@
# Build on centos8 with serial and test on centos8
# Uses gcc 8.2.1
build:centos8_sanitizer:
tags:
- build
- vtkm
- docker
- linux
extends:
- .centos8
- .cmake_build_linux
- .only-default
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "serial+shared+openmp+asan+leak"
test:centos8_sanitizer:
tags:
- test
- vtkm
- docker
- linux
- privileged
extends:
- .centos8
- .cmake_memcheck_linux
- .only-default
variables:
OMP_NUM_THREADS: 4
CTEST_MEMORYCHECK_TYPE: LeakSanitizer
dependencies:
- build:centos8_sanitizer
needs:
- build:centos8_sanitizer

@ -0,0 +1,97 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
if (NOT DEFINED "ENV{GITLAB_CI}")
message(FATAL_ERROR
"This script assumes it is being run inside of GitLab-CI")
endif ()
# Set up the source and build paths.
set(CTEST_SOURCE_DIRECTORY "$ENV{CI_PROJECT_DIR}")
set(CTEST_BINARY_DIRECTORY "${CTEST_SOURCE_DIRECTORY}/build")
if ("$ENV{VTKM_SETTINGS}" STREQUAL "")
message(FATAL_ERROR
"The VTKM_SETTINGS environment variable is required to know what "
"build options should be used.")
endif ()
# Default to Release builds.
if (NOT "$ENV{CMAKE_BUILD_TYPE}" STREQUAL "")
set(CTEST_BUILD_CONFIGURATION "$ENV{CMAKE_BUILD_TYPE}")
endif ()
if (NOT CTEST_BUILD_CONFIGURATION)
set(CTEST_BUILD_CONFIGURATION "Release")
endif ()
# Set the build metadata.
string(TOLOWER ${CTEST_BUILD_CONFIGURATION} build_type)
set(CTEST_BUILD_NAME "${build_type}+$ENV{VTKM_SETTINGS}")
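# e.g. CMAKE_BUILD_TYPE=RelWithDebInfo and VTKM_SETTINGS=cuda+turing+32bit_ids
# give the build name "relwithdebinfo+cuda+turing+32bit_ids"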
set(site_name "$ENV{CI_JOB_NAME}")
string(REPLACE "build" "" site_name "${site_name}")
string(REPLACE "test" "" site_name "${site_name}")
string(REPLACE ":" "" site_name "${site_name}")
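# e.g. the CI job name "build:centos7_gcc48" yields the site name "centos7_gcc48"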
set(CTEST_SITE ${site_name})
# Default to using Ninja.
if (NOT "$ENV{CMAKE_GENERATOR}" STREQUAL "")
set(CTEST_CMAKE_GENERATOR "$ENV{CMAKE_GENERATOR}")
endif ()
if (NOT CTEST_CMAKE_GENERATOR)
set(CTEST_CMAKE_GENERATOR "Ninja")
endif ()
# Determine the track to submit to.
set(CTEST_TRACK "Experimental")
if ("$ENV{CI_COMMIT_REF_NAME}" STREQUAL "master")
set(CTEST_TRACK "Nightly")
endif ()
if (CTEST_CMAKE_GENERATOR STREQUAL "Unix Makefiles")
include(ProcessorCount)
ProcessorCount(nproc)
set(CTEST_BUILD_FLAGS "-j${nproc}")
endif ()
if(DEFINED ENV{CTEST_MEMORYCHECK_TYPE})
set(env_value "$ENV{CTEST_MEMORYCHECK_TYPE}")
list(APPEND optional_variables "set(CTEST_MEMORYCHECK_TYPE ${env_value})")
endif()
if(DEFINED ENV{CTEST_MEMORYCHECK_SANITIZER_OPTIONS})
set(env_value "$ENV{CTEST_MEMORYCHECK_SANITIZER_OPTIONS}")
list(APPEND optional_variables "set(CTEST_MEMORYCHECK_SANITIZER_OPTIONS ${env_value})")
endif()
#We need to write this information out to a file in the build directory
file(TO_CMAKE_PATH "${CTEST_SOURCE_DIRECTORY}" src_path) #converted so we can run on windows
file(TO_CMAKE_PATH "${CTEST_BINARY_DIRECTORY}" bin_path) #converted so we can run on windows
set(state
"
set(CTEST_SOURCE_DIRECTORY \"${src_path}\")
set(CTEST_BINARY_DIRECTORY \"${bin_path}\")
set(CTEST_BUILD_NAME ${CTEST_BUILD_NAME})
set(CTEST_SITE ${CTEST_SITE})
set(CTEST_CMAKE_GENERATOR \"${CTEST_CMAKE_GENERATOR}\")
set(CTEST_BUILD_CONFIGURATION ${CTEST_BUILD_CONFIGURATION})
set(CTEST_BUILD_FLAGS \"${CTEST_BUILD_FLAGS}\")
set(CTEST_TRACK ${CTEST_TRACK})
${optional_variables}
"
)
file(WRITE ${CTEST_BINARY_DIRECTORY}/CIState.cmake "${state}")

@ -0,0 +1,93 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
string(REPLACE "+" ";" options "$ENV{VTKM_SETTINGS}")
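# e.g. VTKM_SETTINGS="tbb+mpi+shared" becomes the CMake list "tbb;mpi;shared"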
foreach(option IN LISTS options)
if(static STREQUAL option)
set(BUILD_SHARED_LIBS "OFF" CACHE STRING "")
elseif(shared STREQUAL option)
set(BUILD_SHARED_LIBS "ON" CACHE STRING "")
elseif(vtk_types STREQUAL option)
set(VTKm_USE_DEFAULT_TYPES_FOR_VTK "ON" CACHE STRING "")
elseif(32bit_ids STREQUAL option)
set(VTKm_USE_64BIT_IDS "OFF" CACHE STRING "")
elseif(64bit_floats STREQUAL option)
set(VTKm_USE_DOUBLE_PRECISION "ON" CACHE STRING "")
elseif(asan STREQUAL option)
set(VTKm_ENABLE_SANITIZER "ON" CACHE STRING "")
list(APPEND sanitizers "address")
elseif(leak STREQUAL option)
set(VTKm_ENABLE_SANITIZER "ON" CACHE STRING "")
list(APPEND sanitizers "leak")
elseif(examples STREQUAL option)
set(VTKm_ENABLE_EXAMPLES "ON" CACHE STRING "")
elseif(docs STREQUAL option)
set(VTKm_ENABLE_DOCUMENTATION "ON" CACHE STRING "")
elseif(benchmarks STREQUAL option)
set(VTKm_ENABLE_BENCHMARKS "ON" CACHE STRING "")
elseif(mpi STREQUAL option)
set(VTKm_ENABLE_MPI "ON" CACHE STRING "")
elseif(tbb STREQUAL option)
set(VTKm_ENABLE_TBB "ON" CACHE STRING "")
elseif(openmp STREQUAL option)
set(VTKm_ENABLE_OPENMP "ON" CACHE STRING "")
elseif(cuda STREQUAL option)
set(VTKm_ENABLE_CUDA "ON" CACHE STRING "")
elseif(maxwell STREQUAL option)
set(VTKm_CUDA_Architecture "maxwell" CACHE STRING "")
elseif(pascal STREQUAL option)
set(VTKm_CUDA_Architecture "pascal" CACHE STRING "")
elseif(volta STREQUAL option)
set(VTKm_CUDA_Architecture "volta" CACHE STRING "")
elseif(turing STREQUAL option)
set(VTKm_CUDA_Architecture "turing" CACHE STRING "")
endif()
endforeach()
set(CTEST_USE_LAUNCHERS "ON" CACHE STRING "")
# We need to store the absolute path so that
# the launchers still work even when sccache isn't
# on our path.
find_program(SCCACHE_COMMAND NAMES sccache)
if(SCCACHE_COMMAND)
set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
if(VTKm_ENABLE_CUDA)
set(CMAKE_CUDA_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
endif()
endif()
# Setup all the sanitizers as a list
if(sanitizers)
set(VTKm_USE_SANITIZER "${sanitizers}" CACHE STRING "" FORCE)
endif()

.gitlab/ci/config/sccache.sh (new executable file, 16 lines)

@ -0,0 +1,16 @@
#!/bin/sh
set -e
readonly version="nvcc_v4"
readonly sha256sum="260779b4a740fe8373d251d1e318541a98dd5cd2f8051eedd55227a5a852fdf7"
readonly filename="sccache-0.2.14-$version-x86_64-unknown-linux-musl"
readonly tarball="$filename.tar.gz"
cd .gitlab
echo "$sha256sum $tarball" > sccache.sha256sum
curl -OL "https://github.com/robertmaynard/sccache/releases/download/$version/$tarball"
sha256sum --check sccache.sha256sum
tar xf "$tarball"
#mv "$filename/sccache" .

@ -0,0 +1,14 @@
$tempFile = "$env:temp\vcvars.txt"
if ($env:CI_JOB_NAME -eq "build:windows_vs2019") {
cmd.exe /c "call `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat`" && set > $tempFile"
} else {
cmd.exe /c "call `"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat`" && set > $tempFile"
}
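# Import each NAME=value pair captured from vcvars64.bat into this PowerShell session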
Get-Content "$tempFile" | Foreach-Object {
if ($_ -match "^(.*?)=(.*)$") {
Set-Content "env:\$($matches[1])" $matches[2]
}
}

@ -0,0 +1,40 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# Read the files from the build directory that contain
# host information (name, parallel level, etc.)
include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
# Pick up from where the configure left off.
ctest_start(APPEND)
message(STATUS "CTEST_BUILD_FLAGS: ${CTEST_BUILD_FLAGS}")
ctest_build(APPEND
NUMBER_WARNINGS num_warnings
RETURN_VALUE build_result)
if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.15)
ctest_submit(PARTS Build BUILD_ID build_id)
message(STATUS "Build submission build_id: ${build_id}")
else()
ctest_submit(PARTS Build)
endif()
endif()
if (build_result)
message(FATAL_ERROR
"Failed to build")
endif ()

@ -0,0 +1,55 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# Read the files from the build directory that contain
# host information (name, parallel level, etc.)
include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
set(cmake_args
-C "${CMAKE_CURRENT_LIST_DIR}/config/initial_config.cmake")
# Create an entry in CDash.
ctest_start(Experimental TRACK "${CTEST_TRACK}")
# Gather update information.
find_package(Git)
set(CTEST_UPDATE_VERSION_ONLY ON)
set(CTEST_UPDATE_COMMAND "${GIT_EXECUTABLE}")
# Don't do updates when running via reproduce_ci_env.py
if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
ctest_update()
endif()
# Configure the project.
ctest_configure(APPEND
OPTIONS "${cmake_args}"
RETURN_VALUE configure_result)
# We can now submit because we've configured.
if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.15)
ctest_submit(PARTS Update BUILD_ID build_id)
message(STATUS "Update submission build_id: ${build_id}")
ctest_submit(PARTS Configure BUILD_ID build_id)
message(STATUS "Configure submission build_id: ${build_id}")
else()
ctest_submit(PARTS Update)
ctest_submit(PARTS Configure)
endif()
endif()
if (configure_result)
message(FATAL_ERROR
"Failed to configure")
endif ()

@ -0,0 +1,61 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# Read the files from the build directory that contain
# host information (name, parallel level, etc.)
include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
# Pick up from where the configure left off.
ctest_start(APPEND)
if(NOT CTEST_MEMORYCHECK_TYPE)
set(CTEST_MEMORYCHECK_TYPE "$ENV{CTEST_MEMORYCHECK_TYPE}")
endif()
if(NOT CTEST_MEMORYCHECK_SANITIZER_OPTIONS)
set(CTEST_MEMORYCHECK_SANITIZER_OPTIONS "$ENV{CTEST_MEMORYCHECK_SANITIZER_OPTIONS}")
endif()
if(NOT CTEST_MEMORYCHECK_SUPPRESSIONS_FILE)
if(CTEST_MEMORYCHECK_TYPE STREQUAL "LeakSanitizer")
set(CTEST_MEMORYCHECK_SUPPRESSIONS_FILE "${CTEST_SOURCE_DIRECTORY}/CMake/testing/lsan.supp")
endif()
endif()
set(test_exclusions
# placeholder for tests to exclude
)
string(REPLACE ";" "|" test_exclusions "${test_exclusions}")
if (test_exclusions)
set(test_exclusions "(${test_exclusions})")
endif ()
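# e.g. a list of (hypothetical) names "TestA;TestB" becomes the
# CTest exclude regex "(TestA|TestB)"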
# reduced parallel level so we don't exhaust system resources
ctest_memcheck(
PARALLEL_LEVEL "4"
RETURN_VALUE test_result
EXCLUDE "${test_exclusions}"
DEFECT_COUNT defects)
ctest_submit(PARTS Memcheck BUILD_ID build_id)
message(STATUS "Memcheck submission build_id: ${build_id}")
if (defects)
message(FATAL_ERROR "Found ${defects} memcheck defects")
endif ()
if (test_result)
message(FATAL_ERROR "Failed to test")
endif ()

@ -0,0 +1,46 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# Read the files from the build directory that contain
# host information (name, parallel level, etc.)
include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
# Pick up from where the configure left off.
ctest_start(APPEND)
set(test_exclusions
# placeholder for tests to exclude provided by the env
$ENV{CTEST_EXCLUSIONS}
)
string(REPLACE ";" "|" test_exclusions "${test_exclusions}")
if (test_exclusions)
set(test_exclusions "(${test_exclusions})")
endif ()
ctest_test(APPEND
PARALLEL_LEVEL "10"
RETURN_VALUE test_result
EXCLUDE "${test_exclusions}"
REPEAT "UNTIL_PASS:3"
)
message(STATUS "ctest_test RETURN_VALUE: ${test_result}")
if(NOT DEFINED ENV{GITLAB_CI_EMULATION})
ctest_submit(PARTS Test BUILD_ID build_id)
message(STATUS "Test submission build_id: ${build_id}")
endif()
if (test_result)
message(FATAL_ERROR "Failed to test")
endif ()

@ -0,0 +1,21 @@
FROM nvidia/cuda:10.2-devel-centos7
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install cmake make gcc gcc-c++ -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y
# Provide a consistent CMake path across all images
# Install CMake 3.13 as it is the minimum for CUDA builds
RUN mkdir /opt/cmake && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.13.5/cmake-3.13.5-Linux-x86_64.sh > cmake-3.13.5-Linux-x86_64.sh && \
sh cmake-3.13.5-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -0,0 +1,18 @@
FROM centos:8
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install make gcc gcc-c++ curl libasan libubsan libomp clang -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
# Provide a consistent CMake path across all images. Just use the latest cmake
RUN mkdir -p /opt/cmake/bin && ln -s /opt/cmake-latest/bin/cmake /opt/cmake/bin/cmake
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -0,0 +1,18 @@
FROM nvidia/cuda:10.2-devel-ubi8
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install make gcc gcc-c++ curl -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
# Provide a consistent CMake path across all images. Just use the latest cmake
RUN mkdir -p /opt/cmake/bin && ln -s /opt/cmake-latest/bin/cmake /opt/cmake/bin/cmake
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -0,0 +1,51 @@
FROM ubuntu:16.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
autoconf \
automake \
autotools-dev \
clang-3.8 \
clang-5.0 \
curl \
g++ \
g++-4.8 \
libtbb-dev \
make \
ninja-build \
software-properties-common \
ssh
# extra dependencies for dejagore machine
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
RUN apt-get install -y --no-install-recommends \
git \
git-lfs \
&& \
rm -rf /var/lib/apt/lists/*
# Provide a modern OpenMPI version that supports
# running as root via environment variables
RUN mkdir /opt/openmpi && \
curl -L https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz > openmpi-4.0.3.tar.gz && \
tar -xf openmpi-4.0.3.tar.gz && \
cd openmpi-4.0.3 && \
./configure --prefix=/opt/openmpi && \
make -j all && \
make install
# Provide a consistent CMake path across all images
# Install CMake 3.12 as it is the minimum for non-CUDA builds
RUN mkdir /opt/cmake && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.12.4/cmake-3.12.4-Linux-x86_64.sh > cmake-3.12.4-Linux-x86_64.sh && \
sh cmake-3.12.4-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -0,0 +1,49 @@
FROM nvidia/cuda:9.2-devel-ubuntu16.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
autoconf \
automake \
autotools-dev \
curl \
g++ \
libomp-dev \
libtbb-dev \
make \
ninja-build \
software-properties-common \
ssh
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
git-lfs \
&& \
rm -rf /var/lib/apt/lists/*
# Provide a modern OpenMPI version that supports
# running as root via environment variables
RUN mkdir /opt/openmpi && \
curl -L https://download.open-mpi.org/release/open-mpi/v4.0/openmpi-4.0.3.tar.gz > openmpi-4.0.3.tar.gz && \
tar -xf openmpi-4.0.3.tar.gz && \
cd openmpi-4.0.3 && \
./configure --prefix=/opt/openmpi && \
make -j all && \
make install
# Provide a consistent CMake path across all images
# Allow tests that require CMake to work correctly
# Install CMake 3.13 as it is the minimum for CUDA builds
RUN mkdir /opt/cmake && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.13.5/cmake-3.13.5-Linux-x86_64.sh > cmake-3.13.5-Linux-x86_64.sh && \
sh cmake-3.13.5-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -0,0 +1,44 @@
FROM ubuntu:18.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
cmake \
curl \
g++ \
g++-6 \
git \
git-lfs \
libmpich-dev \
libomp-dev \
libtbb-dev \
mpich \
ninja-build \
software-properties-common
# extra dependencies for charm machine
RUN add-apt-repository ppa:jonathonf/gcc-9.2
RUN apt-get update && apt-get install -y --no-install-recommends \
clang-8 \
g++-9 \
&& \
rm -rf /var/lib/apt/lists/*
# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install
# Provide a consistent CMake path across all images
# Allow tests that require CMake to work correctly
RUN mkdir /opt/cmake && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.16.4/cmake-3.16.4-Linux-x86_64.sh > cmake-3.16.4-Linux-x86_64.sh && \
sh cmake-3.16.4-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -0,0 +1,35 @@
FROM nvidia/cuda:10.1-devel-ubuntu18.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
g++ \
git \
git-lfs \
libmpich-dev \
libomp-dev \
libtbb-dev \
mpich \
ninja-build \
&& \
rm -rf /var/lib/apt/lists/*
# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install
# Provide a consistent CMake path across all images
# Allow tests that require CMake to work correctly
RUN mkdir /opt/cmake && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.16.4/cmake-3.16.4-Linux-x86_64.sh > cmake-3.16.4-Linux-x86_64.sh && \
sh cmake-3.16.4-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake-latest/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.0/cmake-3.17.0-Linux-x86_64.sh > cmake-3.17.0-Linux-x86_64.sh && \
sh cmake-3.17.0-Linux-x86_64.sh --prefix=/opt/cmake-latest/ --exclude-subdir --skip-license && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

.gitlab/ci/docker/update_all.sh (new executable file, 39 lines)

@ -0,0 +1,39 @@
#!/bin/sh
set -e
set -x
# date is expected to be a string of the form YYYYMMDD
readonly date="$1"
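# e.g. ./update_all.sh 20200410 produces images tagged like kitware/vtkm:ci-centos8-20200410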
cd centos7/cuda10.2
sudo docker build -t kitware/vtkm:ci-centos7_cuda10.2-$date .
cd ../..
cd centos8/base
sudo docker build -t kitware/vtkm:ci-centos8-$date .
cd ../..
cd rhel8/cuda10.2
sudo docker build -t kitware/vtkm:ci-rhel8_cuda10.2-$date .
cd ../..
cd ubuntu1604/base
sudo docker build -t kitware/vtkm:ci-ubuntu1604-$date .
cd ../..
cd ubuntu1604/cuda9.2
sudo docker build -t kitware/vtkm:ci-ubuntu1604_cuda9.2-$date .
cd ../..
cd ubuntu1804/base
sudo docker build -t kitware/vtkm:ci-ubuntu1804-$date .
cd ../..
cd ubuntu1804/cuda10.1
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date .
cd ../..
# sudo docker login --username=<docker_hub_name>
sudo docker push kitware/vtkm
sudo docker system prune

.gitlab/ci/rhel8.yml (new file, 62 lines)

@ -0,0 +1,62 @@
# Build on rhel8 with serial and test on rhel8
# Uses gcc 8.2.1
build:rhel8:
tags:
- build
- vtkm
- docker
- linux
extends:
- .rhel8
- .cmake_build_linux
- .only-default
variables:
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "serial+shared+64bit_floats+32bit_ids"
test:rhel8:
tags:
- test
- vtkm
- docker
- linux
extends:
- .rhel8
- .cmake_test_linux
- .only-default
dependencies:
- build:rhel8
needs:
- build:rhel8
# Build on rhel8 with serial and the VTK-supported types
# Uses gcc 8.2.1
build:rhel8_vtk_types:
tags:
- build
- vtkm
- docker
- linux
extends:
- .rhel8
- .cmake_build_linux
- .only-default
variables:
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "serial+vtk_types"
test:rhel8_vtk_types:
tags:
- test
- vtkm
- docker
- linux
extends:
- .rhel8
- .cmake_test_linux
- .only-default
dependencies:
- build:rhel8_vtk_types
needs:
- build:rhel8_vtk_types

.gitlab/ci/ubuntu1604.yml (new file, 149 lines)

@ -0,0 +1,149 @@
# Build on ubuntu1604 with CUDA 9.2 and test on ubuntu1604 and ubuntu1804
# Uses gcc 5, and builds for pascal as CUDA 9.2 doesn't support turing
build:ubuntu1604_gcc5:
tags:
- build
- vtkm
- docker
- linux
- large-memory
extends:
- .ubuntu1604_cuda
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "cuda+pascal"
# Temporarily disabled as we don't have a pascal hw gitlab-runner
# test:ubuntu1604_gcc5:
# tags:
# - test
# - cuda-rt
# - pascal
# - vtkm
# - docker
# - linux
# extends:
# - .ubuntu1604_cuda
# - .cmake_test_linux
# - .only-default
# dependencies:
# - build:ubuntu1604_gcc5
# needs:
# - build:ubuntu1604_gcc5
# test:ubuntu1804_test_ubuntu1604_gcc5:
# tags:
# - test
# - cuda-rt
# - pascal
# - vtkm
# - docker
# - linux
# extends:
# - .ubuntu1804_cuda
# - .cmake_test_linux
# - .only-default
# dependencies:
# - build:ubuntu1604_gcc5
# needs:
# - build:ubuntu1604_gcc5
# Build on ubuntu1604 with OpenMP + CUDA
# Runs only on nightlies
build:ubuntu1604_gcc5_2:
tags:
- build
- vtkm
- docker
- linux
- large-memory
extends:
- .ubuntu1604_cuda
- .cmake_build_linux
- .only-master
variables:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "openmp+cuda+pascal+examples"
# Build on ubuntu1604 with mpi + tbb and test on ubuntu1604
# Uses gcc 4.8
# Uses OpenMPI
build:ubuntu1604_gcc48:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-4.8"
CXX: "g++-4.8"
CMAKE_BUILD_TYPE: Release
#custom openmpi install location
CMAKE_PREFIX_PATH: "/opt/openmpi/"
VTKM_SETTINGS: "tbb+mpi+shared"
test:ubuntu1604_gcc48:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_test_linux
- .only-default
variables:
#env flags to allow openmpi to run as root user
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
#mpi location so that `built_against_test_install` tests
#pass
CMAKE_PREFIX_PATH: "/opt/openmpi/"
dependencies:
- build:ubuntu1604_gcc48
needs:
- build:ubuntu1604_gcc48
# Build on ubuntu1604 with tbb and test on ubuntu1604
# Uses clang 5
build:ubuntu1604_clang5:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_build_linux
- .only-default
variables:
CC: "clang-5.0"
CXX: "clang++-5.0"
CMAKE_BUILD_TYPE: Debug
VTKM_SETTINGS: "tbb+static+64bit_floats"
test:ubuntu1604_clang5:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1604_clang5
needs:
- build:ubuntu1604_clang5

.gitlab/ci/ubuntu1804.yml (new file, 181 lines)

@ -0,0 +1,181 @@
# Build on ubuntu1804 with TBB and OpenMP and test on ubuntu1804
# Uses gcc 9
# Uses MPICH2
build:ubuntu1804_gcc9:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-9"
CXX: "g++-9"
CMAKE_BUILD_TYPE: Debug
VTKM_SETTINGS: "tbb+openmp+mpi+shared"
test:ubuntu1804_gcc9:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_test_linux
- .only-default
variables:
#Restrict OpenMP number of threads since multiple test stages
#execute on the same hardware concurrently
OMP_NUM_THREADS: 4
dependencies:
- build:ubuntu1804_gcc9
needs:
- build:ubuntu1804_gcc9
# Build on ubuntu1804 with CUDA + MPI and test on ubuntu1804
# Uses gcc 7.4
# Uses MPICH2
build:ubuntu1804_gcc7:
tags:
- build
- vtkm
- docker
- linux
- large-memory
extends:
- .ubuntu1804_cuda
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-7"
CXX: "g++-7"
VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats"
test:ubuntu1804_gcc7:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
extends:
- .ubuntu1804_cuda
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1804_gcc7
needs:
- build:ubuntu1804_gcc7
# Build on ubuntu1804 with OpenMP and test on ubuntu1804
# Uses gcc 7.4
# Runs only on nightlies
build:ubuntu1804_gcc7_2:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_build_linux
- .only-master
variables:
CC: "gcc-7"
CXX: "g++-7"
VTKM_SETTINGS: "openmp+shared+examples"
test:ubuntu1804_gcc7_2:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_test_linux
- .only-master
variables:
#Restrict OpenMP number of threads since multiple test stages
#execute on the same hardware concurrently
OMP_NUM_THREADS: 4
dependencies:
- build:ubuntu1804_gcc7_2
needs:
- build:ubuntu1804_gcc7_2
# Build on ubuntu1804 with OpenMP and test on ubuntu1804
# Uses gcc 6.5
build:ubuntu1804_gcc6:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-6"
CXX: "g++-6"
VTKM_SETTINGS: "openmp+shared+examples"
test:ubuntu1804_gcc6:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_test_linux
- .only-default
variables:
#Restrict OpenMP number of threads since multiple test stages
#execute on the same hardware concurrently
OMP_NUM_THREADS: 3
dependencies:
- build:ubuntu1804_gcc6
needs:
- build:ubuntu1804_gcc6
# Build on ubuntu1804 with TBB and test on ubuntu1804
# Uses clang 8
build:ubuntu1804_clang8:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_build_linux
- .only-default
variables:
CC: "clang-8"
CXX: "clang++-8"
CMAKE_BUILD_TYPE: Debug
VTKM_SETTINGS: "tbb+shared+examples"
test:ubuntu1804_clang8:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1804
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1804_clang8
needs:
- build:ubuntu1804_clang8

.gitlab/ci/windows10.yml (new file, 87 lines)

@ -0,0 +1,87 @@
.cmake_build_windows: &cmake_build_windows
stage: build
timeout: 2 hours
interruptible: true
before_script:
- .gitlab/ci/config/setup_vs_powershell.ps1
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
- "ctest -VV -S .gitlab/ci/ctest_build.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The artifacts of the build.
- build/bin/
- build/include/
- build/lib/
# CTest and CMake install files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
#
# Note: this also captures our CIState.cmake file
- build/CMakeCache.txt
- build/*.cmake
- build/*/*.cmake
- build/*/*/*.cmake
- build/*/*/*/*.cmake
- build/*/*/*/*/*.cmake
- build/*/*/*/*/*/*.cmake
- build/Testing/
# CDash files.
- build/DartConfiguration.tcl
.cmake_test_windows: &cmake_test_windows
stage: test
timeout: 50 minutes
interruptible: true
before_script:
- .gitlab/ci/config/setup_vs_powershell.ps1
script:
#Need to use our custom ctest-latest symlink
#This will allow us to use 3.17+ which has support
#for running failed tests multiple times so failures
#due to system load are not reported
- "ctest-latest -VV -S .gitlab/ci/ctest_test.cmake"
# Build on windows10 with Visual Studio
# Will have CUDA 10.2 once build issues are resolved
build:windows_vs2019:
tags:
- build
- vtkm
- windows
- vs2019
- shell
- large-memory
extends:
- .cmake_build_windows
- .only-default
variables:
CMAKE_GENERATOR: "Ninja"
CMAKE_BUILD_TYPE: Release
# Disabled while we track down cub allocator issues with vtkm/io tests
# VTKM_SETTINGS: "cuda+turing"
VTKM_SETTINGS: "serial"
test:windows_vs2019:
tags:
- test
- vtkm
- windows
- shell
- cuda-rt
- turing
extends:
- .cmake_test_windows
- .only-default
dependencies:
- build:windows_vs2019
needs:
- build:windows_vs2019

.hooks-config (new file, 6 lines)

@ -0,0 +1,6 @@
# Loaded by .git/hooks/(pre-commit|commit-msg|prepare-commit-msg)
# during git commit after local hooks have been installed.
[hooks "chain"]
pre-commit = utilities/git/pre-commit
pre-push = utilities/git/pre-push

@ -9,6 +9,7 @@
# TBB_INCLUDE_DIRS - the TBB include directories
# TBB_LIBRARIES - TBB libraries to be linked, doesn't include malloc or
# malloc proxy
# TBB::tbb - imported target for the TBB library
#
# TBB_VERSION_MAJOR - Major Product Version Number
# TBB_VERSION_MINOR - Minor Product Version Number
@ -20,10 +21,12 @@
# TBB_MALLOC_FOUND - system has TBB malloc library
# TBB_MALLOC_INCLUDE_DIRS - the TBB malloc include directories
# TBB_MALLOC_LIBRARIES - The TBB malloc libraries to be linked
# TBB::malloc - imported target for the TBB malloc library
#
# TBB_MALLOC_PROXY_FOUND - system has TBB malloc proxy library
# TBB_MALLOC_PROXY_INCLUDE_DIRS - the TBB malloc proxy include directories
# TBB_MALLOC_PROXY_LIBRARIES - The TBB malloc proxy libraries to be linked
# TBB::malloc_proxy - imported target for the TBB malloc proxy library
#
#
# This module reads hints about search locations from variables:
@ -65,28 +68,84 @@
# FindTBB helper functions and macros
#
#====================================================
# Fix the library path in case it is a linker script
#====================================================
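# (Some distros ship libtbb.so as an ld linker script, i.e. a small text
# file such as "INPUT (libtbb.so.2)", rather than an ELF binary.)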
function(tbb_extract_real_library library real_library)
if(NOT UNIX OR NOT EXISTS ${library})
set(${real_library} "${library}" PARENT_SCOPE)
return()
endif()
#Read in the first 4 bytes and see if they are the ELF magic number
set(_elf_magic "7f454c46")
file(READ ${library} _hex_data OFFSET 0 LIMIT 4 HEX)
if(_hex_data STREQUAL _elf_magic)
#we have opened an ELF binary so this is what
#we should link to
set(${real_library} "${library}" PARENT_SCOPE)
return()
endif()
file(READ ${library} _data OFFSET 0 LIMIT 1024)
if("${_data}" MATCHES "INPUT \\(([^(]+)\\)")
#extract out the .so name from REGEX MATCH command
set(_proper_so_name "${CMAKE_MATCH_1}")
#construct path to the real .so which is presumed to be in the same directory
#as the input file
get_filename_component(_so_dir "${library}" DIRECTORY)
set(${real_library} "${_so_dir}/${_proper_so_name}" PARENT_SCOPE)
else()
#unable to determine what this library is so just hope everything works
#and pass it unmodified.
set(${real_library} "${library}" PARENT_SCOPE)
endif()
endfunction()
#===============================================
# Do the final processing for the package find.
#===============================================
macro(findpkg_finish PREFIX)
# skip if already processed during this run
if (NOT ${PREFIX}_FOUND)
if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY)
set(${PREFIX}_FOUND TRUE)
set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIR})
set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARY})
else ()
if (${PREFIX}_FIND_REQUIRED AND NOT ${PREFIX}_FIND_QUIETLY)
message(FATAL_ERROR "Required library ${PREFIX} not found.")
endif ()
macro(findpkg_finish PREFIX TARGET_NAME)
if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY)
set(${PREFIX}_FOUND TRUE)
set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIR})
set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARY})
else ()
if (${PREFIX}_FIND_REQUIRED AND NOT ${PREFIX}_FIND_QUIETLY)
message(FATAL_ERROR "Required library ${PREFIX} not found.")
endif ()
#mark the following variables as internal variables
mark_as_advanced(${PREFIX}_INCLUDE_DIR
${PREFIX}_LIBRARY
${PREFIX}_LIBRARY_DEBUG
${PREFIX}_LIBRARY_RELEASE)
endif ()
if (NOT TARGET "TBB::${TARGET_NAME}")
if (${PREFIX}_LIBRARY_RELEASE)
tbb_extract_real_library(${${PREFIX}_LIBRARY_RELEASE} real_release)
endif ()
if (${PREFIX}_LIBRARY_DEBUG)
tbb_extract_real_library(${${PREFIX}_LIBRARY_DEBUG} real_debug)
endif ()
add_library(TBB::${TARGET_NAME} UNKNOWN IMPORTED)
set_target_properties(TBB::${TARGET_NAME} PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${${PREFIX}_INCLUDE_DIR}")
if (${PREFIX}_LIBRARY_DEBUG AND ${PREFIX}_LIBRARY_RELEASE)
set_target_properties(TBB::${TARGET_NAME} PROPERTIES
IMPORTED_LOCATION "${real_release}"
IMPORTED_LOCATION_DEBUG "${real_debug}"
IMPORTED_LOCATION_RELEASE "${real_release}")
elseif (${PREFIX}_LIBRARY_RELEASE)
set_target_properties(TBB::${TARGET_NAME} PROPERTIES
IMPORTED_LOCATION "${real_release}")
elseif (${PREFIX}_LIBRARY_DEBUG)
set_target_properties(TBB::${TARGET_NAME} PROPERTIES
IMPORTED_LOCATION "${real_debug}")
endif ()
endif ()
#mark the following variables as internal variables
mark_as_advanced(${PREFIX}_INCLUDE_DIR
${PREFIX}_LIBRARY
${PREFIX}_LIBRARY_DEBUG
${PREFIX}_LIBRARY_RELEASE)
endmacro()
#===============================================
@ -136,20 +195,6 @@ set(TBB_INC_SEARCH_PATH "")
set(TBB_LIB_SEARCH_PATH "")
# If we found parts of TBB in a previous pass, add the directories for those
# components to the list of those we look for.
if(TBB_INCLUDE_DIR)
list(APPEND TBB_INC_SEARCH_PATH ${TBB_INCLUDE_DIR})
endif()
if(TBB_LIBRARY_RELEASE)
get_filename_component(dir ${TBB_LIBRARY_RELEASE} DIRECTORY)
list(APPEND TBB_LIB_SEARCH_PATH ${dir})
elseif(TBB_LIBRARY_DEBUG)
get_filename_component(dir ${TBB_LIBRARY_DEBUG} DIRECTORY)
list(APPEND TBB_LIB_SEARCH_PATH ${dir})
endif()
# If user built from sources
set(TBB_BUILD_PREFIX $ENV{TBB_BUILD_PREFIX})
if (TBB_BUILD_PREFIX AND ENV_TBB_ROOT)
@ -203,12 +248,23 @@ if (WIN32 AND MSVC)
list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/ia32/${COMPILER_PREFIX})
endif ()
endforeach ()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0)
set (USE_LIBCXX OFF)
endif ()
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
set (USE_LIBCXX ON)
# For OS X binary distribution, choose libc++ based libraries for Mavericks (10.9)
# and above and AppleClang
if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0)
set (USE_LIBCXX OFF)
cmake_policy(GET CMP0025 POLICY_VAR)
if (POLICY_VAR STREQUAL "NEW")
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
set (USE_LIBCXX ON)
endif ()
else ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (USE_LIBCXX ON)
endif ()
endif ()
if (USE_LIBCXX)
@ -216,8 +272,10 @@ elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
list (APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/libc++ ${dir}/libc++/lib)
endforeach ()
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# check compiler ABI
endif ()
# check compiler ABI
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(COMPILER_PREFIX)
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
list(APPEND COMPILER_PREFIX "gcc4.7")
@ -285,7 +343,7 @@ find_library(TBB_LIBRARY_DEBUG
PATHS ${TBB_LIB_SEARCH_PATH})
make_library_set(TBB_LIBRARY)
findpkg_finish(TBB)
findpkg_finish(TBB tbb)
#if we haven't found TBB no point on going any further
if (NOT TBB_FOUND)
@ -309,7 +367,7 @@ find_library(TBB_MALLOC_LIBRARY_DEBUG
PATHS ${TBB_LIB_SEARCH_PATH})
make_library_set(TBB_MALLOC_LIBRARY)
findpkg_finish(TBB_MALLOC)
findpkg_finish(TBB_MALLOC tbbmalloc)
#=============================================================================
# Look for TBB's malloc proxy package
@ -328,7 +386,7 @@ find_library(TBB_MALLOC_PROXY_LIBRARY_DEBUG
PATHS ${TBB_LIB_SEARCH_PATH})
make_library_set(TBB_MALLOC_PROXY_LIBRARY)
findpkg_finish(TBB_MALLOC_PROXY)
findpkg_finish(TBB_MALLOC_PROXY tbbmalloc_proxy)
#=============================================================================
@ -336,10 +394,10 @@ findpkg_finish(TBB_MALLOC_PROXY)
if(NOT TBB_VERSION)
#only read the start of the file
file(READ
file(STRINGS
"${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h"
TBB_VERSION_CONTENTS
LIMIT 2048)
REGEX "VERSION")
string(REGEX REPLACE
".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1"

@ -15,7 +15,6 @@
## cmake -DVTKm_SOURCE_DIR=<VTKm_SOURCE_DIR> -P <VTKm_SOURCE_DIR>/CMake/VTKMCheckCopyright.cmake
##
cmake_minimum_required(VERSION 3.8...3.15 FATAL_ERROR)
set(FILES_TO_CHECK
*.txt
*.cmake

@ -1,32 +0,0 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
#-----------------------------------------------------------------------------
# check if this is a sanitizer build. If so, set up the environment.
function(vtkm_check_sanitizer_build)
string (FIND "${CTEST_MEMORYCHECK_TYPE}" "Sanitizer" SANITIZER_BUILD)
if (${SANITIZER_BUILD} GREATER -1)
# This is a sanitizer build.
# Configure the sanitizer blacklist file
set (SANITIZER_BLACKLIST "${VTKm_BINARY_DIR}/sanitizer_blacklist.txt")
configure_file (
"${VTKm_SOURCE_DIR}/Utilities/DynamicAnalysis/sanitizer_blacklist.txt.in"
${SANITIZER_BLACKLIST}
@ONLY
)
# Add the compiler flags for blacklist
set (FSANITIZE_BLACKLIST "\"-fsanitize-blacklist=${SANITIZER_BLACKLIST}\"")
foreach (entity C CXX SHARED_LINKER EXE_LINKER MODULE_LINKER)
set (CMAKE_${entity}_FLAGS "${CMAKE_${entity}_FLAGS} ${FSANITIZE_BLACKLIST}")
endforeach ()
endif ()
endfunction()

@ -86,7 +86,6 @@ endif()
if(VTKm_ENABLE_OPENMP AND NOT TARGET vtkm::openmp)
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
find_package(OpenMP 4.0 REQUIRED COMPONENTS CXX QUIET)
add_library(vtkm::openmp INTERFACE IMPORTED GLOBAL)

@ -378,7 +378,7 @@ function(vtkm_library)
EXTENDS_VTKM
DEVICE_SOURCES ${VTKm_LIB_DEVICE_SOURCES}
)
if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
if(VTKm_HIDE_PRIVATE_SYMBOLS)
set_property(TARGET ${lib_name} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
set_property(TARGET ${lib_name} PROPERTY CXX_VISIBILITY_PRESET "hidden")
endif()

@ -0,0 +1,53 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
#-----------------------------------------------------------------------------
# check if this is a sanitizer build. If so, set up the environment.
function(vtkm_check_sanitizer_build)
# each line is a separate entry
set(blacklist_file_content "
src:${VTKm_SOURCE_DIR}/vtkm/thirdparty/
")
set (sanitizer_blacklist "${VTKm_BINARY_DIR}/sanitizer_blacklist.txt")
file(WRITE "${sanitizer_blacklist}" "${blacklist_file_content}")
set(sanitizer_flags )
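# e.g. VTKm_USE_SANITIZER="address;leak" yields "-fsanitize=address -fsanitize=leak "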
foreach(sanitizer IN LISTS VTKm_USE_SANITIZER)
string(APPEND sanitizer_flags "-fsanitize=${sanitizer} ")
endforeach()
# Add the compiler flags for blacklist
if(VTKM_COMPILER_IS_CLANG)
string(APPEND sanitizer_flags "\"-fsanitize-blacklist=${sanitizer_blacklist}\"")
endif()
foreach (entity C CXX SHARED_LINKER EXE_LINKER)
set (CMAKE_${entity}_FLAGS "${CMAKE_${entity}_FLAGS} ${sanitizer_flags}" PARENT_SCOPE)
endforeach ()
endfunction()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
set(CMAKE_COMPILER_IS_CLANGXX 1)
endif()
if(VTKM_COMPILER_IS_CLANG OR VTKM_COMPILER_IS_GNU)
vtkm_option(VTKm_ENABLE_SANITIZER "Build with sanitizer support." OFF)
mark_as_advanced(VTKm_ENABLE_SANITIZER)
set(VTKm_USE_SANITIZER "address" CACHE STRING "The sanitizer to use")
mark_as_advanced(VTKm_USE_SANITIZER)
if(VTKm_ENABLE_SANITIZER)
vtkm_check_sanitizer_build()
endif()
endif()

@ -103,6 +103,12 @@ function(vtkm_unit_tests)
# For Testing Purposes, we will set the default logging level to INFO
list(APPEND vtkm_default_test_log_level "-v" "INFO")
# Add the path to the data directory so tests can find and use data files for testing
list(APPEND VTKm_UT_TEST_ARGS "--data-dir=${VTKm_SOURCE_DIR}/data/data")
# Add the path to the location where regression test images are to be stored
list(APPEND VTKm_UT_TEST_ARGS "--baseline-dir=${VTKm_SOURCE_DIR}/data/baseline")
if(VTKm_UT_MPI)
# for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
set(test_prog "${test_prog}_mpi")
@ -128,7 +134,7 @@ function(vtkm_unit_tests)
endif()
vtkm_add_target_information(${test_prog} DEVICE_SOURCES ${device_sources})
if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
if(VTKm_HIDE_PRIVATE_SYMBOLS)
set_property(TARGET ${test_prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
set_property(TARGET ${test_prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
endif()

@ -11,11 +11,11 @@
# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX.
# We require CMake 3.11 with the MSVC generator as the $<COMPILE_LANGUAGE:>
# generator expression is not supported on older versions.
cmake_minimum_required(VERSION 3.8...3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project (VTKm)
if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
cmake_minimum_required(VERSION 3.11...3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
endif()
# Update module path
@ -118,7 +118,7 @@ vtkm_option(VTKm_INSTALL_ONLY_LIBRARIES "install only vtk-m libraries and no hea
# rather than exporting all symbols. This flag is added so that consumers
# which require static builds can force all symbols on, which is something
# VTK does.
vtkm_option(VTKm_USE_DEFAULT_SYMBOL_VISIBILITY "Don't explicitly hide symbols from libraries." OFF)
vtkm_option(VTKm_HIDE_PRIVATE_SYMBOLS "Hide symbols from libraries." ON)
vtkm_option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" OFF)
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
@ -136,7 +136,7 @@ mark_as_advanced(
VTKm_ENABLE_LOGGING
VTKm_NO_ASSERT
VTKm_INSTALL_ONLY_LIBRARIES
VTKm_USE_DEFAULT_SYMBOL_VISIBILITY
VTKm_HIDE_PRIVATE_SYMBOLS
VTKm_ENABLE_DEVELOPER_FLAGS
VTKm_NO_INSTALL_README_LICENSE
)
@ -159,6 +159,18 @@ include(VTKmCompilerFlags)
#-----------------------------------------------------------------------------
# We need to check and see if git lfs is installed so that test data will
# be available for use
if (VTKm_ENABLE_TESTING)
file(STRINGS "${VTKm_SOURCE_DIR}/data/data/sentinel-data" sentinel_data LIMIT_COUNT 1)
if (NOT sentinel_data STREQUAL "-- DO NOT MODIFY THIS LINE --")
message(WARNING
"Testing is enabled, but the data is not available. Use git lfs in order "
"to obtain the testing data.")
set(VTKm_ENABLE_TESTING off)
endif()
endif()
# We include the wrappers unconditionally as VTK-m expects the function to
# always exist (and early terminate when testing is disabled).
include(testing/VTKmTestWrappers)
@ -187,10 +199,9 @@ if (VTKm_ENABLE_TESTING)
# faux variadic template code
find_package(Pyexpander QUIET)
#-----------------------------------------------------------------------------
# Setup compiler flags for dynamic analysis if needed
include(VTKmCompilerDynamicAnalysisFlags)
vtkm_check_sanitizer_build()
include(testing/VTKmCompilerDynamicAnalysisFlags)
endif (VTKm_ENABLE_TESTING)
#-----------------------------------------------------------------------------
@ -327,6 +338,8 @@ if (VTKm_ENABLE_TESTING)
# installed version of VTK-m.
include(testing/VTKmTestInstall)
vtkm_test_install()
else ()
set(CTEST_USE_LAUNCHERS off)
endif()
#-----------------------------------------------------------------------------

@ -32,19 +32,19 @@ Before you begin, perform initial setup:
This will prompt for your GitLab user name and configure a remote
called `gitlab` to refer to it.
5. (Optional but highly recommended.)
[Register with the VTK-m dashboard] on Kitware's CDash instance to
better know how your code performs in regression tests. After
registering and signing in, click on "All Dashboards" link in the upper
left corner, scroll down and click "Subscribe to this project" on the
right of VTK-m.
6. (Optional but highly recommended.)
[Sign up for the VTK-m mailing list] to communicate with other
developers and users.
[GitLab Access]: https://gitlab.kitware.com/users/sign_in
[Fork VTK-m]: https://gitlab.kitware.com/vtk/vtk-m/forks/new
[Fork VTK-m]: https://gitlab.kitware.com/vtk/vtk-m/-/forks/new
[Register with the VTK-m dashboard]: https://open.cdash.org/register.php
[Sign up for the VTK-m mailing list]: http://vtk.org/mailman/listinfo/vtkm
@ -108,6 +108,9 @@ idea of the feature or fix to be developed given just the branch name.
This is required as VTK-m uses Git-LFS to efficiently support data
files.
4. If you are adding a new feature or making significant changes to the API,
make sure to add an entry to `docs/changelog`, as illustrated below. This
allows release notes to properly capture all relevant changes.
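For illustration, an entry is just a short markdown file under
`docs/changelog`; the filename and wording here are hypothetical:

    # Add hyperstructure statistics

    A few sentences describing the new feature or API change, written
    so the text can be lifted directly into the release notes.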
### Guidelines for Commit Messages ###
@ -175,7 +178,7 @@ upper right.
When you [pushed your topic branch](#share-a-topic), it will provide you
with a url of the form
https://gitlab.kitware.com/<username>/vtk-m/merge_requests/new
https://gitlab.kitware.com/<username>/vtk-m/-/merge_requests/new
You can copy/paste that into your web browser to create a new merge
request. Alternately, you can visit your fork in GitLab, browse to the
@ -203,7 +206,7 @@ will be filled out for you.
5. In the "**Description**" field provide a high-level description of the
change the topic makes and any relevant information about how to try
it.
* Use `@username` syntax to draw attention of specific developers.
This syntax may be used anywhere outside literal text and code
blocks. Or, wait until the [next step](#review-a-merge-request)
@ -226,6 +229,10 @@ will be filled out for you.
6. The "**Assign to**", "**Milestone**", and "**Labels**" fields may be
left blank.
7. Enable the "**Allow commits from members who can merge to the target branch.**" option,
so that reviewers can modify the merge request. This allows reviewers to change
minor style issues without overwhelming the author with change requests.
7. Use the "**Submit merge request**" button to create the merge request
and visit its page.
@ -318,32 +325,49 @@ succeeds.
### Testing ###
VTK-m has a [buildbot](http://buildbot.net) instance watching for merge
requests to test. Each time a merge request is updated the buildbot user
(@buildbot) will automatically trigger a new build on all VTK-m buildbot
workers. The buildbot user (@buildbot) will respond with a comment linking
to the CDash results when it schedules builds.
Each time a merge request is created or updated, automated testing
is triggered and shows up under the pipeline tab.
The buildbot user (@buildbot) will also respond to any comment with the
form:
Developers can track the status of the pipeline for a merge
request by using the Pipeline tab on the merge request or by
clicking on the stage icons as shown below:
![alt text](docs/build_stage.png "Pipeline")
When trying to diagnose why a build or test stage has failed, it
is generally easier to look at the pruned information reported
on [VTK-m's CDash Dashboard](https://open.cdash.org/index.php?project=VTKM).
To make it easier to see only the results for a given merge request,
you can click the `cdash` link under the external stage (the rightmost pipeline stage icon):
![alt text](docs/external_stage.png "CDash Link")
In addition to the GitLab pipelines, the buildbot user (@buildbot) will respond
with a comment linking to the CDash results when it schedules builds.
The builds for VTK-m that show up as part of the `external` stage of the
GitLab pipeline are driven via buildbot and have a different workflow.
When you need to do things such as retry a build, you must issue commands
via comments of the following form. The buildbot user (@buildbot) will
respond to signify that the command has been executed:
Do: test
The `Do: test` command accepts the following arguments:
* `--oneshot`
only build the *current* hash of the branch; updates will not be
built using this command
* `--stop`
clear the list of commands for the merge request
* `--superbuild`
build the superbuilds related to the project
* `--clear`
clear previous commands before adding this command
* `--regex-include <arg>` or `-i <arg>`
only build on builders matching `<arg>` (a Python regular
expression)
* `--regex-exclude <arg>` or `-e <arg>`
excludes builds on builders matching `<arg>` (a Python regular
expression)
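For example, a hypothetical comment combining these flags to build only the
current hash on builders whose names match `centos` might look like:

    Do: test --oneshot -i centos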
@ -451,7 +475,7 @@ will stop running so that you can make changes. Make the changes you need,
use `git add` to stage those changes, and then use
$ git rebase --continue
to have git continue the rebase process. You can always run `git status` to
get help about what to do next.

@ -70,9 +70,7 @@ VTK-m Requires:
+ MSVC 2015+
+ Intel 17.0.4+
+ [CMake](http://www.cmake.org/download/)
+ CMake 3.8+
+ CMake 3.11+ (for Visual Studio generator)
+ CMake 3.12+ (for OpenMP support)
+ CMake 3.12+
+ CMake 3.13+ (for CUDA support)
Optional dependencies are:
@ -105,18 +103,18 @@ Optional dependencies are:
VTK-m has been tested on the following configurations:
+ On Linux
+ GCC 4.8.5, 5.4.0, 6.4.0, 7.3.0, Clang 5.0, 6.0, 7.0, Intel 17.0.4, Intel 19.0.0
+ CMake 3.13.3, 3.14.1
+ CUDA 9.2.148, 10.0.130, 10.1.105
+ GCC 4.8.5, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
+ CMake 3.12, 3.13, 3.16, 3.17
+ CUDA 9.2.148, 10.0.130, 10.1.105, 10.2.89
+ TBB 4.4 U2, 2017 U7
+ On Windows
+ Visual Studio 2015, 2017
+ CMake 3.8.2, 3.11.1, 3.12.4
+ CMake 3.12, 3.17
+ CUDA 10.1
+ TBB 2017 U3, 2018 U2
+ On MacOS
+ AppleClang 9.1
+ CMake 3.12.3
+ CMake 3.12
+ TBB 2018
@ -159,7 +157,7 @@ Marching Cubes algorithm on it, and render the results to an image:
#include <vtkm/Range.h>
#include <vtkm/cont/ColorTable.h>
#include <vtkm/filter/Contour.h>
#include <vtkm/io/reader/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/rendering/Actor.h>
#include <vtkm/rendering/Camera.h>
#include <vtkm/rendering/CanvasRayTracer.h>
@ -168,7 +166,7 @@ Marching Cubes algorithm on it, and render the results to an image:
#include <vtkm/rendering/Scene.h>
#include <vtkm/rendering/View3D.h>
vtkm::io::reader::VTKDataSetReader reader("path/to/vtk_image_file");
vtkm::io::VTKDataSetReader reader("path/to/vtk_image_file.vtk");
vtkm::cont::DataSet inputData = reader.ReadDataSet();
std::string fieldName = "scalars";
@ -204,7 +202,7 @@ scene.AddActor(vtkm::rendering::Actor(outputData.GetCellSet(),
vtkm::rendering::View3D view(scene, mapper, canvas, camera, bg);
view.Initialize();
view.Paint();
view.SaveAs("demo_output.pnm");
view.SaveAs("demo_output.png");
```
A minimal CMakeLists.txt such as the following one can be used to build this
@ -232,7 +230,7 @@ See [LICENSE.txt](LICENSE.txt) for details.
[VTK-m Doxygen]: http://m.vtk.org/documentation/
[VTK-m download page]: http://m.vtk.org/index.php/VTK-m_Releases
[VTK-m git repository]: https://gitlab.kitware.com/vtk/vtk-m/
[VTK-m Issue Tracker]: https://gitlab.kitware.com/vtk/vtk-m/issues
[VTK-m Issue Tracker]: https://gitlab.kitware.com/vtk/vtk-m/-/issues
[VTK-m Overview]: http://m.vtk.org/images/2/29/VTKmVis2016.pptx
[VTK-m Users Guide]: http://m.vtk.org/images/c/c8/VTKmUsersGuide.pdf
[VTK-m users email list]: http://vtk.org/mailman/listinfo/vtkm

1
Utilities/CI/.gitignore vendored Normal file

@ -0,0 +1 @@
env/

@ -0,0 +1,37 @@
# How to set up a machine to use the CI scripts #
# OSX and Unix #
# Requirements #
- Docker
- Python3
  - PyYAML
The CI scripts require python3 and the PyYAML package.
Generally the best way to set up this environment is to create a Python
virtual env so you don't pollute your system. This means getting pip,
the Python package manager, and virtualenv, which allows for isolation
of a project's Python dependencies.
```
sudo easy_install pip
sudo pip install virtualenv
```
Next we need to create a new Python virtual env. I personally
like to set this up in `vtkm/Utilities/CI/env`.
```
mkdir env
virtualenv env
```
Now all we have to do is install the requirements:
```
./env/bin/pip install -r requirements.txt
```
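With the environment in place, the CI helper scripts in this directory can be
run through the virtualenv's interpreter. As a minimal sketch (assuming the
`reproduce_ci_env.py` script that lives alongside this README), listing the
available gitlab-ci jobs looks like:
```
./env/bin/python reproduce_ci_env.py list
```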

307
Utilities/CI/reproduce_ci_env.py Executable file

@ -0,0 +1,307 @@
#!/usr/bin/env python3
#=============================================================================
#
# Copyright (c) Kitware, Inc.
# All rights reserved.
# See LICENSE.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even
# the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the above copyright notice for more information.
#
#=============================================================================
import enum
import os
import tempfile
import string
import subprocess
import sys
import platform
import re
import yaml
def get_root_dir():
dir_path = os.path.dirname(os.path.realpath(__file__))
#find where the .gitlab-ci.yml is located
try:
src_root = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'], cwd=dir_path)
src_root = str(src_root, 'utf-8')
src_root = src_root.rstrip('\n')
# Corrections in case the filename is a funny Cygwin path
src_root = re.sub(r'^/cygdrive/([a-z])/', r'\1:/', src_root)
return src_root
except subprocess.CalledProcessError:
return None
def extract_stage_job_from_cmdline(*args):
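# e.g. ('centos7',) -> ['build', 'centos7'] and ('test:centos7',) -> ['test', 'centos7'];
# two explicit arguments are passed through unchanged.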
if len(args) == 1:
stage_and_job = str(args[0]).split(':')
if len(stage_and_job) == 1:
stage_and_job = ['build', stage_and_job[0]]
return stage_and_job
return args
def load_ci_file(ci_file_path):
ci_state = {}
if ci_file_path:
root_dir = os.path.dirname(ci_file_path)
ci_state = yaml.safe_load(open(ci_file_path))
if 'include' in ci_state:
for inc in ci_state['include']:
if 'local' in inc:
#the local paths can start with '/'
include_path = inc['local'].lstrip('/')
include_path = os.path.join(root_dir, include_path)
ci_state.update(yaml.safe_load(open(include_path)))
return ci_state
def ci_stages_and_jobs(ci_state):
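# e.g. a top-level key 'build:centos7' is recorded as job 'centos7' under
# the 'build' stage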
stages = ci_state['stages']
jobs = dict((s,[]) for s in stages)
for key in ci_state:
maybe_stage = key.split(':')
if maybe_stage[0] in stages:
jobs[maybe_stage[0]].append(maybe_stage[1])
return jobs
def subset_yml(ci_state, stage, name):
#Given a stage and name, generate a new yaml
#file that only contains information for that stage and name.
#Does basic 'extends' merging so that recreating the env is easier
runner_yml = {}
yml_name = stage+":"+name
runner_yml[yml_name] = ci_state[yml_name]
entry = runner_yml[yml_name]
#Flatten 'extends' entries; only the first level of inheritance is
#presumed to be important
if 'extends' in entry:
if not isinstance(entry['extends'], list):
entry['extends'] = [ entry['extends'] ]
for e in entry['extends']:
entry.update(ci_state[e])
del entry['extends']
return runner_yml
class CallMode(enum.Enum):
call = 1
output = 2
def subprocess_call_docker(cmd, cwd, mode=CallMode.call):
system = platform.system()
if (system == 'Windows') or (system == 'Darwin'):
# Windows and MacOS run Docker in a VM, so they don't need sudo
full_cmd = ['docker'] + cmd
else:
# Unix needs to run docker with root privileges
full_cmd = ['sudo', 'docker'] + cmd
print(" ".join(full_cmd), flush=True)
if mode is CallMode.call:
return subprocess.check_call(full_cmd, cwd=cwd)
if mode is CallMode.output:
return subprocess.check_output(full_cmd, cwd=cwd)
###############################################################################
#
# User Command: 'list'
#
###############################################################################
def list_jobs(ci_file_path, *args):
ci_state = load_ci_file(ci_file_path)
jobs = ci_stages_and_jobs(ci_state)
for key,values in jobs.items():
print('Jobs for Stage:', key)
for v in values:
print('\t',v)
print('')
###############################################################################
#
# User Command: 'build' | 'setup'
#
###############################################################################
def create_container(ci_file_path, *args):
ci_state = load_ci_file(ci_file_path)
ci_jobs = ci_stages_and_jobs(ci_state)
stage,name = extract_stage_job_from_cmdline(*args)
if not stage in ci_jobs:
print('Unable to find stage: ', stage)
print('Valid stages are:', list(ci_jobs.keys()))
exit(1)
if not name in ci_jobs[stage]:
print('Unable to find job: ', name)
print('Valid jobs are:', ci_jobs[stage])
exit(1)
#we now have the relevant subset of the yml
#fully expanded into a single definition
subset = subset_yml(ci_state, stage, name)
runner_name = stage+":"+name
runner = subset[runner_name]
src_dir = get_root_dir()
gitlab_env = [ k + '="' + v + '"' for k,v in runner['variables'].items()]
# propagate any https/http proxy info
if os.getenv('http_proxy'):
gitlab_env = [ 'http_proxy=' + os.getenv('http_proxy') ] + gitlab_env
if os.getenv('https_proxy'):
gitlab_env = [ 'https_proxy=' + os.getenv('https_proxy') ] + gitlab_env
# The script and before_script could be anywhere!
script_search_locations = [ci_state, subset, runner]
for loc in script_search_locations:
if 'before_script' in loc:
before_script = loc['before_script']
if 'script' in loc:
script = loc['script']
docker_template = string.Template('''
FROM $image
ENV GITLAB_CI=1 \
GITLAB_CI_EMULATION=1 \
CI_PROJECT_DIR=. \
CI_JOB_NAME=$job_name
#Copy all of this project to the src directory
COPY . /src
ENV $gitlab_env
WORKDIR /src
#Let git fix issues from copying across OS (such as windows EOL)
#Note that this will remove any changes not committed.
RUN echo "$before_script || true" >> /setup-gitlab-env.sh && \
echo "$script || true" >> /run-gitlab-stage.sh && \
git reset --hard && \
bash /setup-gitlab-env.sh
''')
docker_content = docker_template.substitute(image=runner['image'],
job_name='local-build'+runner_name,
src_dir=src_dir,
gitlab_env= " ".join(gitlab_env),
before_script=" && ".join(before_script),
script=" && ".join(script))
# Write out the file
docker_file = tempfile.NamedTemporaryFile(delete=False)
docker_file.write(bytes(docker_content, 'utf-8'))
docker_file.close()
# now we need to run docker and build this image with a name equal to the
# ci name, and the docker context to be the current git repo root dir so
# we can copy the current project src automagically
try:
subprocess_call_docker(['build', '-f', docker_file.name, '-t', runner_name, src_dir],
cwd=src_dir)
except subprocess.CalledProcessError:
print('Unable to build the docker image for: ', runner_name)
exit(1)
finally:
# remove the temp file
os.remove(docker_file.name)
###############################################################################
#
# User Command: 'run' | 'exec'
#
###############################################################################
def run_container(ci_file_path, *args):
# Exec/Run ( https://docs.docker.com/engine/reference/commandline/exec/#run-docker-exec-on-a-running-container )
src_dir = get_root_dir()
stage,name = extract_stage_job_from_cmdline(*args)
image_name = stage+':'+name
try:
cmd = ['run', '-itd', image_name]
container_id = subprocess_call_docker(cmd, cwd=src_dir, mode=CallMode.output)
container_id = str(container_id, 'utf-8')
container_id = container_id.rstrip('\n')
except subprocess.CalledProcessError:
print('Unable to run the docker image for: ', image_name)
exit(1)
try:
cmd = ['exec', '-it', container_id, 'bash']
subprocess_call_docker(cmd, cwd=src_dir)
except subprocess.CalledProcessError:
print('Unable to attach an interactive shell to: ', container_id)
pass
try:
cmd = ['container', 'stop', container_id]
subprocess_call_docker(cmd, cwd=src_dir)
except subprocess.CalledProcessError:
print('Unable to stop container: ', container_id)
pass
###############################################################################
#
# User Command: 'help'
#
###############################################################################
def help_usage(ci_file_path, *args):
print('Setup gitlab-ci docker environments/state locally')
print('Usage: reproduce_ci_env.py [command] [stage] <name>')
print('\n')
print('Commands:\n' + \
'\n'+\
' list: List all stage and job names for gitlab-ci\n'+\
' create: build a docker container for this gitlab-ci job.\n'+\
' Will match the <stage> to docker repo, and <name> to the tag. \n' +\
'          If no explicit <stage> is provided, it will default to the `build` stage. \n' +\
' run: Launch an interactive shell inside the docker image\n' +\
' for a given stage:name with the correct environment and will automatically\n' +\
' run the associated stage script.\n'
'        If no explicit <stage> is provided, it will default to the `build` stage. \n')
print('Example:\n' + \
'\n'+\
' reproduce_ci_env create centos7\n'+\
' reproduce_ci_env run build:centos7\n')
###############################################################################
def main(argv):
ci_file_path = os.path.join(get_root_dir(), '.gitlab-ci.yml')
if len(argv) == 0:
help_usage( ci_file_path )
exit(1)
if len(argv) > 3:
help_usage( ci_file_path )
exit(1)
#commands we want
# - list
# -- list all 'jobs'
# - create | setup
# -- create a docker image that represents a given stage:name
# - run | exec
# -- run the script for the stage:name inside the correct docker image
# and provide an interactive shell
# -- help
#setup arg function table
commands = {
'list': list_jobs,
'create': create_container,
'setup': create_container,
'exec': run_container,
'run': run_container,
'help': help_usage
}
if argv[0] in commands:
#splat the subset of the vector so they are separate call parameters
commands[argv[0]]( ci_file_path, *argv[1:3] )
else:
commands['help']( ci_file_path )
exit(1)
exit(0)
if __name__ == '__main__':
main(sys.argv[1:])

@ -0,0 +1 @@
PyYAML

@ -1,2 +0,0 @@
# Blacklist third party libraries from invoking sanitizer errors
src:@VTKm_SOURCE_DIR@/vtkm/thirdparty/*

@ -23,7 +23,8 @@ OPTIONS
Show what would be pushed without actually updating the destination
-f,--force
Force-push the topic HEAD to rewrite the destination branch
Force-push the topic HEAD to rewrite the destination branch (use twice
to ignore stale remote tracking branches)
--no-default
Do not push the default branch (e.g. master)
@ -73,7 +74,14 @@ set_upstream=true
# Parse the command line options.
while test $# != 0; do
case "$1" in
-f|--force) force='+'; lease=true ;;
-f|--force)
if test -n "$force"; then
lease=false
else
lease=true
fi
force='+'
;;
--no-topic) no_topic=1; set_upstream=false ;;
--dry-run) dry_run=--dry-run ;;
--no-default) no_default=1 ;;

41
Utilities/GitSetup/setup-lfs Executable file

@ -0,0 +1,41 @@
#!/usr/bin/env bash
#=============================================================================
# Copyright 2017 Kitware, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================
# Run this script to set up the local Git repository to push LFS data to
# remotes.
die() {
echo 1>&2 "$@" ; exit 1
}
# Make sure we are inside the repository.
cd "${BASH_SOURCE%/*}" &&
# Set the LFS filter configuration up.
if git lfs version; then
git config filter.lfs.clean "git-lfs clean -- %f" &&
git config filter.lfs.smudge "git-lfs smudge -- %f" &&
git config filter.lfs.process "git-lfs filter-process" &&
git config filter.lfs.required true &&
git lfs fetch &&
git lfs checkout &&
echo 'LFS is now configured.'
else
die 'Git LFS is not available. Please make it available on the PATH' \
'either by installing it through your system provider or installing it' \
'from <https://git-lfs.github.com>.'
fi

@ -1,157 +0,0 @@
#!/usr/bin/env python3
#
# Compares the output from BenchmarkDeviceAdapter from the serial
# device to a parallel device and prints a table containing the results.
#
# Example usage:
#
# $ BenchmarkDeviceAdapter_SERIAL > serial.out
# $ BenchmarkDeviceAdapter_TBB > tbb.out
# $ benchCompare.py serial.out tbb.out
#
#
# The number of threads (optional -- only used to generate the "Warn" column)
maxThreads = 4
#
# Print debugging output:
doDebug = False
#
# End config options.
import re
import sys
assert(len(sys.argv) == 3)
def debug(str):
if (doDebug): print(str)
# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) on device ([^*]+) \\*{15}")
# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")
# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
serialFilename = sys.argv[1]
parallelFilename = sys.argv[2]
serialFile = open(serialFilename, 'r')
parallelFile = open(parallelFilename, 'r')
class BenchKey:
def __init__(self, name_, type_):
self.name = name_
self.type = type_
def __eq__(self, other):
return self.name == other.name and self.type == other.type
def __lt__(self, other):
if self.name < other.name: return True
elif self.name > other.name: return False
else: return self.type < other.type
def __hash__(self):
return (self.name + self.type).__hash__()
class BenchData:
def __init__(self, mean_, stdDev_):
self.mean = mean_
self.stdDev = stdDev_
def parseFile(f, benchmarks):
type = ""
bench = ""
mean = -1.
stdDev = -1.
for line in f:
debug("Line: {}".format(line))
typeRes = typeParser.match(line)
if typeRes:
type = typeRes.group(1)
debug("Found type: {}".format(type))
continue
nameRes = nameParser.match(line)
if nameRes:
name = nameRes.group(1)
debug("Found name: {}".format(name))
continue
meanRes = meanParser.match(line)
if meanRes:
mean = float(meanRes.group(1))
debug("Found mean: {}".format(mean))
continue
stdDevRes = stdDevParser.match(line)
if stdDevRes:
stdDev = float(stdDevRes.group(1))
debug("Found stddev: {}".format(stdDev))
# stdDev is always the last parse for a given benchmark, add entry now
benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
debug("{} records found.".format(len(benchmarks)))
mean = -1.
stdDev = -1.
continue
serialBenchmarks = {}
parallelBenchmarks = {}
parseFile(serialFile, serialBenchmarks)
parseFile(parallelFile, parallelBenchmarks)
serialKeys = set(serialBenchmarks.keys())
parallelKeys = set(parallelBenchmarks.keys())
commonKeys = sorted(list(serialKeys.intersection(parallelKeys)))
serialOnlyKeys = sorted(list(serialKeys.difference(parallelKeys)))
parallelOnlyKeys = sorted(list(parallelKeys.difference(serialKeys)))
debug("{} serial keys\n{} parallel keys\n{} common keys\n{} serialOnly keys\n{} parallelOnly keys.".format(
len(serialKeys), len(parallelKeys), len(commonKeys), len(serialOnlyKeys), len(parallelOnlyKeys)))
if len(serialOnlyKeys) > 0:
print("Keys found only in serial:")
for k in serialOnlyKeys:
print("%s (%s)"%(k.name, k.type))
print("")
if len(parallelOnlyKeys) > 0:
print("Keys found only in parallel:")
for k in parallelOnlyKeys:
print("%s (%s)"%(k.name, k.type))
print("")
print("Comparison:")
print("| %7s | %4s | %8s %8s | %8s %8s | %s (%s) |"%(
"Speedup", "Warn", "serial", "", "parallel", "", "Benchmark", "Type"))
print("|-%7s-|-%4s-|-%8s----%8s-|-%8s----%8s-|-%s--%s--|"%(
"-"*7, "-"*4, "-"*8, "-"*8, "-"*8, "-"*8, "-"*9, "-"*4))
for key in commonKeys:
sData = serialBenchmarks[key]
pData = parallelBenchmarks[key]
speedup = sData.mean / pData.mean if pData.mean != 0. else 0.
if speedup > maxThreads * .9:
flag = " "
elif speedup > maxThreads * .75:
flag = "! "
elif speedup > maxThreads * .5:
flag = "!! "
elif speedup > maxThreads * .25:
flag = "!!! "
else:
flag = "!!!!"
print("| %7.3f | %4s | %08.6f +- %08.6f | %08.6f +- %08.6f | %s (%s) |"%(
speedup, flag, sData.mean, sData.stdDev, pData.mean, pData.stdDev, key.name, key.type))

@ -1,111 +0,0 @@
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummary.py bench.out
#
# Options SortByType, SortByName, or SortByMean may be passed after the
# filename to sort the output by the indicated quantity. If no sort option
# is provided, the output order matches the input. If multiple options are
# specified, the list will be sorted repeatedly in the order requested.
import re
import sys
assert(len(sys.argv) >= 2)
# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")
# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")
# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
filename = sys.argv[1]
benchFile = open(filename, 'r')
sortOpt = None
if len(sys.argv) > 2:
sortOpt = sys.argv[2:]
class BenchKey:
def __init__(self, name_, type_):
self.name = name_
self.type = type_
def __eq__(self, other):
return self.name == other.name and self.type == other.type
def __lt__(self, other):
if self.name < other.name: return True
elif self.name > other.name: return False
else: return self.type < other.type
def __hash__(self):
return (self.name + self.type).__hash__()
class BenchData:
def __init__(self, mean_, stdDev_):
self.mean = mean_
self.stdDev = stdDev_
def parseFile(f, benchmarks):
type = ""
bench = ""
mean = -1.
stdDev = -1.
for line in f:
typeRes = typeParser.match(line)
if typeRes:
type = typeRes.group(1)
continue
nameRes = nameParser.match(line)
if nameRes:
name = nameRes.group(1)
continue
meanRes = meanParser.match(line)
if meanRes:
mean = float(meanRes.group(1))
continue
stdDevRes = stdDevParser.match(line)
if stdDevRes:
stdDev = float(stdDevRes.group(1))
# stdDev is always the last parse for a given benchmark, add entry now
benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
mean = -1.
stdDev = -1.
continue
benchmarks = {}
parseFile(benchFile, benchmarks)
# Sort keys by type:
keys = benchmarks.keys()
if sortOpt:
for opt in sortOpt:
if opt.lower() == "sortbytype":
keys = sorted(keys, key=lambda k: k.type)
elif opt.lower() == "sortbyname":
keys = sorted(keys, key=lambda k: k.name)
elif opt.lower() == "sortbymean":
keys = sorted(keys, key=lambda k: benchmarks[k].mean)
print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-s"%("Mean", "Stdev", "Stdev%", "Benchmark (type)"))
for key in keys:
data = benchmarks[key]
print("%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.mean, data.stdDev, data.stdDev / data.mean * 100., key.name, key.type))

@ -1,156 +0,0 @@
#!/usr/bin/env python
#
# Prints a concise summary of a benchmark output as a TSV blob. Benchmarks are
# expected to have "Baseline" in the name, and a matching benchmark with the
# same name but Baseline replaced with something else. For example,
#
# Baseline benchmark name: "Some benchmark: Baseline, Size=4"
# Test benchmark name: "Some benchmark: Blahblah, Size=4"
#
# The output will print the baseline, test, and overhead times for the
# benchmarks.
#
# Example usage:
#
# $ BenchmarkXXX_DEVICE > bench.out
# $ benchSummaryWithBaselines.py bench.out
#
# Options SortByType, SortByName, SortByOverhead, or SortByRatio
# (testtime/baseline) may be passed after the filename to sort the output by
# the indicated quantity. If no sort option is provided, the output order
# matches the input. If multiple options are specified, the list will be sorted
# repeatedly in the order requested.
import re
import sys
assert(len(sys.argv) >= 2)
# Parses "*** vtkm::Float64 ***************" --> vtkm::Float64
typeParser = re.compile("\\*{3} ([^*]+) \\*{15}")
# Parses "Benchmark 'Benchmark name' results:" --> Benchmark name
nameParser = re.compile("Benchmark '([^-]+)' results:")
# Parses "mean = 0.0125s" --> 0.0125
meanParser = re.compile("\\s+mean = ([0-9.Ee+-]+)s")
# Parses "std dev = 0.0125s" --> 0.0125
stdDevParser = re.compile("\\s+std dev = ([naN0-9.Ee+-]+)s")
# Parses "SomeText Baseline Other Text" --> ("SomeText ", " Other Text")
baselineParser = re.compile("(.*)Baseline(.*)")
filename = sys.argv[1]
benchFile = open(filename, 'r')
sortOpt = None
if len(sys.argv) > 2:
sortOpt = sys.argv[2:]
class BenchKey:
def __init__(self, name_, type_):
self.name = name_
self.type = type_
def __eq__(self, other):
return self.name == other.name and self.type == other.type
def __lt__(self, other):
if self.name < other.name: return True
elif self.name > other.name: return False
else: return self.type < other.type
def __hash__(self):
return (self.name + self.type).__hash__()
class BenchData:
def __init__(self, mean_, stdDev_):
self.mean = mean_
self.stdDev = stdDev_
def parseFile(f, benchmarks):
type = ""
bench = ""
mean = -1.
stdDev = -1.
for line in f:
typeRes = typeParser.match(line)
if typeRes:
type = typeRes.group(1)
continue
nameRes = nameParser.match(line)
if nameRes:
name = nameRes.group(1)
continue
meanRes = meanParser.match(line)
if meanRes:
mean = float(meanRes.group(1))
continue
stdDevRes = stdDevParser.match(line)
if stdDevRes:
stdDev = float(stdDevRes.group(1))
# stdDev is always the last parse for a given benchmark, add entry now
benchmarks[BenchKey(name, type)] = BenchData(mean, stdDev)
mean = -1.
stdDev = -1.
continue
class BaselinedBenchData:
def __init__(self, baseline, test):
self.baseline = baseline.mean
self.test = test.mean
self.overhead = test.mean - baseline.mean
def findBaselines(benchmarks):
result = {}
for baseKey in benchmarks.keys():
# Look for baseline entries
baselineRes = baselineParser.match(baseKey.name)
if baselineRes:
prefix = baselineRes.group(1)
suffix = baselineRes.group(2)
# Find the test entry matching the baseline:
for testKey in benchmarks.keys():
if baseKey.type != testKey.type: # Need same type
continue
if baseKey.name == testKey.name: # Skip the base key
continue
if testKey.name.startswith(prefix) and testKey.name.endswith(suffix):
newName = (prefix + suffix).replace(", ,", ",")
newKey = BenchKey(newName, testKey.type)
newVal = BaselinedBenchData(benchmarks[baseKey], benchmarks[testKey])
result[newKey] = newVal
return result
benchmarks = {}
parseFile(benchFile, benchmarks)
benchmarks = findBaselines(benchmarks)
# Sort keys by type:
keys = benchmarks.keys()
if sortOpt:
for opt in sortOpt:
if opt.lower() == "sortbytype":
keys = sorted(keys, key=lambda k: k.type)
elif opt.lower() == "sortbyname":
keys = sorted(keys, key=lambda k: k.name)
elif opt.lower() == "sortbyoverhead":
keys = sorted(keys, key=lambda k: benchmarks[k].overhead)
elif opt.lower() == "sortbyratio":
keys = sorted(keys, key=lambda k: benchmarks[k].overhead / benchmarks[k].baseline)
print("# Summary: (%s)"%filename)
print("%-9s\t%-9s\t%-9s\t%-9s\t%-s"%("Baseline", "TestTime", "Overhead", "Test/Base", "Benchmark (type)"))
for key in keys:
data = benchmarks[key]
print("%9.6f\t%9.6f\t%9.6f\t%9.6f\t%s (%s)"%(data.baseline, data.test,
data.overhead, data.test / data.baseline, key.name, key.type))

@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
compare-benchmarks.py - VTKm + Google Benchmarks compare.py
"""
import getopt
import subprocess
import sys
import time
import os
CURRENT_DIR = os.path.dirname(os.path.realpath(__file__))
COMPARE_PY_PATH = os.path.join(CURRENT_DIR, 'compare.py')
COMPARE_PY = sys.executable + " " + COMPARE_PY_PATH
class Bench():
def __init__(self):
self.__cmd = None
@property
def cmd(self):
return self.__cmd
@cmd.setter
def cmd(self, c):
self.__cmd = c
def launch(self):
output_file = "bench-%d.json" % time.time()
cmd_exec = "%s --benchmark_out=%s --benchmark_out_format=json" \
% (self.cmd, output_file)
print(cmd_exec)
subprocess.call(cmd_exec, shell=True)
return output_file
def print_help(error_msg = None):
if error_msg is not None:
print(error_msg)
print("usage: compare-benchmarks <opts>\n" \
" --benchmark1='<benchmark1> [arg1] [arg2] ...'"\
" [--filter1=<filter1>]\n"\
" --benchmark2='<benchmark2> [arg1] [arg2] ...'"\
" [--filter2=<filter2>]\n"\
" -- [-opt] benchmarks|filters|benchmarksfiltered\n\n" \
"compare.py help:")
subprocess.call(COMPARE_PY, shell=True)
sys.exit(0)
# -----------------------------------------------------------------------------
def main():
is_filters = False
filter1 = str()
filter2 = str()
bench1 = Bench()
bench2 = Bench()
options, remainder = getopt.gnu_getopt(sys.argv[1:], '',
['help','benchmark1=', 'benchmark2=', 'filter1=', 'filter2='])
for opt, arg in options:
if opt == "--benchmark1":
bench1.cmd = arg
if opt == "--benchmark2":
bench2.cmd = arg
if opt == "--filter1":
filter1 = arg
if opt == "--filter2":
filter2 = arg
if opt == "--help":
print_help()
if bench1.cmd is None:
print_help("ERROR: no benchmarks chosen")
for arg in remainder:
if arg == "filters":
is_filters = True
if is_filters and bench2.cmd is not None:
print_help("ERROR: filters option can only accept --benchmark1= and --filter1")
b1_output = bench1.launch()
b2_output = bench2.launch() if not is_filters else filter1 + " " + filter2
cmd = "%s %s %s %s" % (COMPARE_PY, " ".join(remainder), b1_output, b2_output)
print(cmd)
subprocess.call(cmd, shell=True)
os.remove(b1_output)
if not is_filters:
os.remove(b2_output)
if __name__ == '__main__':
main()

408
Utilities/Scripts/compare.py Executable file

@ -0,0 +1,408 @@
#!/usr/bin/env python
import unittest
"""
compare.py - versatile benchmark output compare tool
"""
import argparse
from argparse import ArgumentParser
import sys
import gbench
from gbench import util, report
from gbench.util import *
def check_inputs(in1, in2, flags):
"""
Perform checking on the user provided inputs and diagnose any abnormalities
"""
in1_kind, in1_err = classify_input_file(in1)
in2_kind, in2_err = classify_input_file(in2)
output_file = find_benchmark_flag('--benchmark_out=', flags)
output_type = find_benchmark_flag('--benchmark_out_format=', flags)
if in1_kind == IT_Executable and in2_kind == IT_Executable and output_file:
print(("WARNING: '--benchmark_out=%s' will be passed to both "
"benchmarks causing it to be overwritten") % output_file)
if in1_kind == IT_JSON and in2_kind == IT_JSON and len(flags) > 0:
print("WARNING: passing optional flags has no effect since both "
"inputs are JSON")
if output_type is not None and output_type != 'json':
print(("ERROR: passing '--benchmark_out_format=%s' to 'compare.py`"
" is not supported.") % output_type)
sys.exit(1)
def create_parser():
parser = ArgumentParser(
description='versatile benchmark output compare tool')
parser.add_argument(
'-a',
'--display_aggregates_only',
dest='display_aggregates_only',
action="store_true",
help="If there are repetitions, by default, we display everything - the"
" actual runs, and the aggregates computed. Sometimes, it is "
"desirable to only view the aggregates. E.g. when there are a lot "
"of repetitions. Do note that only the display is affected. "
"Internally, all the actual runs are still used, e.g. for U test.")
utest = parser.add_argument_group()
utest.add_argument(
'--no-utest',
dest='utest',
default=True,
action="store_false",
help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is being done by default, if at least {} repetitions were done.\nThis option can disable the U Test.".format(report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS))
alpha_default = 0.05
utest.add_argument(
"--alpha",
dest='utest_alpha',
default=alpha_default,
type=float,
help=("significance level alpha. if the calculated p-value is below this value, then the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)") %
alpha_default)
subparsers = parser.add_subparsers(
help='This tool has multiple modes of operation:',
dest='mode')
parser_a = subparsers.add_parser(
'benchmarks',
help='The most simple use-case, compare all the output of these two benchmarks')
baseline = parser_a.add_argument_group(
'baseline', 'The benchmark baseline')
baseline.add_argument(
'test_baseline',
metavar='test_baseline',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
contender = parser_a.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
contender.add_argument(
'test_contender',
metavar='test_contender',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
parser_a.add_argument(
'benchmark_options',
metavar='benchmark_options',
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
parser_b = subparsers.add_parser(
'filters', help='Compare filter one with the filter two of benchmark')
baseline = parser_b.add_argument_group(
'baseline', 'The benchmark baseline')
baseline.add_argument(
'test',
metavar='test',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
baseline.add_argument(
'filter_baseline',
metavar='filter_baseline',
type=str,
nargs=1,
help='The first filter, that will be used as baseline')
contender = parser_b.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
contender.add_argument(
'filter_contender',
metavar='filter_contender',
type=str,
nargs=1,
help='The second filter, that will be compared against the baseline')
parser_b.add_argument(
'benchmark_options',
metavar='benchmark_options',
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
parser_c = subparsers.add_parser(
'benchmarksfiltered',
help='Compare filter one of first benchmark with filter two of the second benchmark')
baseline = parser_c.add_argument_group(
'baseline', 'The benchmark baseline')
baseline.add_argument(
'test_baseline',
metavar='test_baseline',
type=argparse.FileType('r'),
nargs=1,
help='A benchmark executable or JSON output file')
baseline.add_argument(
'filter_baseline',
metavar='filter_baseline',
type=str,
nargs=1,
help='The first filter, that will be used as baseline')
contender = parser_c.add_argument_group(
'contender', 'The benchmark that will be compared against the baseline')
contender.add_argument(
'test_contender',
metavar='test_contender',
type=argparse.FileType('r'),
nargs=1,
help='The second benchmark executable or JSON output file, that will be compared against the baseline')
contender.add_argument(
'filter_contender',
metavar='filter_contender',
type=str,
nargs=1,
help='The second filter, that will be compared against the baseline')
parser_c.add_argument(
'benchmark_options',
metavar='benchmark_options',
nargs=argparse.REMAINDER,
help='Arguments to pass when running benchmark executables')
return parser
def main():
# Parse the command line flags
parser = create_parser()
args, unknown_args = parser.parse_known_args()
if args.mode is None:
parser.print_help()
exit(1)
assert not unknown_args
benchmark_options = args.benchmark_options
if args.mode == 'benchmarks':
test_baseline = args.test_baseline[0].name
test_contender = args.test_contender[0].name
filter_baseline = ''
filter_contender = ''
# NOTE: if test_baseline == test_contender, you are analyzing the stdev
description = 'Comparing %s to %s' % (test_baseline, test_contender)
elif args.mode == 'filters':
test_baseline = args.test[0].name
test_contender = args.test[0].name
filter_baseline = args.filter_baseline[0]
filter_contender = args.filter_contender[0]
# NOTE: if filter_baseline == filter_contender, you are analyzing the
# stdev
description = 'Comparing %s to %s (from %s)' % (
filter_baseline, filter_contender, args.test[0].name)
elif args.mode == 'benchmarksfiltered':
test_baseline = args.test_baseline[0].name
test_contender = args.test_contender[0].name
filter_baseline = args.filter_baseline[0]
filter_contender = args.filter_contender[0]
# NOTE: if test_baseline == test_contender and
# filter_baseline == filter_contender, you are analyzing the stdev
description = 'Comparing %s (from %s) to %s (from %s)' % (
filter_baseline, test_baseline, filter_contender, test_contender)
else:
# should never happen
print("Unrecognized mode of operation: '%s'" % args.mode)
parser.print_help()
exit(1)
check_inputs(test_baseline, test_contender, benchmark_options)
if args.display_aggregates_only:
benchmark_options += ['--benchmark_display_aggregates_only=true']
options_baseline = []
options_contender = []
if filter_baseline and filter_contender:
options_baseline = ['--benchmark_filter=%s' % filter_baseline]
options_contender = ['--benchmark_filter=%s' % filter_contender]
# Run the benchmarks and report the results
json1 = json1_orig = gbench.util.run_or_load_benchmark(
test_baseline, benchmark_options + options_baseline)
json2 = json2_orig = gbench.util.run_or_load_benchmark(
test_contender, benchmark_options + options_contender)
# Now, filter the benchmarks so that the difference report can work
if filter_baseline and filter_contender:
replacement = '[%s vs. %s]' % (filter_baseline, filter_contender)
json1 = gbench.report.filter_benchmark(
json1_orig, filter_baseline, replacement)
json2 = gbench.report.filter_benchmark(
json2_orig, filter_contender, replacement)
# Diff and output
output_lines = gbench.report.generate_difference_report(
json1, json2, args.display_aggregates_only,
args.utest, args.utest_alpha)
print(description)
for ln in output_lines:
print(ln)
class TestParser(unittest.TestCase):
def setUp(self):
self.parser = create_parser()
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'gbench',
'Inputs')
self.testInput0 = os.path.join(testInputs, 'test1_run1.json')
self.testInput1 = os.path.join(testInputs, 'test1_run2.json')
def test_benchmarks_basic(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_without_utest(self):
parsed = self.parser.parse_args(
['--no-utest', 'benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertFalse(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.05)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_display_aggregates_only(self):
parsed = self.parser.parse_args(
['-a', 'benchmarks', self.testInput0, self.testInput1])
self.assertTrue(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_with_utest_alpha(self):
parsed = self.parser.parse_args(
['--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.314)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_basic_without_utest_with_utest_alpha(self):
parsed = self.parser.parse_args(
['--no-utest', '--alpha=0.314', 'benchmarks', self.testInput0, self.testInput1])
self.assertFalse(parsed.display_aggregates_only)
self.assertFalse(parsed.utest)
self.assertEqual(parsed.utest_alpha, 0.314)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertFalse(parsed.benchmark_options)
def test_benchmarks_with_remainder(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1, 'd'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.benchmark_options, ['d'])
def test_benchmarks_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['benchmarks', self.testInput0, self.testInput1, '--', 'e'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarks')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.benchmark_options, ['e'])
def test_filters_basic(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertFalse(parsed.benchmark_options)
def test_filters_with_remainder(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd', 'e'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.benchmark_options, ['e'])
def test_filters_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['filters', self.testInput0, 'c', 'd', '--', 'f'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'filters')
self.assertEqual(parsed.test[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.filter_contender[0], 'd')
self.assertEqual(parsed.benchmark_options, ['f'])
def test_benchmarksfiltered_basic(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertFalse(parsed.benchmark_options)
def test_benchmarksfiltered_with_remainder(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', 'f'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.benchmark_options[0], 'f')
def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
parsed = self.parser.parse_args(
['benchmarksfiltered', self.testInput0, 'c', self.testInput1, 'e', '--', 'g'])
self.assertFalse(parsed.display_aggregates_only)
self.assertTrue(parsed.utest)
self.assertEqual(parsed.mode, 'benchmarksfiltered')
self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
self.assertEqual(parsed.filter_baseline[0], 'c')
self.assertEqual(parsed.test_contender[0].name, self.testInput1)
self.assertEqual(parsed.filter_contender[0], 'e')
self.assertEqual(parsed.benchmark_options[0], 'g')
if __name__ == '__main__':
# unittest.main()
main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;

@ -0,0 +1,8 @@
"""Google Benchmark tooling"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 5, 0)
__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
__all__ = []

@ -0,0 +1,541 @@
import unittest
"""report.py - Utilities for reporting statistics about benchmark results
"""
import os
import re
import copy
from scipy.stats import mannwhitneyu
class BenchmarkColor(object):
def __init__(self, name, code):
self.name = name
self.code = code
def __repr__(self):
return '%s%r' % (self.__class__.__name__,
(self.name, self.code))
def __format__(self, format):
return self.code
# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')
UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number; more is better.
UTEST_COL_NAME = "_pvalue"
def color_format(use_color, fmt_str, *args, **kwargs):
"""
Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
is False then all color codes in 'args' and 'kwargs' are replaced with
the empty string.
"""
assert use_color is True or use_color is False
if not use_color:
args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for arg in args]
kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
for key, arg in kwargs.items()}
return fmt_str.format(*args, **kwargs)
def find_longest_name(benchmark_list):
"""
Return the length of the longest benchmark name in a given list of
benchmark JSON objects
"""
longest_name = 1
for bc in benchmark_list:
if len(bc['name']) > longest_name:
longest_name = len(bc['name'])
return longest_name
def calculate_change(old_val, new_val):
"""
Return a float representing the decimal change between old_val and new_val.
"""
if old_val == 0 and new_val == 0:
return 0.0
if old_val == 0:
return float(new_val - old_val) / (float(old_val + new_val) / 2)
return float(new_val - old_val) / abs(old_val)
def filter_benchmark(json_orig, family, replacement=""):
"""
Apply a filter to the json, and only leave the 'family' of benchmarks.
"""
regex = re.compile(family)
filtered = {}
filtered['benchmarks'] = []
for be in json_orig['benchmarks']:
if not regex.search(be['name']):
continue
filteredbench = copy.deepcopy(be) # Do NOT modify the old name!
filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
filtered['benchmarks'].append(filteredbench)
return filtered
def get_unique_benchmark_names(json):
"""
While *keeping* the order, give all the unique 'names' used for benchmarks.
"""
seen = set()
uniqued = [x['name'] for x in json['benchmarks']
if x['name'] not in seen and
(seen.add(x['name']) or True)]
return uniqued
def intersect(list1, list2):
"""
Given two lists, get a new list consisting of the elements only contained
in *both of the input lists*, while preserving the ordering.
"""
return [x for x in list1 if x in list2]
def is_potentially_comparable_benchmark(x):
return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)
def partition_benchmarks(json1, json2):
"""
While preserving the ordering, find benchmarks with the same names in
both of the inputs, and group them.
(i.e. partition/filter into groups with common name)
"""
json1_unique_names = get_unique_benchmark_names(json1)
json2_unique_names = get_unique_benchmark_names(json2)
names = intersect(json1_unique_names, json2_unique_names)
partitions = []
for name in names:
time_unit = None
# Pick the time unit from the first entry of the lhs benchmark.
# We should be careful not to crash with unexpected input.
for x in json1['benchmarks']:
if (x['name'] == name and is_potentially_comparable_benchmark(x)):
time_unit = x['time_unit']
break
if time_unit is None:
continue
# Filter by name and time unit.
# All the repetitions are assumed to be comparable.
lhs = [x for x in json1['benchmarks'] if x['name'] == name and
x['time_unit'] == time_unit]
rhs = [x for x in json2['benchmarks'] if x['name'] == name and
x['time_unit'] == time_unit]
partitions.append([lhs, rhs])
return partitions
def extract_field(partition, field_name):
# The count of elements may be different. We want *all* of them.
lhs = [x[field_name] for x in partition[0]]
rhs = [x[field_name] for x in partition[1]]
return [lhs, rhs]
def calc_utest(timings_cpu, timings_time):
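# e.g. with only 2 repetitions per side the p-values are still computed, but
# the result is flagged as not having optimal repetitions, which print_utest
# reports as unreliable (see TestReportDifferenceWithUTest below).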
min_rep_cnt = min(len(timings_time[0]),
len(timings_time[1]),
len(timings_cpu[0]),
len(timings_cpu[1]))
# Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
if min_rep_cnt < UTEST_MIN_REPETITIONS:
return False, None, None
time_pvalue = mannwhitneyu(
timings_time[0], timings_time[1], alternative='two-sided').pvalue
cpu_pvalue = mannwhitneyu(
timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue
return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
def print_utest(partition, utest_alpha, first_col_width, use_color=True):
def get_utest_color(pval):
return BC_FAIL if pval >= utest_alpha else BC_OKGREEN
timings_time = extract_field(partition, 'real_time')
timings_cpu = extract_field(partition, 'cpu_time')
have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
# Check if we failed miserably with minimum required repetitions for utest
if not have_optimal_repetitions and cpu_pvalue is None and time_pvalue is None:
return []
dsc = "U Test, Repetitions: {} vs {}".format(
len(timings_cpu[0]), len(timings_cpu[1]))
dsc_color = BC_OKGREEN
# We still got some results to show but issue a warning about it.
if not have_optimal_repetitions:
dsc_color = BC_WARNING
dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
UTEST_OPTIMAL_REPETITIONS)
special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"
last_name = partition[0][0]['name']
return [color_format(use_color,
special_str,
BC_HEADER,
"{}{}".format(last_name, UTEST_COL_NAME),
first_col_width,
get_utest_color(time_pvalue), time_pvalue,
get_utest_color(cpu_pvalue), cpu_pvalue,
dsc_color, dsc,
endc=BC_ENDC)]
def generate_difference_report(
json1,
json2,
display_aggregates_only=False,
utest=False,
utest_alpha=0.05,
use_color=True):
"""
Calculate and report the difference between each test of two benchmarks
runs specified as 'json1' and 'json2'.
"""
assert utest is True or utest is False
first_col_width = find_longest_name(json1['benchmarks'])
def find_test(name):
for b in json2['benchmarks']:
if b['name'] == name:
return b
return None
first_col_width = max(
first_col_width,
len('Benchmark'))
first_col_width += len(UTEST_COL_NAME)
first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
'Benchmark', 12 + first_col_width)
output_strs = [first_line, '-' * len(first_line)]
partitions = partition_benchmarks(json1, json2)
for partition in partitions:
# Careful, we may have different repetition count.
for i in range(min(len(partition[0]), len(partition[1]))):
bn = partition[0][i]
other_bench = partition[1][i]
# *If* we were asked to only display aggregates,
# and if it is non-aggregate, then skip it.
if display_aggregates_only and 'run_type' in bn and 'run_type' in other_bench:
assert bn['run_type'] == other_bench['run_type']
if bn['run_type'] != 'aggregate':
continue
fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
def get_color(res):
if res > 0.05:
return BC_FAIL
elif res > -0.07:
return BC_WHITE
else:
return BC_CYAN
tres = calculate_change(bn['real_time'], other_bench['real_time'])
cpures = calculate_change(bn['cpu_time'], other_bench['cpu_time'])
output_strs += [color_format(use_color,
fmt_str,
BC_HEADER,
bn['name'],
first_col_width,
get_color(tres),
tres,
get_color(cpures),
cpures,
bn['real_time'],
other_bench['real_time'],
bn['cpu_time'],
other_bench['cpu_time'],
endc=BC_ENDC)]
# After processing the whole partition, if requested, do the U test.
if utest:
output_strs += print_utest(partition,
utest_alpha=utest_alpha,
first_col_width=first_col_width,
use_color=use_color)
return output_strs
###############################################################################
# Unit tests
class TestGetUniqueBenchmarkNames(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput = os.path.join(testInputs, 'test3_run0.json')
with open(testOutput, 'r') as f:
json = json.load(f)
return json
def test_basic(self):
expect_lines = [
'BM_One',
'BM_Two',
'short', # These two are not sorted
'medium', # These two are not sorted
]
json = self.load_results()
output_lines = get_unique_benchmark_names(json)
print("\n")
print("\n".join(output_lines))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
self.assertEqual(expect_lines[i], output_lines[i])
class TestReportDifference(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput1 = os.path.join(testInputs, 'test1_run1.json')
testOutput2 = os.path.join(testInputs, 'test1_run2.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_basic(self):
expect_lines = [
['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
['BM_100xSlower', '+99.0000', '+99.0000',
'100', '10000', '100', '10000'],
['BM_100xFaster', '-0.9900', '-0.9900',
'10000', '100', '10000', '100'],
['BM_10PercentCPUToTime', '+0.1000',
'-0.1000', '100', '110', '100', '90'],
['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(
json1, json2, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 7)
self.assertEqual(expect_lines[i], parts)
class TestReportDifferenceBetweenFamilies(unittest.TestCase):
def load_result(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput = os.path.join(testInputs, 'test2_run.json')
with open(testOutput, 'r') as f:
json = json.load(f)
return json
def test_basic(self):
expect_lines = [
['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
]
json = self.load_result()
json1 = filter_benchmark(json, "BM_Z.ro", ".")
json2 = filter_benchmark(json, "BM_O.e", ".")
output_lines_with_header = generate_difference_report(
json1, json2, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(len(parts), 7)
self.assertEqual(expect_lines[i], parts)
class TestReportDifferenceWithUTest(unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput1 = os.path.join(testInputs, 'test3_run0.json')
testOutput2 = os.path.join(testInputs, 'test3_run1.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_utest(self):
expect_lines = [
['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
['BM_Two_pvalue',
'0.6985',
'0.6985',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'2.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
['short_pvalue',
'0.7671',
'0.1489',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'3.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(
json1, json2, utest=True, utest_alpha=0.05, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(expect_lines[i], parts)
class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
unittest.TestCase):
def load_results(self):
import json
testInputs = os.path.join(
os.path.dirname(
os.path.realpath(__file__)),
'Inputs')
testOutput1 = os.path.join(testInputs, 'test3_run0.json')
testOutput2 = os.path.join(testInputs, 'test3_run1.json')
with open(testOutput1, 'r') as f:
json1 = json.load(f)
with open(testOutput2, 'r') as f:
json2 = json.load(f)
return json1, json2
def test_utest(self):
expect_lines = [
['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
['BM_Two_pvalue',
'0.6985',
'0.6985',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'2.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
['short_pvalue',
'0.7671',
'0.1489',
'U',
'Test,',
'Repetitions:',
'2',
'vs',
'3.',
'WARNING:',
'Results',
'unreliable!',
'9+',
'repetitions',
'recommended.'],
]
json1, json2 = self.load_results()
output_lines_with_header = generate_difference_report(
json1, json2, display_aggregates_only=True,
utest=True, utest_alpha=0.05, use_color=False)
output_lines = output_lines_with_header[2:]
print("\n")
print("\n".join(output_lines_with_header))
self.assertEqual(len(output_lines), len(expect_lines))
for i in range(0, len(output_lines)):
parts = [x for x in output_lines[i].split(' ') if x]
self.assertEqual(expect_lines[i], parts)
if __name__ == '__main__':
unittest.main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;

@ -0,0 +1,164 @@
"""util.py - General utilities for running, loading, and processing benchmarks
"""
import json
import os
import tempfile
import subprocess
import sys
# Input file type enumeration
IT_Invalid = 0
IT_JSON = 1
IT_Executable = 2
_num_magic_bytes = 2 if sys.platform.startswith('win') else 4
def is_executable_file(filename):
"""
Return 'True' if 'filename' names a valid file which is likely
an executable. A file is considered an executable if it starts with the
magic bytes for an EXE, Mach-O, or ELF file.
"""
if not os.path.isfile(filename):
return False
with open(filename, mode='rb') as f:
magic_bytes = f.read(_num_magic_bytes)
if sys.platform == 'darwin':
return magic_bytes in [
b'\xfe\xed\xfa\xce', # MH_MAGIC
b'\xce\xfa\xed\xfe', # MH_CIGAM
b'\xfe\xed\xfa\xcf', # MH_MAGIC_64
b'\xcf\xfa\xed\xfe', # MH_CIGAM_64
b'\xca\xfe\xba\xbe', # FAT_MAGIC
b'\xbe\xba\xfe\xca' # FAT_CIGAM
]
elif sys.platform.startswith('win'):
return magic_bytes == b'MZ'
else:
return magic_bytes == b'\x7FELF'
def is_json_file(filename):
"""
Returns 'True' if 'filename' names a valid JSON output file.
'False' otherwise.
"""
try:
with open(filename, 'r') as f:
json.load(f)
return True
except BaseException:
pass
return False
def classify_input_file(filename):
"""
Return a tuple (type, msg) where 'type' specifies the classified type
of 'filename'. If 'type' is 'IT_Invalid' then 'msg' is a human readable
string representing the error.
"""
ftype = IT_Invalid
err_msg = None
if not os.path.exists(filename):
err_msg = "'%s' does not exist" % filename
elif not os.path.isfile(filename):
err_msg = "'%s' does not name a file" % filename
elif is_executable_file(filename):
ftype = IT_Executable
elif is_json_file(filename):
ftype = IT_JSON
else:
err_msg = "'%s' does not name a valid benchmark executable or JSON file" % filename
return ftype, err_msg
def check_input_file(filename):
"""
Classify the file named by 'filename' and return the classification.
If the file is classified as 'IT_Invalid' print an error message and exit
the program.
"""
ftype, msg = classify_input_file(filename)
if ftype == IT_Invalid:
print("Invalid input file: %s" % msg)
sys.exit(1)
return ftype
def find_benchmark_flag(prefix, benchmark_flags):
"""
Search the specified list of flags for a flag matching `<prefix><arg>` and
if it is found return the arg it specifies. If specified more than once the
last value is returned. If the flag is not found None is returned.
"""
assert prefix.startswith('--') and prefix.endswith('=')
result = None
for f in benchmark_flags:
if f.startswith(prefix):
result = f[len(prefix):]
return result
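# Illustrative example (not part of the original file): given
#   flags = ['--benchmark_out=a.json', '--benchmark_out=b.json']
# find_benchmark_flag('--benchmark_out=', flags) returns 'b.json',
# because the last occurrence of the flag wins.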
def remove_benchmark_flags(prefix, benchmark_flags):
"""
Return a new list containing the specified benchmark_flags except those
with the specified prefix.
"""
assert prefix.startswith('--') and prefix.endswith('=')
return [f for f in benchmark_flags if not f.startswith(prefix)]
def load_benchmark_results(fname):
"""
Read benchmark output from a file and return the JSON object.
REQUIRES: 'fname' names a file containing JSON benchmark output.
"""
with open(fname, 'r') as f:
return json.load(f)
def run_benchmark(exe_name, benchmark_flags):
"""
Run a benchmark specified by 'exe_name' with the specified
'benchmark_flags'. The benchmark is run directly as a subprocess to preserve
real time console output.
RETURNS: A JSON object representing the benchmark output
"""
output_name = find_benchmark_flag('--benchmark_out=',
benchmark_flags)
is_temp_output = False
if output_name is None:
is_temp_output = True
thandle, output_name = tempfile.mkstemp()
os.close(thandle)
benchmark_flags = list(benchmark_flags) + \
['--benchmark_out=%s' % output_name]
cmd = [exe_name] + benchmark_flags
print("RUNNING: %s" % ' '.join(cmd))
exitCode = subprocess.call(cmd)
if exitCode != 0:
print('TEST FAILED...')
sys.exit(exitCode)
json_res = load_benchmark_results(output_name)
if is_temp_output:
os.unlink(output_name)
return json_res
def run_or_load_benchmark(filename, benchmark_flags):
"""
Get the results for a specified benchmark. If 'filename' specifies
an executable benchmark then the results are generated by running the
benchmark. Otherwise 'filename' must name a valid JSON output file,
which is loaded and the result returned.
"""
ftype = check_input_file(filename)
if ftype == IT_JSON:
return load_benchmark_results(filename)
elif ftype == IT_Executable:
return run_benchmark(filename, benchmark_flags)
else:
assert False # This branch is unreachable

151
Utilities/Scripts/strip_asm.py Executable file

@ -0,0 +1,151 @@
#!/usr/bin/env python
"""
strip_asm.py - Cleanup ASM output for the specified file
"""
from argparse import ArgumentParser
import sys
import os
import re
def find_used_labels(asm):
found = set()
label_re = re.compile(r"\s*j[a-z]+\s+\.L([a-zA-Z0-9][a-zA-Z0-9_]*)")
for l in asm.splitlines():
m = label_re.match(l)
if m:
found.add('.L%s' % m.group(1))
return found
def normalize_labels(asm):
decls = set()
label_decl = re.compile("^[.]{0,1}L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for l in asm.splitlines():
m = label_decl.match(l)
if m:
decls.add(m.group(0))
if len(decls) == 0:
return asm
needs_dot = next(iter(decls))[0] != '.'
if not needs_dot:
return asm
for ld in decls:
asm = re.sub(r"(^|\s+)" + ld + r"(?=:|\s)", '\\1.' + ld, asm)
return asm
def transform_labels(asm):
asm = normalize_labels(asm)
used_decls = find_used_labels(asm)
new_asm = ''
label_decl = re.compile(r"^\.L([a-zA-Z0-9][a-zA-Z0-9_]*)(?=:)")
for l in asm.splitlines():
m = label_decl.match(l)
if not m or m.group(0) in used_decls:
new_asm += l
new_asm += '\n'
return new_asm
def is_identifier(tk):
if len(tk) == 0:
return False
first = tk[0]
if not first.isalpha() and first != '_':
return False
for i in range(1, len(tk)):
c = tk[i]
if not c.isalnum() and c != '_':
return False
return True
def process_identifiers(l):
"""
process_identifiers - process all identifiers and modify them to have
consistent names across all platforms; specifically across ELF and MachO.
For example, MachO inserts an additional understore at the beginning of
names. This function removes that.
"""
parts = re.split(r'([a-zA-Z0-9_]+)', l)
new_line = ''
for tk in parts:
if is_identifier(tk):
if tk.startswith('__Z'):
tk = tk[1:]
elif tk.startswith('_') and len(tk) > 1 and \
tk[1].isalpha() and tk[1] != 'Z':
tk = tk[1:]
new_line += tk
return new_line
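# Illustrative example (not part of the original file):
#   process_identifiers('callq __ZN4vtkm4initEv')
# returns 'callq _ZN4vtkm4initEv', i.e. the ELF spelling of the MachO symbol.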
def process_asm(asm):
"""
Strip the ASM of unwanted directives and lines
"""
new_contents = ''
asm = transform_labels(asm)
# TODO: Add more things we want to remove
discard_regexes = [
re.compile("\s+\..*$"), # directive
re.compile("\s*#(NO_APP|APP)$"), #inline ASM
re.compile("\s*#.*$"), # comment line
re.compile("\s*\.globa?l\s*([.a-zA-Z_][a-zA-Z0-9$_.]*)"), #global directive
re.compile("\s*\.(string|asciz|ascii|[1248]?byte|short|word|long|quad|value|zero)"),
]
keep_regexes = [
]
fn_label_def = re.compile("^[a-zA-Z_][a-zA-Z0-9_.]*:")
for l in asm.splitlines():
# Remove Mach-O attribute
l = l.replace('@GOTPCREL', '')
add_line = True
for reg in discard_regexes:
if reg.match(l) is not None:
add_line = False
break
for reg in keep_regexes:
if reg.match(l) is not None:
add_line = True
break
if add_line:
if fn_label_def.match(l) and len(new_contents) != 0:
new_contents += '\n'
l = process_identifiers(l)
new_contents += l
new_contents += '\n'
return new_contents
def main():
parser = ArgumentParser(
description='generate a stripped assembly file')
parser.add_argument(
'input', metavar='input', type=str, nargs=1,
help='An input assembly file')
parser.add_argument(
'out', metavar='output', type=str, nargs=1,
help='The output file')
args, unknown_args = parser.parse_known_args()
input = args.input[0]
output = args.out[0]
if not os.path.isfile(input):
print(("ERROR: input file '%s' does not exist") % input)
sys.exit(1)
contents = None
with open(input, 'r') as f:
contents = f.read()
new_contents = process_asm(contents)
with open(output, 'w') as f:
f.write(new_contents)
if __name__ == '__main__':
main()
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;

@ -3,6 +3,7 @@
cd "${BASH_SOURCE%/*}/.." &&
Utilities/GitSetup/setup-user && echo &&
Utilities/GitSetup/setup-hooks && echo &&
Utilities/GitSetup/setup-lfs && echo &&
(Utilities/GitSetup/setup-upstream ||
echo 'Failed to setup origin. Run this again to retry.') && echo &&
(Utilities/GitSetup/setup-gitlab ||
@ -27,3 +28,6 @@ echo "Set up git gitlab-push" &&
git config alias.gitlab-sync '!bash Utilities/GitSetup/git-gitlab-sync' &&
echo "Set up git gitlab-sync" &&
true
SetupForDevelopment=1
git config hooks.SetupForDevelopment ${SetupForDevelopment_VERSION}

24
Utilities/hooks/pre-commit Executable file

@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
die() {
echo 'pre-commit hook failure' 1>&2
echo '-----------------------' 1>&2
echo '' 1>&2
echo "$@" 1>&2
exit 1
}
#-----------------------------------------------------------------------------
# Check that development setup is up-to-date.
lastSetupForDevelopment=$(git config --get hooks.SetupForDevelopment || echo 0)
eval $(grep '^SetupForDevelopment_VERSION=' "${BASH_SOURCE%/*}/../SetupForDevelopment.sh")
test -n "$SetupForDevelopment_VERSION" || SetupForDevelopment_VERSION=0
if test $lastSetupForDevelopment -lt $SetupForDevelopment_VERSION; then
die 'Developer setup in this work tree is out of date. Please re-run
Utilities/SetupForDevelopment.sh
'
fi

3
Utilities/hooks/pre-push Executable file

@ -0,0 +1,3 @@
#!/bin/sh
command -v git-lfs >/dev/null 2>&1 || { echo >&2 "\nThis repository is configured for Git LFS but 'git-lfs' was not found on your path.\n"; exit 2; }
git lfs pre-push "$@"

@ -18,6 +18,7 @@ README
git-gitlab-push
setup-gitlab
setup-hooks
setup-lfs
setup-ssh
setup-upstream
setup-user

@ -473,12 +473,25 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchExecToContReadWrite,
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
// Initialize command line args
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -506,11 +506,24 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -95,11 +95,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(CopySpeed,
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// Handle NumThreads command-line arg:
#ifdef VTKM_ENABLE_TBB
@ -126,5 +138,5 @@ int main(int argc, char* argv[])
#endif // TBB
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -39,6 +39,40 @@
namespace
{
// Parametrize the input size samples for most of the benchmarks
//
// Define at compile time:
//
// Let b0 be VTKm_BENCHS_RANGE_LOWER_BOUNDARY and
// b1 be VTKm_BENCHS_RANGE_UPPER_BOUNDARY.
//
// This will create the following sample sizes: b0, b0*2^3, b0*2^6, ..., b1.
//
// Notice that setting VTKm_BENCHS_RANGE_LOWER_BOUNDARY / VTKm_BENCHS_RANGE_UPPER_BOUNDARY
// will affect both ShortRange and FullRange.
//
#ifndef VTKm_BENCHS_RANGE_LOWER_BOUNDARY
#define FULL_RANGE_LOWER_BOUNDARY (1 << 12) // 4 KiB
#define SHORT_RANGE_LOWER_BOUNDARY (1 << 15) // 32 KiB
#else
#define FULL_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY)
#define SHORT_RANGE_LOWER_BOUNDARY (VTKm_BENCHS_RANGE_LOWER_BOUNDARY)
#endif
#ifndef VTKm_BENCHS_RANGE_UPPER_BOUNDARY
#define FULL_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB
#define SHORT_RANGE_UPPER_BOUNDARY (1 << 27) // 128 MiB
#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (1 << 26) // 64 MiB
#else
#define FULL_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
#define SHORT_RANGE_UPPER_BOUNDARY (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
#define BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING (VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
#endif
// Default sampling rate is x8 and always includes min/max,
// so this will generate 7 samples at:
// 1: 4 KiB
@ -47,15 +81,17 @@ namespace
// 4: 2 MiB
// 5: 16 MiB
// 6: 128 MiB
static const std::pair<int64_t, int64_t> FullRange{ 1 << 12, 1 << 27 }; // 4KiB, 128MiB
static const std::pair<int64_t, int64_t> FullRange{ FULL_RANGE_LOWER_BOUNDARY,
FULL_RANGE_UPPER_BOUNDARY };
// Smaller range that can be used to reduce the number of benchmarks. Used
// with `RangeMultiplier(SmallRangeMultiplier)`, this produces:
// 1: 32 KiB
// 2: 2 MiB
// 3: 128 MiB
static const std::pair<int64_t, int64_t> SmallRange{ 1 << 15, 1 << 27 }; // 4KiB, 128MiB
static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB
static const std::pair<int64_t, int64_t> SmallRange{ SHORT_RANGE_LOWER_BOUNDARY,
SHORT_RANGE_UPPER_BOUNDARY };
static constexpr int SmallRangeMultiplier = 1 << 21; // Ensure a sample at 2MiB
using TypeList = vtkm::List<vtkm::UInt8,
vtkm::Float32,
@ -351,7 +387,7 @@ void BenchBitFieldToUnorderedSetGenerator(benchmark::internal::Benchmark* bm)
{
// Use a reduced NUM_BYTES_MAX value here -- these benchmarks allocate one
// 8-byte id per bit, so this caps the index array out at 512 MB:
static constexpr int64_t numBytesMax = 1 << 26; // 64 MiB of bits
static int64_t numBytesMax = std::min(1 << 29, BITFIELD_TO_UNORDEREDSET_MAX_SAMPLING);
bm->UseManualTime();
bm->ArgNames({ "Size", "C" });
@ -368,7 +404,7 @@ template <typename ValueType>
void BenchCopy(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -393,6 +429,7 @@ void BenchCopy(benchmark::State& state)
state.SetBytesProcessed(static_cast<int64_t>(numBytes) * iterations);
state.SetItemsProcessed(static_cast<int64_t>(numValues) * iterations);
};
VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCopy, ->Ranges({ FullRange })->ArgName("Size"), TypeList);
template <typename ValueType>
@ -534,7 +571,7 @@ void BenchCountSetBitsGenerator(benchmark::internal::Benchmark* bm)
for (int64_t config = 0; config < 6; ++config)
{
bm->Ranges({ FullRange, { config, config } });
bm->Ranges({ { FullRange.first, FullRange.second }, { config, config } });
}
}
VTKM_BENCHMARK_APPLY(BenchCountSetBits, BenchCountSetBitsGenerator);
@ -543,7 +580,7 @@ template <typename ValueType>
void BenchFillArrayHandle(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -573,7 +610,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFillArrayHandle,
void BenchFillBitFieldBool(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numBits = numBytes * CHAR_BIT;
const bool value = state.range(1) != 0;
@ -603,7 +640,7 @@ template <typename WordType>
void BenchFillBitFieldMask(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numBits = numBytes * CHAR_BIT;
const WordType mask = static_cast<WordType>(0x1);
@ -680,7 +717,7 @@ template <typename ValueType>
void BenchReduce(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -715,10 +752,10 @@ void BenchReduceByKey(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
const vtkm::Id percentKeys = state.range(1);
const vtkm::Id percentKeys = static_cast<vtkm::Id>(state.range(1));
const vtkm::Id numKeys = std::max((numValues * percentKeys) / 100, vtkm::Id{ 1 });
{
@ -770,7 +807,7 @@ template <typename ValueType>
void BenchScanExclusive(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -804,7 +841,7 @@ template <typename ValueType>
void BenchScanExtended(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -838,7 +875,7 @@ template <typename ValueType>
void BenchScanInclusive(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -872,7 +909,7 @@ template <typename ValueType>
void BenchSort(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -913,7 +950,7 @@ void BenchSortByKey(benchmark::State& state)
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
const vtkm::Id percentKeys = state.range(1);
const vtkm::Id percentKeys = static_cast<vtkm::Id>(state.range(1));
const vtkm::Id numKeys = std::max((numValues * percentKeys) / 100, vtkm::Id{ 1 });
{
@ -968,7 +1005,7 @@ template <typename ValueType>
void BenchStableSortIndices(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
state.SetLabel(SizeAndValuesString(numBytes, numValues));
@ -1005,10 +1042,10 @@ template <typename ValueType>
void BenchStableSortIndicesUnique(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
const vtkm::Id percentUnique = state.range(1);
const vtkm::Id percentUnique = static_cast<vtkm::Id>(state.range(1));
const vtkm::Id numUnique = std::max((numValues * percentUnique) / 100, vtkm::Id{ 1 });
{
@ -1053,8 +1090,10 @@ void BenchmarkStableSortIndicesUniqueGenerator(benchmark::internal::Benchmark* b
bm->ArgNames({ "Size", "%Uniq" });
for (int64_t pcntUnique = 0; pcntUnique <= 100; pcntUnique += 25)
{
// Cap the max size here at 21 MiB. This sort is too slow.
bm->Ranges({ { SmallRange.first, 1 << 21 }, { pcntUnique, pcntUnique } });
// Cap the max size here at 2 MiB. This sort is too slow.
const int64_t maxSize = 1 << 21;
bm->Ranges(
{ { SmallRange.first, std::min(maxSize, SmallRange.second) }, { pcntUnique, pcntUnique } });
}
}
@ -1066,10 +1105,10 @@ template <typename ValueType>
void BenchUnique(benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numBytes = state.range(0);
const vtkm::Id numBytes = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numValues = BytesToWords<ValueType>(numBytes);
const vtkm::Id percentUnique = state.range(1);
const vtkm::Id percentUnique = static_cast<vtkm::Id>(state.range(1));
const vtkm::Id numUnique = std::max((numValues * percentUnique) / 100, vtkm::Id{ 1 });
{
@ -1167,12 +1206,23 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchUpperBounds,
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// Handle NumThreads command-line arg:
#ifdef VTKM_ENABLE_TBB
@ -1199,5 +1249,5 @@ int main(int argc, char* argv[])
#endif // TBB
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -942,12 +942,24 @@ VTKM_BENCHMARK(Bench2VirtualImplicitFunctions);
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -44,7 +44,7 @@
#include <vtkm/filter/WarpScalar.h>
#include <vtkm/filter/WarpVector.h>
#include <vtkm/io/reader/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/source/Wavelet.h>
#include <vtkm/worklet/DispatcherMapField.h>
@ -106,27 +106,6 @@ bool InputIsStructured()
InputDataSet.GetCellSet().IsType<vtkm::cont::CellSetStructured<1>>();
}
// Limit the filter executions to only consider the following types, otherwise
// compile times and binary sizes are nuts.
using FieldTypes = vtkm::List<vtkm::Float32, vtkm::Float64, vtkm::Vec3f_32, vtkm::Vec3f_64>;
using StructuredCellList = vtkm::List<vtkm::cont::CellSetStructured<3>>;
using UnstructuredCellList =
vtkm::List<vtkm::cont::CellSetExplicit<>, vtkm::cont::CellSetSingleType<>>;
using AllCellList = vtkm::ListAppend<StructuredCellList, UnstructuredCellList>;
class BenchmarkFilterPolicy : public vtkm::filter::PolicyBase<BenchmarkFilterPolicy>
{
public:
using FieldTypeList = FieldTypes;
using StructuredCellSetList = StructuredCellList;
using UnstructuredCellSetList = UnstructuredCellList;
using AllCellSetList = AllCellList;
};
enum GradOpts : int
{
Gradient = 1,
@ -174,13 +153,12 @@ void BenchGradient(::benchmark::State& state, int options)
filter.SetColumnMajorOrdering();
}
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -224,13 +202,12 @@ void BenchThreshold(::benchmark::State& state)
filter.SetLowerThreshold(mid - quarter);
filter.SetUpperThreshold(mid + quarter);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -261,13 +238,12 @@ void BenchThresholdPoints(::benchmark::State& state)
filter.SetUpperThreshold(mid + quarter);
filter.SetCompactPoints(compactPoints);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -283,13 +259,12 @@ void BenchCellAverage(::benchmark::State& state)
vtkm::filter::CellAverage filter;
filter.SetActiveField(PointScalarsName, vtkm::cont::Field::Association::POINTS);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -305,13 +280,12 @@ void BenchPointAverage(::benchmark::State& state)
vtkm::filter::PointAverage filter;
filter.SetActiveField(CellScalarsName, vtkm::cont::Field::Association::CELL_SET);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -329,13 +303,12 @@ void BenchWarpScalar(::benchmark::State& state)
filter.SetNormalField(PointVectorsName, vtkm::cont::Field::Association::POINTS);
filter.SetScalarFactorField(PointScalarsName, vtkm::cont::Field::Association::POINTS);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -352,13 +325,12 @@ void BenchWarpVector(::benchmark::State& state)
filter.SetUseCoordinateSystemAsField(true);
filter.SetVectorField(PointVectorsName, vtkm::cont::Field::Association::POINTS);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -399,13 +371,12 @@ void BenchContour(::benchmark::State& state)
filter.SetComputeFastNormalsForStructured(fastNormals);
filter.SetComputeFastNormalsForUnstructured(fastNormals);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -438,13 +409,12 @@ void BenchExternalFaces(::benchmark::State& state)
vtkm::filter::ExternalFaces filter;
filter.SetCompactPoints(compactPoints);
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -466,13 +436,12 @@ void BenchTetrahedralize(::benchmark::State& state)
vtkm::filter::Tetrahedralize filter;
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -496,13 +465,12 @@ void BenchVertexClustering(::benchmark::State& state)
vtkm::filter::VertexClustering filter;
filter.SetNumberOfDivisions({ numDivs });
BenchmarkFilterPolicy policy;
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet, policy);
auto result = filter.Execute(InputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -1005,7 +973,7 @@ void InitDataSet(int& argc, char** argv)
if (!filename.empty())
{
std::cerr << "[InitDataSet] Loading file: " << filename << "\n";
vtkm::io::reader::VTKDataSetReader reader(filename);
vtkm::io::VTKDataSetReader reader(filename);
InputDataSet = reader.ReadDataSet();
}
else
@ -1040,12 +1008,23 @@ void InitDataSet(int& argc, char** argv)
int main(int argc, char* argv[])
{
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
Config = vtkm::cont::Initialize(argc, argv, opts);
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
InitDataSet(argc, argv);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
InitDataSet(argc, args.data());
}
const std::string dataSetSummary = []() -> std::string {
std::ostringstream out;
@ -1054,5 +1033,5 @@ int main(int argc, char* argv[])
}();
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, dataSetSummary);
VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, args.data(), dataSetSummary);
}

@ -64,7 +64,7 @@ void BenchRayTracing(::benchmark::State& state)
vtkm::rendering::CanvasRayTracer canvas(1920, 1080);
vtkm::rendering::raytracing::Camera rayCamera;
rayCamera.SetParameters(camera, canvas);
rayCamera.SetParameters(camera, vtkm::Int32(canvas.GetWidth()), vtkm::Int32(canvas.GetHeight()));
vtkm::rendering::raytracing::Ray<vtkm::Float32> rays;
rayCamera.CreateRays(rays, coords.GetBounds());
@ -116,13 +116,24 @@ VTKM_BENCHMARK(BenchRayTracing);
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -380,12 +380,24 @@ VTKM_BENCHMARK_TEMPLATES(BenchClassificationDynamic, ValueTypes);
int main(int argc, char* argv[])
{
// Parse VTK-m options:
auto opts = vtkm::cont::InitializeOptions::RequireDevice | vtkm::cont::InitializeOptions::AddHelp;
Config = vtkm::cont::Initialize(argc, argv, opts);
auto opts = vtkm::cont::InitializeOptions::RequireDevice;
// Setup device:
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when the given argument is --help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
// handle benchmarking related args and run benchmarks:
VTKM_EXECUTE_BENCHMARKS(argc, argv);
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -181,7 +181,8 @@
/// ```
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(BenchFunc) BENCHMARK(BenchFunc)->UseManualTime()
#define VTKM_BENCHMARK(BenchFunc) \
BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
///
@ -195,7 +196,8 @@
/// Note the similarity to the raw Google Benchmark usage of
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
/// the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) BENCHMARK(BenchFunc)->UseManualTime() options
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
///
@ -210,7 +212,7 @@
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()
BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
///
@ -237,7 +239,9 @@
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY( \
BenchFunc, [](::benchmark::internal::Benchmark* bm) { bm options; }, TypeList)
BenchFunc, \
[](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
///
@ -313,7 +317,7 @@ private:
this->Apply(bm);
// Always use manual time with vtkm::cont::Timer to capture CUDA times accurately.
bm->UseManualTime();
bm->UseManualTime()->Unit(benchmark::kMillisecond);
}
};
@ -388,6 +392,37 @@ static inline vtkm::Id ExecuteBenchmarks(int& argc,
return static_cast<vtkm::Id>(num);
}
void InitializeArgs(int* argc, std::vector<char*>& args, vtkm::cont::InitializeOptions& opts)
{
bool isHelp = false;
// Inject --help
if (*argc == 1)
{
const char* help = "--help"; // We want it to be static
args.push_back(const_cast<char*>(help));
*argc = *argc + 1;
}
args.push_back(nullptr);
for (size_t i = 0; i < static_cast<size_t>(*argc); ++i)
{
auto opt_s = std::string(args[i]);
if (opt_s == "--help" || opt_s == "-help" || opt_s == "-h")
{
isHelp = true;
}
}
if (!isHelp)
{
return;
}
opts = vtkm::cont::InitializeOptions::None;
}
}
}
} // end namespace vtkm::bench::detail

@ -47,10 +47,17 @@ set(benchmarks
BenchmarkTopologyAlgorithms
)
set(VTKm_BENCHS_RANGE_LOWER_BOUNDARY 4096 CACHE STRING "Smallest sample for input size bench for BenchmarkDeviceAdapter")
set(VTKm_BENCHS_RANGE_UPPER_BOUNDARY 134217728 CACHE STRING "Biggest sample for input size bench for BenchmarkDeviceAdapter")
mark_as_advanced(VTKm_BENCHS_RANGE_LOWER_BOUNDARY VTKm_BENCHS_RANGE_UPPER_BOUNDARY)
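# Illustrative configure-time override (the values here are examples only):
#   cmake -DVTKm_ENABLE_BENCHMARKS=ON \
#         -DVTKm_BENCHS_RANGE_LOWER_BOUNDARY=32768 \
#         -DVTKm_BENCHS_RANGE_UPPER_BOUNDARY=33554432 <source-dir>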
foreach (benchmark ${benchmarks})
add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter)
add_benchmark(NAME ${benchmark} FILE ${benchmark}.cxx LIBS vtkm_source vtkm_filter vtkm_io)
endforeach ()
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_LOWER_BOUNDARY=${VTKm_BENCHS_RANGE_LOWER_BOUNDARY})
target_compile_definitions(BenchmarkDeviceAdapter PUBLIC VTKm_BENCHS_RANGE_UPPER_BOUNDARY=${VTKm_BENCHS_RANGE_UPPER_BOUNDARY})
if(TARGET vtkm_rendering)
add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering)
endif()

120
benchmarking/README.md Normal file

@ -0,0 +1,120 @@
# BENCHMARKING VTK-m
## TL;DR
When configuring _VTK-m_ with _CMake_ pass the flag `-DVTKm_ENABLE_BENCHMARKS=1`.
In the build directory you will see the following binaries:
$ ls bin/Benchmark*
bin/BenchmarkArrayTransfer* bin/BenchmarkCopySpeeds* bin/BenchmarkFieldAlgorithms*
bin/BenchmarkRayTracing* bin/BenchmarkAtomicArray* bin/BenchmarkDeviceAdapter*
bin/BenchmarkFilters* bin/BenchmarkTopologyAlgorithms*
Taking as an example `BenchmarkArrayTransfer`, we can run it as:
$ bin/BenchmarkArrayTransfer -d Any
---
## Parts of this Document
0. [TL;DR](#TL;DR)
1. [Devices](#choosing-devices)
2. [Filters](#run-a-subset-of-your-benchmarks)
3. [Compare with baseline](#compare-with-baseline)
4. [Installing compare.py](#installing-compare-benchmarkspy)
---
## Choosing devices
Taking as an example `BenchmarkArrayTransfer`, we can determine which devices
it can run on by simply invoking it with no arguments:
$ bin/BenchmarkArrayTransfer
...
Valid devices: "Any" "Serial"
...
Given the listed _Valid devices_ you can choose which device to run the benchmark on:
$ bin/BenchmarkArrayTransfer -d Serial
## Run a subset of your benchmarks
_VTK-m_ benchmarks use [Google Benchmarks], which allows you to choose a subset
of benchmarks by using the flag `--benchmark_filter=REGEX`.
For instance, if you want to run all the benchmarks that write something you
would run:
$ bin/BenchmarkArrayTransfer -d Serial --benchmark_filter='Write'
Note you can list all of the available benchmarks with the option:
`--benchmark_list_tests`.
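For example, to list every benchmark that `BenchmarkArrayTransfer` provides on
the serial device (a sketch; the exact listing depends on your build):

```sh
$ bin/BenchmarkArrayTransfer -d Serial --benchmark_list_tests
```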
## Compare with baseline
_VTK-m_ ships with a helper script named `compare-benchmarks.py`, based on the
[Google Benchmarks] `compare.py`, which lets you compare benchmarks using
different devices, filters, and binaries. After building _VTK-m_ it will appear
in the `bin` directory within your `build` directory.
When running `compare-benchmarks.py`:
- You can specify the baseline benchmark binary path and its arguments in
`--benchmark1=`
- The contender benchmark binary path and its arguments in `--benchmark2=`
- Extra options to be passed to `compare.py` must come after `--`
### Compare between filters
When comparing filters, we can only use one benchmark binary with a single device
as shown in the following example:
```sh
$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Any
--benchmark_filter=1024' --filter1='Read' --filter2=Write -- filters
# It will output something like this:
Benchmark Time CPU Time Old Time New CPU Old CPU New
---------------------------------------------------------------------------------------------------------------------------------------------------------------
BenchContToExec[Read vs. Write]<F32>/Bytes:1024/manual_time +0.2694 +0.2655 18521 23511 18766 23749
BenchExecToCont[Read vs. Write]<F32>/Bytes:1024/manual_time +0.0212 +0.0209 25910 26460 26152 26698
```
### Compare between devices
When comparing two benchmarks using two devices, use the _option_ `benchmarks`
after `--` and call `./compare-benchmarks.py` as follows:
```sh
$ ./compare-benchmarks.py --benchmark1='./BenchmarkArrayTransfer -d Serial
--benchmark_filter=1024' --benchmark2='./BenchmarkArrayTransfer -d Cuda
--benchmark_filter=1024' -- benchmarks
# It will output something like this:
Benchmark Time CPU Time Old Time New CPU Old CPU New
---------------------------------------------------------------------------------------------------------------------------------------------------
BenchContToExecRead<F32>/Bytes:1024/manual_time +0.0127 +0.0120 18388 18622 18632 18856
BenchContToExecWrite<F32>/Bytes:1024/manual_time +0.0010 +0.0006 23471 23496 23712 23726
BenchContToExecReadWrite<F32>/Bytes:1024/manual_time -0.0034 -0.0041 26363 26274 26611 26502
BenchRoundTripRead<F32>/Bytes:1024/manual_time +0.0055 +0.0056 20635 20748 21172 21291
BenchRoundTripReadWrite<F32>/Bytes:1024/manual_time +0.0084 +0.0082 29288 29535 29662 29905
BenchExecToContRead<F32>/Bytes:1024/manual_time +0.0025 +0.0021 25883 25947 26122 26178
BenchExecToContWrite<F32>/Bytes:1024/manual_time -0.0027 -0.0038 26375 26305 26622 26522
BenchExecToContReadWrite<F32>/Bytes:1024/manual_time +0.0041 +0.0039 25639 25745 25871 25972
```
## Installing compare-benchmarks.py
`compare-benchmarks.py` relies on `compare.py` from Google Benchmarks, which in
turn relies on `SciPy`; you can find installation instructions [here][SciPy].
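If `SciPy` is not already installed, one common way to get it (just one of the
options covered by the linked instructions) is:

```sh
$ python3 -m pip install --user scipy
```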
[Google Benchmarks]: https://github.com/google/benchmark
[Compare.py]: https://github.com/google/benchmark/blob/master/tools/compare.py
[SciPy]: https://www.scipy.org/install.html

3
data/README.md Normal file

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b30a14a308f64c6fc2969e2b959d79dacdc5affda1d1c0e24f8e176304147146
size 643

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8c23821f7436bce6d71593698e3cb0047752b4dd671513f8c4e961d4489f199f
size 12110311

3
data/data/sentinel-data Normal file

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cac2600b02c8352270d8251718cd8effddb6c78d133f1fd0f08b1bccb0eac51d
size 121

3
data/data/uniform/noise Normal file

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f59e6715ba9b64a173777d4cb169417b9fa198fee7231e0e0817563695af775f
size 500000

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2071da4365341303e401d20dc6e8bb35ebeccc24b801cc81ab87e75e3a4ef654
size 339

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a178b29073f2aa0d15375b07d0bdd28369422a352b5dcb5155cf67aebe54bbc
size 286099

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1e415c5dfd711901fdb0abb4dab2606b41c032938867200bdceebc13414b3bc8
size 4045

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9dc63465d864ec7a4546f1d006ca0153a5bb4c78fd4a42d16ad1177dabc70d75
size 80263

@ -1,3 +0,0 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dca8105bf888e67a9fe476a563d69ac708925aec519814fb520c6057e5ca0a1f
size 1327116

280
docs/CI-README.md Normal file

@ -0,0 +1,280 @@
Gitlab CI
===============
# High level view
1. Kitware Gitlab CI
- Why pipelines
- Gitlab runner tags
2. How to use docker builders locally
- Setting up docker
- Setting up nvidia runtime
- Running docker images
3. How to Add/Update Kitware Gitlab CI
- How to add a new builder
- How to add a new tester
- How to update an existing docker image
4. ECP OSTI CI
- Issues
# Kitware Gitlab CI
GitLab CI/CD allows for software development through continuous integration, delivery, and deployment.
VTK-m uses continuous integration to verify every merge request by running a pipeline of scripts that build and test
the code changes across a wide range of hardware and configurations before merging them into master.
This workflow allows everyone to easily catch build failures, bugs, and errors before VTK-m is deployed in a
production environment. Making sure VTK-m is a robust library provides confidence not only to our users
but to every VTK-m developer. When the system is working, developers can be confident that failures
seen during CI are related to the specific changes they have made.
GitLab CI/CD is configured by a file called `.gitlab-ci.yml` located at the root of the VTK-m repository.
The scripts set in this file are executed by the [GitLab Runners](https://docs.gitlab.com/runner/) associated with VTK-m.
## Why pipelines
Pipelines are the top-level component of continuous integration. For VTK-m the pipeline contains build and test stages, with the possibility of adding subsequent stages such as coverage or memory checking.
Decomposing the build and test into separate components comes with some significant benefits for VTK-m developers.
The most impactful change is that we now have the ability to compile VTK-m on dedicated 'compilation' machines and
test on machines with less memory or an older CPU, improving turnaround time. Additionally, since we are heavily
leveraging docker, VTK-m build stages can be better load balanced across the set of builders as we don't have
a tight coupling between a machine and a build configuration.
## Gitlab runner tags
Current gitlab runner tags for VTK-m are:
- build
Signifies that this runner will be doing compilation
- test
Signifies that this runner will be running tests
- vtkm
Allows us to make sure VTK-m CI is only run on VTK-m allocated hardware
- docker
Used to state that the gitlab-runner must support docker based ci
- linux
Used to state that we require a linux based gitlab-runner
- large-memory
Used to state that this step will require a machine that has lots of memory.
This is currently used for cuda `build` requests
- cuda-rt
Used to state that the runner is required to have the cuda runtime environment.
This isn't required to `build` VTK-m, only `test`
- maxwell
- pascal
- turing
Only used on a `test` stage to signify which GPU hardware is required to
run the VTK-m tests
# How to use docker builders locally
When diagnosing issues from the docker builders it can be useful to iterate locally on a
solution.
If you haven't set up docker locally we recommend following the official getting started guide:
- https://docs.docker.com/get-started/
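As a quick sanity check that your local docker installation works (a generic
docker smoke test, not specific to VTK-m):

```sh
$ sudo docker run --rm hello-world
```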
## Setting up nvidia runtime
To properly test VTK-m inside docker containers when the CUDA backend is enabled you will need
to have installed the nvidia-container-runtime ( https://github.com/NVIDIA/nvidia-container-runtime )
and be using a recent version of docker ( we recommend docker-ce )
Once nvidia-container-runtime is installed you will want the default runtime to be `nvidia` so
that `docker run` will automatically support GPUs. The easiest way to do so is to add
the following to your `/etc/docker/daemon.json`:
```
{
"default-runtime": "nvidia",
"runtimes": {
"nvidia": {
"path": "/usr/bin/nvidia-container-runtime",
"runtimeArgs": []
}
}
}
```
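After restarting the docker daemon, a quick way to verify that containers can
see the GPU is to run `nvidia-smi` inside any CUDA image; the image tag below
is only an example:

```sh
$ sudo docker run --rm nvidia/cuda:10.2-base nvidia-smi
```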
## Running docker images
To simplify reproducing docker based CI workers locally, VTK-m has a python program that handles all the
work automatically for you.
The program is located in `Utilities/CI/reproduce_ci_env.py` and requires python3 and pyyaml.
The program is easy to use. The following two commands will create the `build:rhel8` gitlab-ci
worker as a docker image and set up a container just as gitlab-ci would before the actual
compilation of VTK-m. Instead of doing the compilation, you will be given an interactive shell.
```
./reproduce_ci_env.py create rhel8
./reproduce_ci_env.py run rhel8
```
To compile VTK-m from the interactive shell you would do the following:
```
> src]# cd build/
> build]# cmake --build .
```
# How to Add/Update Kitware Gitlab CI
Adding new build or test stages is necessary when a given combination of compiler, platform,
and VTK-m options isn't already captured by existing builders. Each definition is composed of 3 components: tags, variables, and extends.
Tags are used by gitlab-ci to match a given build to a set of possible execution locations.
Therefore we encode information such as whether we require docker or the linux kernel into tags.
The full set of VTK-m tags and their meanings can be found under the `Gitlab runner tags` section of this document.
Extends is used to compose the execution environment of the builder. Basically this means
setting up the correct build/test environment and specifying the CMake scripts that need
to be executed. So a linux docker based builder would extend the docker image it wants,
plus `.cmake_build_linux`. A MacOS builder would extend `.cmake_build_macos`.
Variables control stage-specific information such as runtime environment variables
or VTK-m CMake options.
## How to add a new builder
Each builder definition is placed inside the respective OS `yml` file located in
`.gitlab/ci/`. Therefore if you are adding a builder that will run on Ubuntu 20.04 it
would go into `.gitlab/ci/ubuntu2004.yml`.
Variables are used to control the following components:
- Compiler
- VTK-m CMake Options
- Static / Shared
- Release / Debug / MinSizeRel
An example definition of a builder would look like:
```yml
build:ubuntu2004_$<compiler>:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu2004
- .cmake_build_linux
- .only-default
variables:
CC: "$<c-compiler-command>"
CXX: "$<cxx-compiler-command>"
CMAKE_BUILD_TYPE: "Debug|Release|MinSizeRel"
VTKM_SETTINGS: "tbb+openmp+mpi"
```
If this builder requires a new docker image, a couple of extra steps are required:
1. Add the docker image to the proper folder under `.gitlab/ci/docker`. Images
are laid out with the primary folder being the OS and the secondary folder the
primary device adapter it adds. We currently consider `openmp` and `tbb` to
be small enough to be part of any image.
2. Make sure the image is part of the `update_all.sh` script, following the convention
of `platform_device`.
3. Update the `.gitlab-ci.yml` comments to list what compiler(s), device adapters,
and other relevant libraries the image has.
4. Verify the image is part of the `.gitlab-ci.yml` file and uses the docker image
pattern, as seen below. This is important as `.docker_image` makes sure we
have consistent paths across all builds to allow us to cache compilation object
files.
```yml
.$<platform>_$<device>: &$<platform>_$<device>
image: "kitware/vtkm:ci-$<platform>_$<device>-$<YYYYMMDD>"
extends:
- .docker_image
```
## How to add a new tester
Each test definition is placed inside the respective OS `yml` file located in
`.gitlab/ci/`. Therefore if you are adding a tester that will run on Ubuntu 20.04 it
would go into `.gitlab/ci/ubuntu2004.yml`.
The primary difference between test and build definitions is that tests have
the dependencies and needs sections. These are required because by default
gitlab-ci will not run any test stage before ALL the build stages have
completed.
Variables for testers are currently only used for the following things:
- Allowing OpenMPI to run as root
An example definition of a tester would look like:
```yml
test:ubuntu2004_$<compiler>:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
extends:
- .ubuntu2004_cuda
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu2004_$<compiler>
needs:
- build:ubuntu2004_$<compiler>
```
## How to update an existing docker image
Updating an image to be used for CI infrastructure can be done by anyone that
has permissions to the kitware/vtkm dockerhub project, as that is where
images are stored.
Each modification of the docker image requires a new name so that existing open
merge requests can safely trigger pipelines without inadvertently using the
updated images, which might break their build.
Therefore the workflow to update images is:
1. Start a new git branch
2. Update the associated `Dockerfile`
3. Locally build the docker image
4. Push the docker image to dockerhub
5. Open a Merge Request
To simplify steps 3 and 4 of the process, VTK-m has a script (`update_all.sh`) that automates
these stages. This script must be run from the `.gitlab/ci/docker` directory, and
needs to have the date string passed to it. An example of running the script:
```sh
sudo docker login --username=<docker_hub_name>
cd .gitlab/ci/docker
sudo ./update_all.sh 20201230
```
# ECP OSTI CI
`.gitlab-ci-ecp.yml` allows VTK-m to run CI on machines provided by ECP at NMC.
To have this work properly you will need to make sure that the gitlab repository
has been updated to use this non-standard yaml file location
( "Settings" -> "CI/CD" -> "General pipelines" -> "Custom CI configuration path").
The ECP CI is set up to verify VTK-m mainly on Power9 hardware, as that is currently
missing from VTK-m's standard CI infrastructure.
Currently we verify Power9 support with `cuda` and `openmp` builders. The `cuda` builder
is set up to use the default cuda SDK on the machine and the required `c++` compiler, which
currently is `gcc-4.8.5`. The `openmp` builder is set up to use the newest `c++` compiler provided
on the machine so that we maximize compiler coverage.
## Issues
Currently these builders don't report back to the VTK-m CDash instance.

@ -179,7 +179,7 @@ contributed to VTK-m since our last release. The 1.5.0 release contains
over 100000 merge requests, and 100000 entries in the changelog.
Below are all the entries in the changelog, with more details at (
https://gitlab.kitware.com/vtk/vtk-m/tags/vX.Y.0 ) or in the vtkm
https://gitlab.kitware.com/vtk/vtk-m/-/tags/vX.Y.0 ) or in the vtkm
repository at `docs/X.Y/release-notes.md`
1. Core

BIN docs/build_stage.png (new binary file, 54 KiB; not shown)

@ -75,7 +75,7 @@ new adapter is enabled using the CMake option `VTKm_ENABLE_OPENMP` and its
performance is comparable to the TBB device adapter.
Performance comparisons of `OpenMP` against the `TBB` and `Serial` device
adapters can be found at: https://gitlab.kitware.com/vtk/vtk-m/issues/223
adapters can be found at: https://gitlab.kitware.com/vtk/vtk-m/-/issues/223
## Make all worklets dispatches invoke using a `TryExecute`

@ -0,0 +1,21 @@
# Provide scripts to build Gitlab-ci workers locally
To simplify reproducing docker based CI workers locally, VTK-m has a python program that handles all the
work automatically for you.
The program is located in `Utilities/CI/reproduce_ci_env.py` and requires python3 and pyyaml.
The program is easy to use! The following two commands will create the `build:rhel8` gitlab-ci
worker as a docker image and set up a container just as gitlab-ci would before the actual
compilation of VTK-m. Instead of doing the compilation, you will be given an interactive shell.
```sh
./reproduce_ci_env.py create rhel8
./reproduce_ci_env.py run rhel8
```
To compile VTK-m from the interactive shell you would do the following:
```sh
> src]# cd build/
> build]# cmake --build .
```

@ -0,0 +1,53 @@
# Configurable default types
Because VTK-m compiles efficient code for accelerator architectures, it
often has to compile for static types. This means that dynamic types often
have to be determined at runtime and converted to static types. This is the
reason for the `CastAndCall` architecture in VTK-m.
For this `CastAndCall` to work, there has to be a finite set of static
types to try at runtime. If you don't compile in the types you need, you
will get runtime errors. However, the more types you compile in, the longer
the compile time and executable size. Thus, getting the types right is
important.
The "right" types to use can change depending on the application using
VTK-m. For example, when VTK links in VTK-m, it needs to support lots of
types and can sacrifice the compile times to do so. However, if using VTK-m
in situ with a Fortran simulation, space and time are critical and you
might only need to worry about double SoA arrays.
Thus, it is important to customize what types VTK-m uses based on the
application. This leads to the oxymoronic phrase of configuring the default
types used by VTK-m.
This is being implemented by providing VTK-m with a header file that
defines the default types. The header file provided to VTK-m should define
one or more of the following preprocessor macros:
* `VTKM_DEFAULT_TYPE_LIST` - a `vtkm::List` of value types for fields that
filters should directly operate on (where applicable).
* `VTKM_DEFAULT_STORAGE_LIST` - a `vtkm::List` of storage tags for fields
that filters should directly operate on.
* `VTKM_DEFAULT_CELL_SET_LIST_STRUCTURED` - a `vtkm::List` of
  `vtkm::cont::CellSet` types that filters should operate on as a
  structured cell set.
* `VTKM_DEFAULT_CELL_SET_LIST_UNSTRUCTURED` - a `vtkm::List` of
  `vtkm::cont::CellSet` types that filters should operate on as an
  unstructured cell set.
* `VTKM_DEFAULT_CELL_SET_LIST` - a `vtkm::List` of `vtkm::cont::CellSet`
  types that filters should operate on (where applicable). The default of
  `vtkm::ListAppend<VTKM_DEFAULT_CELL_SET_LIST_STRUCTURED, VTKM_DEFAULT_CELL_SET_LIST_UNSTRUCTURED>`
  is usually correct.
If any of these macros are not defined, a default version will be defined.
(This is the same default used if no header file is provided.)
This header file is provided to the build by setting the
`VTKm_DEFAULT_TYPES_HEADER` CMake variable. `VTKm_DEFAULT_TYPES_HEADER`
points to the file, which will be configured and copied to VTK-m's build
directory.
For convenience, header files can be added to the VTK-m source directory
(conventionally under `vtkm/cont/internal`). If this is the case, an advanced
CMake option should be added to select the provided header file.
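As a sketch, the Fortran in situ case above might provide a header like the one below (the file name and exact type choices are hypothetical). The macros are plain token definitions; VTK-m expands them in contexts where `vtkm::List` and the storage tags are already declared, so the header needs no includes of its own:
```cpp
// my_vtkm_types.h (hypothetical): restrict VTK-m to 64-bit floats
// held in structure-of-arrays storage.
#ifndef my_vtkm_types_h
#define my_vtkm_types_h

// Field value types the filters will be compiled for.
#define VTKM_DEFAULT_TYPE_LIST \
  ::vtkm::List<vtkm::Float64, vtkm::Vec3f_64>

// Storage backends the filters will be compiled for.
#define VTKM_DEFAULT_STORAGE_LIST \
  ::vtkm::List<vtkm::cont::StorageTagSOA>

#endif
```
The header would then be handed to the build with something like `cmake -DVTKm_DEFAULT_TYPES_HEADER=/path/to/my_vtkm_types.h ...`.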

@ -0,0 +1,13 @@
# Result DataSet of coordinate transform has its CoordinateSystem changed
When you run one of the coordinate transform filters,
`CylindricalCoordinateTransform` or `SphericalCoordinateTransform`, the
transformed coordinates are placed as the first `CoordinateSystem` in the
returned `DataSet`. This means that after running this filter, the data
will be moved to this new coordinate space.
Previously, the result of these filters was just placed in a named `Field`
of the output. This caused some confusion because the filter did not seem
to have any effect (unless you knew to modify the output data). Not using
the result as the coordinate system seems like a dubious use case (and not
hard to work around), so this is much better behavior.
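As a usage sketch (assuming an input `dataSet` with Cartesian coordinates):
```cpp
vtkm::filter::CylindricalCoordinateTransform transform;
transform.SetUseCoordinateSystemAsField(true);
vtkm::cont::DataSet result = transform.Execute(dataSet);

// The transformed coordinates are now the first CoordinateSystem, so
// downstream filters and renderers see the data in the new space.
vtkm::cont::CoordinateSystem coords = result.GetCoordinateSystem(0);
```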

@ -0,0 +1,17 @@
# DataSet now only allows unique field names
When you add a `vtkm::cont::Field` to a `vtkm::cont::DataSet`, it now
requires every `Field` to have a unique name. When you attempt to add a
`Field` to a `DataSet` that already has a `Field` of the same name and
association, the old `Field` is removed and replaced with the new `Field`.
You are allowed, however, to have two `Field`s with the same name but
different associations. For example, you could have a point `Field` named
"normals" and also have a cell `Field` named "normals" in the same
`DataSet`.
This new behavior matches how VTK's data sets manage fields.
The old behavior allowed you to add multiple `Field`s with the same name,
but it would be unclear which one you would get if you asked for a `Field`
by name.
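A sketch of the new behavior (the `ArrayHandle`s below are assumed to exist):
```cpp
vtkm::cont::DataSet dataSet;
// ... cell set, coordinates, and the arrays used below are assumed ...
dataSet.AddPointField("normals", pointNormals); // point-associated "normals"
dataSet.AddCellField("normals", cellNormals);   // OK: same name, different association
dataSet.AddPointField("normals", newNormals);   // replaces the first point field
```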

@ -0,0 +1,17 @@
# Deprecate Execute with policy
The version of `Filter::Execute` that takes a policy as an argument is now
deprecated. Filters are now able to specify their own fields and types,
which is often why you want to customize the policy for an execution. The
other reason is that you are compiling VTK-m into some other source that
uses particular types of storage. However, there is now a mechanism in
the CMake configuration to allow you to provide a header that customizes
the "default" types used in filters. This is a much more convenient way to
compile filters for specific types.
One thing that filters were not able to do was customize which cell sets
they allow. This change lets filters self-select what types of cell
sets they support (beyond simply structured or unstructured). To
support this, the lists `SupportedCellSets`, `SupportedStructuredCellSets`,
and `SupportedUnstructuredCellSets` have been added to `Filter`. When you
apply a policy to a cell set, you now have to also provide the filter.
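For illustration, a call that previously passed a policy now typically omits it and relies on the filter's own type lists (the filter and field names here are hypothetical):
```cpp
vtkm::filter::Contour contour;
contour.SetActiveField("pointvar");
contour.SetIsoValue(0.5);

// Deprecated form: contour.Execute(input, vtkm::filter::PolicyDefault{});
vtkm::cont::DataSet output = contour.Execute(input);
```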

@ -0,0 +1,8 @@
# Filters specify their own field types
Previously, the policy specified which field types the filter should
operate on. The filter could remove some types, but it was not able to
add any types.
This is backward. Instead, the filter should specify what types its
supports and the policy may cull out some of those.

@ -0,0 +1,15 @@
# Flying Edges
Added the flying edges contouring algorithm to VTK-m. This algorithm only
works on structured grids, but operates much faster than the traditional
Marching Cubes algorithm.
The speed of VTK-m's flying edges is comparable to VTK's running on the same
CPUs. VTK-m's implementation also works well on CUDA hardware.
The Flying Edges algorithm was introduced in this paper:
Schroeder, W.; Maynard, R. & Geveci, B.
"Flying edges: A high-performance scalable isocontouring algorithm."
Large Data Analysis and Visualization (LDAV), 2015.
DOI 10.1109/LDAV.2015.7348069

@ -0,0 +1,32 @@
# Implemented PNG/PPM image Readers/Writers
The original implementation of writing image data was only performed as a
proxy through the Canvas rendering class. In order to implement true support
for image-based regression testing, this interface needed to be expanded upon
to support reading/writing arbitrary image data and storing it in a `vtkm::DataSet`.
Using the new `vtkm::io::PNGReader` and `vtkm::io::PPMReader` it is possible
to read data from files and Canvases directly and store it as a point field
in a 2D uniform `vtkm::DataSet`.
```cpp
auto reader = vtkm::io::PNGReader();
auto imageDataSet = reader.ReadFromFile("read_image.png");
```
Similarly, the new `vtkm::io::PNGWriter` and `vtkm::io::PPMWriter` make it possible
to write out a 2D uniform `vtkm::DataSet` directly to a file.
```cpp
auto writer = vtkm::io::PNGWriter();
writer.WriteToFile("write_image.png", imageDataSet);
```
If canvas data is to be written out, the reader provides a method for converting
a canvas's data to a `vtkm::DataSet`.
```cpp
auto reader = vtkm::io::PNGReader();
auto dataSet = reader.CreateImageDataSet(canvas);
auto writer = vtkm::io::PNGWriter();
writer.WriteToFile("output.png", dataSet);
```

@ -0,0 +1,7 @@
# Reorganization of `io` directory
The `vtkm/io` directory has been flattened.
Namely, the files in `vtkm/io/reader` and `vtkm/io/writer` have been moved up into `vtkm/io`,
with the associated changes in namespaces.
In addition, `vtkm/cont/EncodePNG.h` and `vtkm/cont/DecodePNG.h` have been moved to a more natural home in `vtkm/io`.
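For example, code that used the old reader location would change along these lines (the data file name is illustrative):
```cpp
// Before: #include <vtkm/io/reader/VTKDataSetReader.h>
//         vtkm::io::reader::VTKDataSetReader reader("data.vtk");
#include <vtkm/io/VTKDataSetReader.h>

vtkm::io::VTKDataSetReader reader("data.vtk");
vtkm::cont::DataSet dataSet = reader.ReadDataSet();
```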

@ -0,0 +1,72 @@
# Avoid raising errors when operating on cells
Cell operations like interpolate and finding parametric coordinates can
fail under certain conditions. The previous behavior was to call
`RaiseError` on the worklet. By design, this would cause the worklet
execution to fail. However, that makes the worklet unstable for a condition
that might be relatively common in data. For example, you wouldn't want a
large streamline worklet to fail just because one cell was not found
correctly.
To work around this, many of the cell operations in the execution
environment have been changed to return an error code rather than raise an
error in the worklet.
## Error Codes
To support cell operations efficiently returning errors, a new enum named
`vtkm::ErrorCode` is available. This is the current implementation of
`ErrorCode`.
``` cpp
enum class ErrorCode
{
Success,
InvalidShapeId,
InvalidNumberOfPoints,
WrongShapeIdForTagType,
InvalidPointId,
InvalidEdgeId,
InvalidFaceId,
SolutionDidNotConverge,
MatrixFactorizationFailed,
DegenerateCellDetected,
MalformedCellDetected,
OperationOnEmptyCell,
CellNotFound,
UnknownError
};
```
A convenience function named `ErrorString` is provided to make it easy to
convert the `ErrorCode` to a descriptive string that can be placed in an
error.
## New Calling Specification
Previously, most execution environment functions took as an argument the
worklet calling the function. This made it possible to call `RaiseError` on
the worklet. The result of the operation was typically returned. For
example, here is how the _old_ version of interpolate was called.
``` cpp
FieldType interpolatedValue =
vtkm::exec::CellInterpolate(fieldValues, pcoord, shape, worklet);
```
The worklet is now no longer passed to the function. It is no longer needed
because an error is never directly raised. Instead, an `ErrorCode` is
returned from the function. Because the `ErrorCode` is returned, the
computed result of the function is returned by passing in a reference to a
variable. This is usually placed as the last argument (where the worklet
used to be). Here is the _new_ version of how interpolate is called.
``` cpp
FieldType interpolatedValue;
vtkm::ErrorCode result =
vtkm::exec::CellInterpolate(fieldValues, pcoord, shape, interpolatedValue);
```
The success of the operation can be determined by checking that the
returned `ErrorCode` is equal to `vtkm::ErrorCode::Success`.
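A worklet that still wants to fail on an error can combine the returned code with `ErrorString`; a minimal sketch, using the same names as the example above:
```cpp
FieldType interpolatedValue;
vtkm::ErrorCode result =
  vtkm::exec::CellInterpolate(fieldValues, pcoord, shape, interpolatedValue);
if (result != vtkm::ErrorCode::Success)
{
  // Convert the code to a human-readable message and raise it.
  this->RaiseError(vtkm::ErrorString(result));
}
```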

@ -0,0 +1,18 @@
# `ReadPortal().Get(idx)`
Calling `ReadPortal()` in a tight loop is an antipattern.
A call to `ReadPortal()` causes the array to be copied back to the control environment,
and hence code like
```cpp
for (vtkm::Id i = 0; i < array.GetNumberOfValues(); ++i) {
vtkm::FloatDefault x = array.ReadPortal().Get(i);
}
```
is a quadratic-scaling loop.
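The fix is to hoist the portal out of the loop so the copy back to the control environment happens once:
```cpp
auto portal = array.ReadPortal(); // one copy, not one per iteration
for (vtkm::Id i = 0; i < array.GetNumberOfValues(); ++i) {
  vtkm::FloatDefault x = portal.Get(i);
}
```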
We have removed *almost* all internal uses of the `ReadPortal().Get` antipattern,
with the exception of 4 API calls into which the pattern is baked:
`CellSetExplicit::GetCellShape`, `CellSetPermutation::GetNumberOfPointsInCell`, `CellSetPermutation::GetCellShape`, and `CellSetPermutation::GetCellPointIds`.
We expect these will need to be deprecated in the future.

@ -0,0 +1,13 @@
# Removed OpenGL Rendering Classes
When the rendering library was first built, OpenGL was used to implement
the components (windows, mappers, annotation, etc.). However, as the native
ray casting became viable, the majority of the work has focused on using
that. Since then, the original OpenGL classes have been largely ignored.
It has for many months been determined that it is not worth attempting to
maintain two different versions of the rendering libraries as features are
added and changed. Thus, the OpenGL classes have fallen out of date and did
not actually work.
These classes have finally been officially removed.

@ -0,0 +1,13 @@
# Move VTK file readers and writers into vtkm_io
The legacy VTK file reader and writer were created back when VTK-m was a
header-only library. Things have changed and we now compile quite a bit of
code into libraries. At this point, there is no reason why the VTK file
reader/writer should be any different.
Thus, `VTKDataSetReader`, `VTKDataSetWriter`, and several supporting
classes are now compiled into the `vtkm_io` library. Also similarly updated
`BOVDataSetReader` for good measure.
As a side effect, code using VTK-m will need to link to `vtkm_io` if it
uses any readers or writers.

BIN docs/external_stage.png (new binary file, 50 KiB; not shown)

@ -7,14 +7,14 @@
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
cmake_minimum_required(VERSION 3.8...3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project(Clipping CXX)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
add_executable(Clipping Clipping.cxx)
target_link_libraries(Clipping PRIVATE vtkm_filter)
target_link_libraries(Clipping PRIVATE vtkm_filter vtkm_io)
vtkm_add_target_information(Clipping
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
