Merge branch 'fp_atomics' into particle_density_cic

This commit is contained in:
Li-Ta Lo 2021-03-04 11:15:38 -07:00
commit aed6a8f2f4
734 changed files with 37294 additions and 16052 deletions

@ -3,7 +3,7 @@
#
# * .gitlab/ci/docker/centos7/cuda10.2/
# - cuda
# - gcc 4.8.5
# - gcc 7.3.1
# * .gitlab/ci/docker/centos8/base/
# - gcc 8.3.1
# - clang 8.0.1
@ -13,7 +13,7 @@
# - cuda
# - gcc 8.2.1
# * .gitlab/ci/docker/ubuntu1604/base/
# - gcc 4.8
# - gcc 5.4.0
# - clang 3.8
# - clang 5.0
# - tbb
@ -32,6 +32,7 @@
# - tbb
# - openmp
# - mpich2
# - hdf5
# * .gitlab/ci/docker/ubuntu1804/cuda11.1/
# - cuda
# - gcc 7
@ -50,7 +51,7 @@
GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci
.centos7: &centos7
image: "kitware/vtkm:ci-centos7_cuda10.2-20201016"
image: "kitware/vtkm:ci-centos7_cuda10.2-20210128"
extends:
- .docker_image
@ -75,7 +76,7 @@
- .docker_image
.ubuntu1804: &ubuntu1804
image: "kitware/vtkm:ci-ubuntu1804-20201016"
image: "kitware/vtkm:ci-ubuntu1804-20210107"
extends:
- .docker_image
@ -173,6 +174,14 @@ stages:
#for running failed tests multiple times so failures
#due to system load are not reported
- "ctest-latest -VV -S .gitlab/ci/ctest_test.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
- build/*.pmm
.cmake_memcheck_linux: &cmake_memcheck_linux
stage: test
@ -180,6 +189,14 @@ stages:
interruptible: true
script:
- "ctest-latest -VV -S .gitlab/ci/ctest_memcheck.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
- build/*.pmm
include:
- local: '/.gitlab/ci/centos7.yml'

@ -1,7 +1,7 @@
# Build on centos7 with CUDA and test on rhel8 and centos7
# gcc 4.8
build:centos7_gcc48:
# gcc 7.3.1
build:centos7_gcc73:
tags:
- build
- vtkm
@ -18,7 +18,7 @@ build:centos7_gcc48:
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering"
test:centos7_gcc48:
test:centos7_gcc73:
tags:
- test
- vtkm
@ -30,12 +30,10 @@ test:centos7_gcc48:
- .centos7
- .cmake_test_linux
- .only-default
variables:
CTEST_EXCLUSIONS: "UnitTestContourTreeUniformAugmentedFilterCUDA|UnitTestContourTreeUniformAugmentedCUDA"
dependencies:
- build:centos7_gcc48
- build:centos7_gcc73
needs:
- build:centos7_gcc48
- build:centos7_gcc73
test:rhel8_test_centos7:
tags:
@ -50,8 +48,8 @@ test:rhel8_test_centos7:
- .cmake_test_linux
- .only-default
variables:
CTEST_EXCLUSIONS: "built_against_test_install|UnitTestContourTreeUniformAugmentedFilterCUDA|UnitTestContourTreeUniformAugmentedCUDA"
CTEST_EXCLUSIONS: "built_against_test_install"
dependencies:
- build:centos7_gcc48
- build:centos7_gcc73
needs:
- build:centos7_gcc48
- build:centos7_gcc73

@ -30,6 +30,7 @@ test:centos8_sanitizer:
variables:
OMP_NUM_THREADS: 4
CTEST_MEMORYCHECK_TYPE: LeakSanitizer
CTEST_EXCLUSIONS: "RegressionTest.*"
dependencies:
- build:centos8_sanitizer
needs:

@ -29,6 +29,10 @@ foreach(option IN LISTS options)
elseif(vtk_types STREQUAL option)
set(VTKm_USE_DEFAULT_TYPES_FOR_VTK "ON" CACHE STRING "")
elseif(ascent_types STREQUAL option)
# Note: ascent_types also requires 32bit_ids and 64bit_floats
set(VTKm_USE_DEFAULT_TYPES_FOR_ASCENT "ON" CACHE STRING "")
elseif(32bit_ids STREQUAL option)
set(VTKm_USE_64BIT_IDS "OFF" CACHE STRING "")
@ -76,6 +80,9 @@ foreach(option IN LISTS options)
elseif(kokkos STREQUAL option)
set(VTKm_ENABLE_KOKKOS "ON" CACHE STRING "")
elseif(hdf5 STREQUAL option)
set(VTKm_ENABLE_HDF5_IO "ON" CACHE STRING "")
elseif(maxwell STREQUAL option)
set(VTKm_CUDA_Architecture "maxwell" CACHE STRING "")

@ -10,7 +10,7 @@ readonly tarball="$filename.tar.gz"
cd .gitlab
echo "$sha256sum $tarball" > sccache.sha256sum
curl -OL "https://github.com/robertmaynard/sccache/releases/download/$version/$tarball"
curl --insecure -OL "https://github.com/robertmaynard/sccache/releases/download/$version/$tarball"
sha256sum --check sccache.sha256sum
tar xf "$tarball"
#mv "$filename/sccache" .

@ -1,14 +0,0 @@
$tempFile = "$env:temp\vcvars.txt"
if ($env:CI_JOB_NAME -eq "build:windows_vs2019") {
cmd.exe /c "call `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat`" && set > $tempFile"
} else {
cmd.exe /c "call `"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat`" && set > $tempFile"
}
Get-Content "$tempFile" | Foreach-Object {
if ($_ -match "^(.*?)=(.*)$") {
Set-Content "env:\$($matches[1])" $matches[2]
}
}

@ -0,0 +1,9 @@
$erroractionpreference = "stop"
cmd /c "`"$env:VCVARSALL`" $VCVARSPLATFORM -vcvars_ver=$VCVARSVERSION & set" |
foreach {
if ($_ -match "=") {
$v = $_.split("=")
[Environment]::SetEnvironmentVariable($v[0], $v[1])
}
}

@ -33,7 +33,8 @@ if(NOT CTEST_MEMORYCHECK_SUPPRESSIONS_FILE)
endif()
set(test_exclusions
# placeholder for tests to exclude
# placeholder for tests to exclude provided by the env
$ENV{CTEST_EXCLUSIONS}
)
string(REPLACE ";" "|" test_exclusions "${test_exclusions}")

@ -1,9 +1,9 @@
FROM nvidia/cuda:10.2-devel-centos7
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 centos-release-scl -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y
RUN yum install git git-lfs devtoolset-7-gcc-c++ -y
# Provide a consistent CMake path across all images
# Install CMake 3.13 as it is the minimum for cuda builds
@ -20,4 +20,4 @@ RUN mkdir /opt/cmake-latest/ && \
rm cmake-3.17.3-Linux-x86_64.sh && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
ENV PATH "/opt/rh/devtoolset-7/root/bin:/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -12,12 +12,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libmpich-dev \
libomp-dev \
libtbb-dev \
libhdf5-dev \
mpich \
ninja-build \
software-properties-common
# extra dependencies for charm machine
RUN add-apt-repository ppa:jonathonf/gcc-9.2
RUN add-apt-repository ppa:jonathonf/gcc
RUN apt-get update && apt-get install -y --no-install-recommends \
clang-8 \
g++-9 \

@ -17,7 +17,7 @@ build:ubuntu1604_gcc5:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "cuda+pascal+no_virtual"
VTKM_SETTINGS: "cuda+pascal+no_virtual+ascent_types+32bit_ids+64bit_floats"
test:ubuntu1604_gcc5:
tags:
@ -75,50 +75,6 @@ test:ubuntu1804_test_ubuntu1604_gcc5_2:
needs:
- build:ubuntu1604_gcc5_2
# Build on ubuntu1604 with mpi + tbb and test on ubuntu1604
# Uses gcc 4.8
# Uses OpenMPI
build:ubuntu1604_gcc48:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-4.8"
CXX: "g++-4.8"
CMAKE_BUILD_TYPE: Release
#custom openmpi install location
CMAKE_PREFIX_PATH: "/opt/openmpi/"
VTKM_SETTINGS: "tbb+mpi+shared+no_rendering"
test:ubuntu1604_gcc48:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_test_linux
- .only-default
variables:
#env flags to allow openmpi to run as root user
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
#mpi location so that `built_against_test_install` tests
#pass
CMAKE_PREFIX_PATH: "/opt/openmpi/"
dependencies:
- build:ubuntu1604_gcc48
needs:
- build:ubuntu1604_gcc48
# Build on ubuntu1604 with tbb and test on ubuntu1604
# Uses clang 5
build:ubuntu1604_clang5:

@ -16,7 +16,7 @@ build:ubuntu1804_gcc9:
CC: "gcc-9"
CXX: "g++-9"
CMAKE_BUILD_TYPE: Debug
VTKM_SETTINGS: "tbb+openmp+mpi+shared"
VTKM_SETTINGS: "tbb+openmp+mpi+shared+hdf5"
test:ubuntu1804_gcc9:
tags:
@ -220,5 +220,3 @@ test:ubuntu1804_kokkos:
- build:ubuntu1804_kokkos
needs:
- build:ubuntu1804_kokkos
variables:
CUDA_LAUNCH_BLOCKING: "1"

@ -1,10 +1,27 @@
.windows_build:
variables:
# Note that shell runners only support runners with a single
# concurrency level. We can't use `$CI_CONCURRENCY_ID` because this may
# change between the build and test stages which CMake doesn't support.
# Even if we could, it could change if other runners on the machine
# could run at the same time, so we drop it.
GIT_CLONE_PATH: "$CI_BUILDS_DIR\\vtkm ci"
.windows_vs2019:
variables:
VCVARSALL: "${VS160COMNTOOLS}\\..\\..\\VC\\Auxiliary\\Build\\vcvarsall.bat"
VCVARSPLATFORM: "x64"
VCVARSVERSION: "14.25"
.cmake_build_windows: &cmake_build_windows
extends:
- .windows_build
- .windows_vs2019
stage: build
timeout: 2 hours
interruptible: true
before_script:
- .gitlab/ci/config/setup_vs_powershell.ps1
- Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
@ -39,11 +56,14 @@
.cmake_test_windows: &cmake_test_windows
extends:
- .windows_build
- .windows_vs2019
stage: test
timeout: 50 minutes
interruptible: true
before_script:
- .gitlab/ci/config/setup_vs_powershell.ps1
- Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
script:
#Need to use our custom ctest-latest symlink
#This will allow us to use 3.17+ which has support
@ -55,11 +75,13 @@
# Will have CUDA 10.2 once build issues are resolved
build:windows_vs2019:
tags:
- vtkm # Since this is a bare runner, pin to a project.
- nonconcurrent
- build
- vtkm
- windows
- vs2019
- shell
- vs2019
- msvc-19.25
- large-memory
extends:
- .cmake_build_windows
@ -73,10 +95,13 @@ build:windows_vs2019:
test:windows_vs2019:
tags:
- vtkm # Since this is a bare runner, pin to a project.
- nonconcurrent
- test
- vtkm
- windows
- shell
- vs2019
- msvc-19.25
- cuda-rt
- turing
extends:

@ -77,7 +77,7 @@ endif()
set(vec_levels none native)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
#for now we presume gcc >= 4.8
#for now we presume gcc >= 5.4
list(APPEND vec_levels avx avx2)
#common flags for the avx and avx2 instructions for the gcc compiler

@ -124,8 +124,15 @@ elseif(VTKM_COMPILER_IS_ICC)
target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-wd1478 -wd13379>)
elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common -Wno-unused-function)
set(cuda_flags -Xcompiler=-Wall,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-fno-common,-Wunused,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-function)
set(cxx_flags -Wall -Wcast-align -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common -Wno-unused-function)
set(cuda_flags -Xcompiler=-Wall,-Wcast-align,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-fno-common,-Wunused,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-function)
#Clang does not support the -Wchar-subscripts flag for warning if an array
#subscript has a char type.
if (VTKM_COMPILER_IS_GNU)
list(APPEND cxx_flags -Wchar-subscripts)
set(cuda_flags "${cuda_flags},-Wchar-subscripts")
endif()
#Only add float-conversion warnings for gcc as the integer warnings in GCC
#include the implicit casting of all types smaller than int to ints.

@ -344,6 +344,12 @@ if(VTKm_ENABLE_KOKKOS AND NOT TARGET vtkm::kokkos)
message(STATUS "Detected Cuda arch from Kokkos: ${cuda_arch}")
add_library(vtkm::kokkos_cuda INTERFACE IMPORTED GLOBAL)
elseif(HIP IN_LIST Kokkos_DEVICES)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
enable_language(HIP)
add_library(vtkm::kokkos_hip INTERFACE IMPORTED GLOBAL)
set_property(TARGET Kokkos::kokkoscore PROPERTY INTERFACE_COMPILE_OPTIONS "")
set_property(TARGET Kokkos::kokkoscore PROPERTY INTERFACE_LINK_OPTIONS "")
endif()
add_library(vtkm::kokkos INTERFACE IMPORTED GLOBAL)

@ -61,6 +61,20 @@ function(vtkm_find_gl)
if(DO_GLUT_FIND AND NOT TARGET GLUT::GLUT)
find_package(GLUT ${GLUT_REQUIRED} ${QUIETLY})
if(APPLE AND CMAKE_VERSION VERSION_LESS 3.19.2)
get_target_property(lib_path GLUT::GLUT IMPORTED_LOCATION)
if(EXISTS "${lib_path}.tbd")
set_target_properties(GLUT::GLUT PROPERTIES
IMPORTED_LOCATION "${lib_path}.tbd")
endif()
get_target_property(lib_path GLUT::Cocoa IMPORTED_LOCATION)
if(EXISTS "${lib_path}.tbd")
set_target_properties(GLUT::Cocoa PROPERTIES
IMPORTED_LOCATION "${lib_path}.tbd")
endif()
endif()
endif()
endfunction()

@ -33,7 +33,7 @@ function(vtkm_get_kit_name kitvar)
# Optional second argument to get dir_prefix.
if (${ARGC} GREATER 1)
set(${ARGV1} "${dir_prefix}" PARENT_SCOPE)
endif (${ARGC} GREATER 1)
endif ()
endfunction(vtkm_get_kit_name)
#-----------------------------------------------------------------------------
@ -327,6 +327,12 @@ function(vtkm_add_target_information uses_vtkm_target)
endforeach()
endif()
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
elseif(TARGET vtkm::kokkos_hip)
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "HIP")
endif()
# Validate that following:
# - We are building with CUDA enabled.
# - We are building a VTK-m library or a library that wants cross library
@ -335,7 +341,6 @@ function(vtkm_add_target_information uses_vtkm_target)
# This is required as CUDA currently doesn't support device side calls across
# dynamic library boundaries.
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
foreach(target IN LISTS targets)
get_target_property(lib_type ${target} TYPE)
if (TARGET vtkm::cuda)

@ -0,0 +1,18 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# This module is already included in new versions of CMake
if(CMAKE_VERSION VERSION_LESS 3.15)
include(${CMAKE_CURRENT_LIST_DIR}/3.15/FindMPI.cmake)
else()
include(${CMAKE_ROOT}/Modules/FindMPI.cmake)
endif()

@ -18,6 +18,7 @@
# -DVTKm_INSTALL_INCLUDE_DIR=<VTKm_INSTALL_INCLUDE_DIR>
# -DVTKm_ENABLE_RENDERING=<VTKm_ENABLE_RENDERING>
# -DVTKm_ENABLE_LOGGING=<VTKm_ENABLE_LOGGING>
# -DVTKm_ENABLE_HDF5_IO=<VTKm_ENABLE_HDF5_IO>
# -P <VTKm_SOURCE_DIR>/CMake/testing/VTKMCheckSourceInInstall.cmake
##
@ -39,7 +40,9 @@ endif ()
if (NOT DEFINED VTKm_ENABLE_LOGGING)
message(FATAL_ERROR "VTKm_ENABLE_LOGGING not defined.")
endif ()
if (NOT DEFINED VTKm_ENABLE_HDF5_IO)
message(FATAL_ERROR "VTKm_ENABLE_HDF5_IO not defined.")
endif()
include(CMakeParseArguments)
# -----------------------------------------------------------------------------
@ -117,8 +120,12 @@ function(do_verify root_dir prefix)
cont/ArrayHandleVirtual.h
cont/ArrayHandleVirtual.hxx
cont/ArrayHandleVirtualCoordinates.h
cont/CellLocator.h
cont/PointLocator.h
cont/StorageVirtual.h
cont/StorageVirtual.hxx
exec/CellLocator.h
exec/PointLocator.h
)
#by default every header in a testing directory doesn't need to be installed
@ -131,7 +138,12 @@ function(do_verify root_dir prefix)
if(NOT VTKm_ENABLE_LOGGING)
list(APPEND directory_exceptions thirdparty/loguru)
endif()
if (NOT VTKm_ENABLE_HDF5_IO)
list(APPEND file_exceptions
io/ImageWriterHDF5.h
io/ImageReaderHDF5.h
)
endif()
#Step 2. Verify the installed files match what headers are listed in each
# source directory
verify_install_per_dir("${VTKm_SOURCE_DIR}/vtkm"

@ -17,6 +17,7 @@ function(vtkm_test_install )
"-DVTKm_INSTALL_INCLUDE_DIR=${VTKm_INSTALL_INCLUDE_DIR}"
"-DVTKm_ENABLE_RENDERING=${VTKm_ENABLE_RENDERING}"
"-DVTKm_ENABLE_LOGGING=${VTKm_ENABLE_LOGGING}"
"-DVTKm_ENABLE_HDF5_IO=${VTKm_ENABLE_HDF5_IO}"
)
#By having this as separate tests using fixtures, it will allow us in

@ -47,9 +47,15 @@ function(vtkm_create_test_executable
vtkm_add_drop_unused_function_flags(${prog})
target_compile_definitions(${prog} PRIVATE ${defines})
#if all backends are enabled, we can use cuda compiler to handle all possible backends.
#determine if we have a device that requires a separate compiler enabled
set(device_lang_enabled FALSE)
if( (TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda) OR (TARGET vtkm::kokkos_hip))
set(device_lang_enabled TRUE)
endif()
#if all backends are enabled, we can use the device compiler to handle all possible backends.
set(device_sources)
if(((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda)) AND enable_all_backends)
if(device_lang_enabled AND enable_all_backends)
set(device_sources ${sources})
endif()
vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
@ -62,7 +68,7 @@ function(vtkm_create_test_executable
set_property(TARGET ${prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
set_property(TARGET ${prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
target_link_libraries(${prog} PRIVATE vtkm_cont ${libraries})
target_link_libraries(${prog} PRIVATE vtkm_cont_testing ${libraries})
if(use_job_pool)
vtkm_setup_job_pool()
@ -178,6 +184,9 @@ function(vtkm_unit_tests)
# Add the path to the location where regression test images are to be stored
list(APPEND VTKm_UT_TEST_ARGS "--baseline-dir=${VTKm_SOURCE_DIR}/data/baseline")
# Add the path to the location where generated regression test images should be written
list(APPEND VTKm_UT_TEST_ARGS "--write-dir=${VTKm_BINARY_DIR}")
if(VTKm_UT_MPI)
if (VTKm_ENABLE_MPI)
vtkm_create_test_executable(

@ -12,6 +12,14 @@
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project (VTKm)
# We only allow c++14
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# When using C++14 support make sure you use the standard C++ extensions rather
# than compiler-specific versions of the extensions (to preserve portability).
set(CMAKE_CXX_EXTENSIONS OFF)
# Update module path
set(VTKm_CMAKE_MODULE_PATH ${VTKm_SOURCE_DIR}/CMake)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${VTKm_CMAKE_MODULE_PATH})
@ -92,6 +100,11 @@ endif()
vtkm_option(VTKm_USE_DOUBLE_PRECISION "Use double precision for floating point calculations" OFF)
vtkm_option(VTKm_USE_64BIT_IDS "Use 64-bit indices." ON)
vtkm_option(VTKm_ENABLE_HDF5_IO "Enable HDF5 support" OFF)
if (VTKm_ENABLE_HDF5_IO)
find_package(HDF5 REQUIRED COMPONENTS HL)
endif()
# VTK-m will turn on logging by default, but will set the default
# logging level to WARN. This option should not be visible by default
# in the GUI, as ERROR and WARN level logging should not interfere
@ -149,6 +162,7 @@ mark_as_advanced(
VTKm_ENABLE_LOGGING
VTKm_NO_ASSERT
VTKm_NO_ASSERT_CUDA
VTKm_NO_ASSERT_HIP
VTKm_INSTALL_ONLY_LIBRARIES
VTKm_HIDE_PRIVATE_SYMBOLS
VTKm_ENABLE_DEVELOPER_FLAGS
@ -157,9 +171,6 @@ mark_as_advanced(
)
#-----------------------------------------------------------------------------
# When using C++11 support make sure you use the standard C++ extensions rather
# than compiler-specific versions of the extensions (to preserve portability).
set(CMAKE_CXX_EXTENSIONS Off)
# Setup default build types
include(VTKmBuildType)
@ -217,7 +228,7 @@ if (VTKm_ENABLE_TESTING)
# Setup compiler flags for dynamic analysis if needed
include(testing/VTKmCompilerDynamicAnalysisFlags)
endif (VTKm_ENABLE_TESTING)
endif()
#-----------------------------------------------------------------------------
# Check basic type sizes.
@ -283,9 +294,14 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
FILES
${VTKm_SOURCE_DIR}/CMake/VTKmCMakeBackports.cmake
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
${VTKm_SOURCE_DIR}/CMake/patches/3.15/FindMPI.cmake
${VTKm_SOURCE_DIR}/CMake/patches/FindMPI.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
)
install(
FILES
${VTKm_SOURCE_DIR}/CMake/patches/3.15/FindMPI.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}/3.15
)
# Install support files.
install(

@ -64,7 +64,7 @@ effort.
VTK-m Requires:
+ C++11 Compiler. VTK-m has been confirmed to work with the following
+ GCC 4.8+
+ GCC 5.4+
+ Clang 5.0+
+ XCode 5.0+
+ MSVC 2015+
@ -103,7 +103,7 @@ Optional dependencies are:
VTK-m has been tested on the following configurations:
+ On Linux
+ GCC 4.8.5, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
+ GCC 5.4.0, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
+ CMake 3.12, 3.13, 3.16, 3.17
+ CUDA 9.2, 10.2, 11.0, 11.1
+ TBB 4.4 U2, 2017 U7

@ -201,11 +201,8 @@ ENV GITLAB_CI=1 \
COPY . /src
ENV $gitlab_env
WORKDIR /src
#Let git fix issues from copying across OS (such as windows EOL)
#Note that this will remove any changes not committed.
RUN echo "$before_script || true" >> /setup-gitlab-env.sh && \
echo "$script || true" >> /run-gitlab-stage.sh && \
git reset --hard && \
bash /setup-gitlab-env.sh
''')

@ -20,6 +20,7 @@
#include <vtkm/cont/Timer.h>
#include <vtkm/worklet/StableSortIndices.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <algorithm>
#include <cmath>

@ -8,13 +8,13 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/ImplicitFunction.h>
#include <vtkm/Math.h>
#include <vtkm/VectorAnalysis.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleMultiplexer.h>
#include <vtkm/cont/CellSetStructured.h>
#include <vtkm/cont/ImplicitFunctionHandle.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/Timer.h>
@ -226,20 +226,20 @@ public:
using ExecutionSignature = void(_1, _2, _3, _4);
using InputDomain = _1;
template <typename WeightType, typename T, typename S, typename D>
template <typename WeightType, typename T, typename S>
VTKM_EXEC void operator()(const vtkm::Id2& low_high,
const WeightType& weight,
const vtkm::exec::ExecutionWholeArrayConst<T, S, D>& inPortal,
const vtkm::exec::ExecutionWholeArrayConst<T, S>& inPortal,
T& result) const
{
//fetch the low / high values from inPortal
result = vtkm::Lerp(inPortal.Get(low_high[0]), inPortal.Get(low_high[1]), weight);
}
template <typename WeightType, typename T, typename S, typename D, typename U>
template <typename WeightType, typename T, typename S, typename U>
VTKM_EXEC void operator()(const vtkm::Id2&,
const WeightType&,
const vtkm::exec::ExecutionWholeArrayConst<T, S, D>&,
const vtkm::exec::ExecutionWholeArrayConst<T, S>&,
U&) const
{
//the inPortal and result need to be the same type so this version only
@ -248,50 +248,35 @@ public:
}
};
template <typename ImplicitFunction>
class EvaluateImplicitFunction : public vtkm::worklet::WorkletMapField
{
public:
using ControlSignature = void(FieldIn, FieldOut);
using ExecutionSignature = void(_1, _2);
using ControlSignature = void(FieldIn, FieldOut, ExecObject);
using ExecutionSignature = void(_1, _2, _3);
EvaluateImplicitFunction(const ImplicitFunction* function)
: Function(function)
template <typename VecType, typename ScalarType, typename FunctionType>
VTKM_EXEC void operator()(const VecType& point,
ScalarType& val,
const FunctionType& function) const
{
val = function.Value(point);
}
template <typename VecType, typename ScalarType>
VTKM_EXEC void operator()(const VecType& point, ScalarType& val) const
{
val = this->Function->Value(point);
}
private:
const ImplicitFunction* Function;
};
template <typename T1, typename T2>
class Evaluate2ImplicitFunctions : public vtkm::worklet::WorkletMapField
{
public:
using ControlSignature = void(FieldIn, FieldOut);
using ExecutionSignature = void(_1, _2);
using ControlSignature = void(FieldIn, FieldOut, ExecObject, ExecObject);
using ExecutionSignature = void(_1, _2, _3, _4);
Evaluate2ImplicitFunctions(const T1* f1, const T2* f2)
: Function1(f1)
, Function2(f2)
template <typename VecType, typename ScalarType, typename FType1, typename FType2>
VTKM_EXEC void operator()(const VecType& point,
ScalarType& val,
const FType1& function1,
const FType2& function2) const
{
val = function1.Value(point) + function2.Value(point);
}
template <typename VecType, typename ScalarType>
VTKM_EXEC void operator()(const VecType& point, ScalarType& val) const
{
val = this->Function1->Value(point) + this->Function2->Value(point);
}
private:
const T1* Function1;
const T2* Function2;
};
struct PassThroughFunctor
@ -821,7 +806,7 @@ static ImplicitFunctionBenchData MakeImplicitFunctionBenchData()
void BenchImplicitFunction(::benchmark::State& state)
{
using EvalWorklet = EvaluateImplicitFunction<vtkm::Sphere>;
using EvalWorklet = EvaluateImplicitFunction;
const vtkm::cont::DeviceAdapterId device = Config.Device;
@ -833,10 +818,7 @@ void BenchImplicitFunction(::benchmark::State& state)
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto handle = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
auto function = static_cast<const vtkm::Sphere*>(handle.PrepareForExecution(device, token));
EvalWorklet eval(function);
EvalWorklet eval;
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
@ -845,7 +827,7 @@ void BenchImplicitFunction(::benchmark::State& state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
invoker(eval, data.Points, data.Result, data.Sphere1);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
@ -855,7 +837,7 @@ VTKM_BENCHMARK(BenchImplicitFunction);
void BenchVirtualImplicitFunction(::benchmark::State& state)
{
using EvalWorklet = EvaluateImplicitFunction<vtkm::ImplicitFunction>;
using EvalWorklet = EvaluateImplicitFunction;
const vtkm::cont::DeviceAdapterId device = Config.Device;
@ -867,9 +849,7 @@ void BenchVirtualImplicitFunction(::benchmark::State& state)
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto sphere = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
EvalWorklet eval(sphere.PrepareForExecution(device, token));
EvalWorklet eval;
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
@ -878,7 +858,7 @@ void BenchVirtualImplicitFunction(::benchmark::State& state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
invoker(eval, data.Points, data.Result, data.Sphere1);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
@ -888,7 +868,7 @@ VTKM_BENCHMARK(BenchVirtualImplicitFunction);
void Bench2ImplicitFunctions(::benchmark::State& state)
{
using EvalWorklet = Evaluate2ImplicitFunctions<vtkm::Sphere, vtkm::Sphere>;
using EvalWorklet = Evaluate2ImplicitFunctions;
const vtkm::cont::DeviceAdapterId device = Config.Device;
@ -900,12 +880,7 @@ void Bench2ImplicitFunctions(::benchmark::State& state)
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto h1 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
auto h2 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere2);
auto f1 = static_cast<const vtkm::Sphere*>(h1.PrepareForExecution(device, token));
auto f2 = static_cast<const vtkm::Sphere*>(h2.PrepareForExecution(device, token));
EvalWorklet eval(f1, f2);
EvalWorklet eval;
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
@ -914,7 +889,7 @@ void Bench2ImplicitFunctions(::benchmark::State& state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
invoker(eval, data.Points, data.Result, data.Sphere1, data.Sphere2);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
@ -922,40 +897,6 @@ void Bench2ImplicitFunctions(::benchmark::State& state)
}
VTKM_BENCHMARK(Bench2ImplicitFunctions);
void Bench2VirtualImplicitFunctions(::benchmark::State& state)
{
using EvalWorklet = Evaluate2ImplicitFunctions<vtkm::ImplicitFunction, vtkm::ImplicitFunction>;
const vtkm::cont::DeviceAdapterId device = Config.Device;
auto data = MakeImplicitFunctionBenchData();
{
std::ostringstream desc;
desc << data.Points.GetNumberOfValues() << " points";
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto s1 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
auto s2 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere2);
EvalWorklet eval(s1.PrepareForExecution(device, token), s2.PrepareForExecution(device, token));
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
}
VTKM_BENCHMARK(Bench2VirtualImplicitFunctions);
} // end anon namespace
int main(int argc, char* argv[])

@ -347,10 +347,11 @@ void BenchContour(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numIsoVals = static_cast<vtkm::Id>(state.range(0));
const bool mergePoints = static_cast<bool>(state.range(1));
const bool normals = static_cast<bool>(state.range(2));
const bool fastNormals = static_cast<bool>(state.range(3));
const bool isStructured = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numIsoVals = static_cast<vtkm::Id>(state.range(1));
const bool mergePoints = static_cast<bool>(state.range(2));
const bool normals = static_cast<bool>(state.range(3));
const bool fastNormals = static_cast<bool>(state.range(4));
vtkm::filter::Contour filter;
filter.SetActiveField(PointScalarsName, vtkm::cont::Field::Association::POINTS);
@ -376,11 +377,14 @@ void BenchContour(::benchmark::State& state)
filter.SetComputeFastNormalsForUnstructured(fastNormals);
vtkm::cont::Timer timer{ device };
vtkm::cont::DataSet input = isStructured ? InputDataSet : UnstructuredInputDataSet;
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet);
auto result = filter.Execute(input);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -390,13 +394,17 @@ void BenchContour(::benchmark::State& state)
void BenchContourGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "NIsoVals", "MergePts", "GenNormals", "FastNormals" });
bm->ArgNames({ "IsStructuredDataSet", "NIsoVals", "MergePts", "GenNormals", "FastNormals" });
auto helper = [&](const vtkm::Id numIsoVals) {
bm->Args({ numIsoVals, 0, 0, 0 });
bm->Args({ numIsoVals, 1, 0, 0 });
bm->Args({ numIsoVals, 0, 1, 0 });
bm->Args({ numIsoVals, 0, 1, 1 });
bm->Args({ 0, numIsoVals, 0, 0, 0 });
bm->Args({ 0, numIsoVals, 1, 0, 0 });
bm->Args({ 0, numIsoVals, 0, 1, 0 });
bm->Args({ 0, numIsoVals, 0, 1, 1 });
bm->Args({ 1, numIsoVals, 0, 0, 0 });
bm->Args({ 1, numIsoVals, 1, 0, 0 });
bm->Args({ 1, numIsoVals, 0, 1, 0 });
bm->Args({ 1, numIsoVals, 0, 1, 1 });
};
helper(1);
@ -405,7 +413,7 @@ void BenchContourGenerator(::benchmark::internal::Benchmark* bm)
}
// :TODO: Disabled until SIGSEGV in Countour when passings field is resolved
//VTKM_BENCHMARK_APPLY(BenchContour, BenchContourGenerator);
VTKM_BENCHMARK_APPLY(BenchContour, BenchContourGenerator);
void BenchExternalFaces(::benchmark::State& state)
{
@ -767,6 +775,10 @@ struct Arg : vtkm::cont::internal::option::Arg
bool msg)
{
if ((option.arg != nullptr) && (option.arg[0] != '\0'))
{
return vtkm::cont::internal::option::ARG_OK;
}
else
{
if (msg)
{
@ -774,10 +786,6 @@ struct Arg : vtkm::cont::internal::option::Arg
}
return vtkm::cont::internal::option::ARG_ILLEGAL;
}
else
{
return vtkm::cont::internal::option::ARG_OK;
}
}
};
@ -993,26 +1001,22 @@ void InitDataSet(int& argc, char** argv)
source.SetExtent({ 0 }, { waveletDim - 1 });
InputDataSet = source.Execute();
vtkm::cont::DataSet input = vtkm::cont::testing::MakeTestDataSet().Make2DUniformDataSet2();
vtkm::filter::Triangulate triangulateFilter;
triangulateFilter.SetFieldsToPass(
vtkm::filter::FieldSelection(vtkm::filter::FieldSelection::MODE_ALL));
UnstructuredInputDataSet = triangulateFilter.Execute(input);
}
if (tetra)
{
std::cerr << "[InitDataSet] Tetrahedralizing dataset...\n";
vtkm::filter::Tetrahedralize tet;
tet.SetFieldsToPass(vtkm::filter::FieldSelection(vtkm::filter::FieldSelection::MODE_ALL));
InputDataSet = tet.Execute(InputDataSet);
}
FindFields();
CreateMissingFields();
std::cerr
<< "[InitDataSet] Create UnstructuredInputDataSet from Tetrahedralized InputDataSet...\n";
vtkm::filter::Tetrahedralize tet;
tet.SetFieldsToPass(vtkm::filter::FieldSelection(vtkm::filter::FieldSelection::MODE_ALL));
UnstructuredInputDataSet = tet.Execute(InputDataSet);
if (tetra)
{
InputDataSet = UnstructuredInputDataSet;
}
inputGenTimer.Stop();
std::cerr << "[InitDataSet] DataSet initialization took " << inputGenTimer.GetElapsedTime()

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5706bddc644b5b120ffbd424b3073ce989735272726de711ca8dac19b4a30ee1
size 2653

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:785051d9773c4a0ced2701de3499f9cd948da2a4c846a5187e30dfb5cb0783cb
size 10830

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d990b5f0e9ef27e4e5f87f4c62c4f9974992506521f32bd5901ac6670e71bfa
size 9656

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54e09a09c97a20627e54c835d2d488bc9f692ef1315122ab60241c006ab78813
size 19742

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e1472e6002ca4ad4012e0c9f067f8254290fabe93c82713a4994ad97a7fdbdfc
size 31218

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ff6d72bd325ffe0fb3b22bfdc294b6d674384afd662290424bb77634202b4ef
size 71150

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:24c71e8846fe62e6f6eefdb72c9729639061af80bf9d3453d35c8c6838de9174
size 37162

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4c905ec76e72513519515ec41cf5efd34490b98255ee7465f8b6746fcff41e5
size 51865

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ddf65aefbd8c8fe8fb479521af7e5fa894cc94b3f890e2cc527a8df5c6e5601c
size 728

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea0a0903fce2b7b42023ca0a2bdc008781a61fa74f75b2b107e6d0788c404551
size 1441

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:58aed19216ce91b6c9bc7c0d8ee31c1062405ad6f5a4a977b49f213e2ce81307
size 1518

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc4033483646c7e3c7be921ca4f821d1277c0d6d79063b1565dfb78c4766bf4d
size 1234

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a2c0b3788197a48a305fc049f54d66c94c20298e617ef06dbe4fe0c2043f7366
size 3590

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7191ea7dec00129cb262239a508aeba4bb9387e581adfa2049211f4514ee4130
size 1020

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b7b7e73f60f3572e19178aa55fcd32cafb5c5823062241d28aa37d82b0031a2a
size 1145

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:90aed1ed3c3eba58f1b0b1573b09e8c024e48f5ca822e9f88b0c1ff6593a978f
size 693

@ -118,10 +118,9 @@ compilation of VTK-m. Instead of doing the compilation, instead you will be give
./reproduce_ci_env.py run rhel8
```
To compile VTK-m from the the interactive shell you would do the following:
To compile VTK-m from the interactive shell with the settings of the CI job you would do the following:
```
> src]# cd build/
> build]# cmake --build .
> src]# bash /run-gitlab-stage.sh
```
# How to Add/Update Kitware Gitlab CI

@ -0,0 +1,226 @@
# Extract component arrays from unknown arrays
One of the problems with the data structures of VTK-m is that non-templated
classes like `DataSet`, `Field`, and `UnknownArrayHandle` (formerly
`VariantArrayHandle`) internally hold an `ArrayHandle` of a particular type
that has to be cast to the correct type before it can be reasonably used.
That in turn is problematic because the list of possible `ArrayHandle`
types is very long.
At one time we were trying to compensate for this by using
`ArrayHandleVirtual`. However, for technical reasons this class is
infeasible for every use case of VTK-m and has been deprecated. Also, this
was only a partial solution since using it still required different code
paths for, say, handling values of `vtkm::Float32` and `vtkm::Vec3f_32`
even though both are essentially arrays of 32-bit floats.
The extract component feature compensates for this problem by allowing you
to extract the components from an `ArrayHandle`. This feature allows you to
create a single code path to handle `ArrayHandle`s containing scalars or
vectors of any size. Furthermore, when you extract a component from an
array, the storage gets normalized so that one code path covers all storage
types.
## `ArrayExtractComponent`
The basic enabling feature is a new function named `ArrayExtractComponent`.
This function takes an `ArrayHandle` and an index to a component. It
then returns an `ArrayHandleStride` holding the selected component of each
entry in the original array.
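As a rough sketch of what a call looks like, based only on the description above (the header path and the exact signature are assumptions):
``` cpp
#include <vtkm/cont/ArrayExtractComponent.h>
#include <vtkm/cont/ArrayHandle.h>

void ExtractY(const vtkm::cont::ArrayHandle<vtkm::Vec3f>& vectors)
{
  // Pull out component 1 (the Y values) of every Vec3f. The result is an
  // ArrayHandleStride that references the original storage when possible.
  auto yValues = vtkm::cont::ArrayExtractComponent(vectors, 1);
  (void)yValues;
}
```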
We will get to the structure of `ArrayHandleStride` later. But the
important part is that `ArrayHandleStride` does _not_ depend on the storage
type of the original `ArrayHandle`. That means whether you extract a
component from `ArrayHandleBasic`, `ArrayHandleSOA`,
`ArrayHandleCartesianProduct`, or any other type, you get back the same
`ArrayHandleStride`. Likewise, regardless of whether the input
`ArrayHandle` has a `ValueType` of `FloatDefault`, `Vec2f`, `Vec3f`, or any
other `Vec` of a default float, you get the same `ArrayHandleStride`. Thus,
you can see how this feature can dramatically reduce code paths if used
correctly.
It should be noted that `ArrayExtractComponent` will (logically) flatten
the `ValueType` before extracting the component. Thus, nested `Vec`s such
as `Vec<Vec3f, 3>` will be treated as a `Vec<FloatDefault, 9>`. The
intention is so that the extracted component will always be a basic C type.
For the purposes of this document when we refer to the "component type", we
really mean the base component type.
Different `ArrayHandle` implementations provide their own implementations
for `ArrayExtractComponent` so that the component can be extracted without
deep copying all the data. We will visit how `ArrayHandleStride` can
represent different data layouts later, but first let's go into the main
use case.
## Extract components from `UnknownArrayHandle`
The principal use case for `ArrayExtractComponent` is to get an
`ArrayHandle` from an unknown array handle without iterating over _every_
possible type. (Rather, we iterate over a smaller set of types.) To
facilitate this, an `ExtractComponent` method has been added to
`UnknownArrayHandle`.
To use `UnknownArrayHandle::ExtractComponent`, you must give it the
component type. You can check for the correct component type by using the
`IsBaseComponentType` method. The method will then return an
`ArrayHandleStride` for the component type specified.
### Example
As an example, let's say you have a worklet, `FooWorklet`, that does some
per component operation on an array. Furthermore, let's say that you want
to implement a function that, to the best of your ability, can apply
`FooWorklet` on an array of any type. This function should be pre-compiled
into a library so it doesn't have to be compiled over and over again.
(`MapFieldPermutation` and `MapFieldMergeAverage` are real and important
examples that have this behavior.)
Without the extract component feature, the implementation might look
something like this (many practical details left out):
``` cpp
struct ApplyFooFunctor
{
template <typename ArrayType>
void operator()(const ArrayType& input, vtkm::cont::UnknownArrayHandle& output) const
{
ArrayType outputArray;
vtkm::cont::Invoke invoke;
invoke(FooWorklet{}, input, outputArray);
output = outputArray;
}
};
vtkm::cont::UnknownArrayHandle ApplyFoo(const vtkm::cont::UnknownArrayHandle& input)
{
vtkm::cont::UnknownArrayHandle output;
input.CastAndCallForTypes<vtkm::TypeListAll, VTKM_DEFAULT_STORAGE_LIST_TAG>(
ApplyFooFunctor{}, output);
return output;
}
```
Take a look specifically at the `CastAndCallForTypes` call near the bottom
of this example. It calls for all types in `vtkm::TypeListAll`, which is
about 40 instances. Then, it needs to be called for any type in the desired
storage list. This could include basic arrays, SOA arrays, and lots of
other specialized types. It would be expected for this code to generate
over 100 paths for `ApplyFooFunctor`. This in turn contains a worklet
invoke, which is not a small amount of code.
Now consider how we can use the `ExtractComponent` feature to reduce the
code paths:
``` cpp
struct ApplyFooFunctor
{
template <typename T>
void operator()(T,
const vtkm::cont::UnknownArrayHandle& input,
const vtkm::cont::UnknownArrayHandle& output) const
{
if (!input.IsBaseComponentType<T>()) { return; }
VTKM_ASSERT(output.IsBaseComponentType<T>());
vtkm::cont::Invoke invoke;
invoke(FooWorklet{}, input.ExtractComponent<T>(), output.ExtractComponent<T>());
}
};
vtkm::cont::UnknownArrayHandle ApplyFoo(const vtkm::cont::UnknownArrayHandle& input)
{
vtkm::cont::UnknownArrayHandle output = input.NewInstanceBasic();
output.Allocate(input.GetNumberOfValues());
vtkm::cont::ListForEach(ApplyFooFunctor{}, vtkm::TypeListScalarAll{}, input, output);
return output;
}
```
The number of lines of code is about the same, but take a look at the
`ListForEach` (which replaces the `CastAndCallForTypes`). This calling code
takes `TypeListScalarAll` instead of `TypeListAll`, which reduces the
instances created from around 40 to 13 (every basic C type). It is also no
longer dependent on the storage, so these 13 instances are it. As an
example of potential compile savings, changing the implementation of the
`MapFieldPermutation` and `MapFieldMergeAverage` functions in this way
reduced the filters_common library (on Mac, Debug build) by 24 MB (over a
third of the total size).
Another great advantage of this approach is that even though it takes less
time to compile and generates less code, it actually covers more cases.
Have an array containing values of `Vec<short, 13>`? No problem. The values
were actually stored in an `ArrayHandleReverse`? It will still work.
## `ArrayHandleStride`
This functionality is made possible with the new `ArrayHandleStride`. This
array behaves much like `ArrayHandleBasic`, except that it contains an
_offset_ parameter to specify where in the buffer array to start reading
and a _stride_ parameter to specify how many entries to skip for each
successive entry. `ArrayHandleStride` also has optional parameters
`divisor` and `modulo` that allow indices to be repeated at regular
intervals.
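A minimal sketch of the index arithmetic implied by these parameters (not the actual VTK-m implementation) is:
``` cpp
#include <cstddef>

// Map a logical value index to a position in the underlying buffer. A divisor
// of 1 and a modulo of 0 mean the corresponding step is skipped.
std::size_t StrideBufferIndex(std::size_t index,
                              std::size_t offset,
                              std::size_t stride,
                              std::size_t divisor,
                              std::size_t modulo)
{
  std::size_t i = index / divisor;
  if (modulo != 0)
  {
    i %= modulo;
  }
  return offset + (i * stride);
}
```
The extraction cases below all reduce to particular choices of these four parameters.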
Here is how `ArrayHandleStride` extracts components from several common
arrays. For each of these examples, we assume that the `ValueType` of the
array is `Vec<T, N>`. They are each extracting _component_.
### Extracting from `ArrayHandleBasic`
When extracting from an `ArrayHandleBasic`, we just need to start at the
proper component and skip the length of the `Vec`.
* _offset_: _component_
* _stride_: `N`
### Extracting from `ArrayHandleSOA`
Since each component is held in a separate array, they are densely packed.
Each component could be represented by `ArrayHandleBasic`, but of course we
use `ArrayHandleStride` to keep the type consistent.
* _offset_: 0
* _stride_: 1
### Extracting from `ArrayHandleCartesianProduct`
This array is the basic reason for implementing the _divisor_ and _modulo_
parameters. Each of the 3 components have different parameters, which are
the following (given that _dims_[3] captures the size of the 3 arrays for
each dimension).
* _offset_: 0
* _stride_: 1
* case _component_ == 0
* _divisor_: _ignored_
* _modulo_: _dims_[0]
* case _component_ == 1
* _divisor_: _dims_[0]
* _modulo_: _dims_[1]
* case _component_ == 2
* _divisor_: _dims_[0] ⋅ _dims_[1]
* _modulo_: _ignored_
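For a concrete (hypothetical) example, suppose the three axis arrays have sizes _dims_ = {10, 20, 30}. The axis index selected for each extracted component of entry `i` works out as:
``` cpp
constexpr int dims[3] = { 10, 20, 30 }; // hypothetical axis array sizes
constexpr int i = 437;                  // flat index into the product array

constexpr int c0 = i % dims[0];              // 7: index into the first axis
constexpr int c1 = (i / dims[0]) % dims[1];  // 3: index into the second axis
constexpr int c2 = i / (dims[0] * dims[1]);  // 2: index into the third axis
```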
### Extracting from `ArrayHandleUniformPointCoordinates`
This array cannot be represented directly because it is fully implicit.
However, it can be trivially converted to `ArrayHandleCartesianProduct` in
typically very little memory. (In fact, EAVL always represented uniform
point coordinates by explicitly storing a Cartesian product.) Thus, for
very little overhead the `ArrayHandleStride` can be created.
## Runtime overhead of extracting components
These benefits come at a cost, but not a large one. The "biggest" cost is
the small cost of computing index arithmetic for each access into
`ArrayHandleStride`. To make this as efficient as possible, there are
conditions that skip over the modulo and divide steps if they are not
necessary. (Integer modulo and divide tend to take much longer than
addition and multiplication.) It is for this reason that we probably do not
want to use this method all the time.
Another cost is the fact that not every `ArrayHandle` can be represented by
`ArrayHandleStride` directly without copying. If you ask to extract a
component that cannot be directly represented, it will be copied into a
basic array, which is not great. To make matters worse, for technical
reasons this copy happens on the host rather than the device.

@ -0,0 +1,29 @@
# Create `ArrayHandleOffsetsToNumComponents`
`ArrayHandleOffsetsToNumComponents` is a fancy array that takes an array of
offsets and converts it to an array of the number of components for each
packed entry.
It is common in VTK-m to pack small vectors of variable sizes into a single
contiguous array. For example, cells in an explicit cell set can each have
a different number of vertices (triangles = 3, quads = 4, tetra = 4, hexa =
8, etc.). Generally, to access items in this list, you need the number of
components in each entry and the offset for each entry. However, if you
have just the array of offsets in sorted order, you can easily derive the
number of components for each entry by subtracting adjacent entries. This
works best if the offsets array has a size that is one more than the number
of packed vectors with the first entry set to 0 and the last entry set to
the total size of the packed array (the offset to the end).
When packing data of this nature, it is common to start with an array that
is the number of components. You can convert that to an offsets array using
the `vtkm::cont::ConvertNumComponentsToOffsets` function. This will create
an offsets array with one extra entry as previously described. You can then
throw out the original number of components array and use the offsets with
`ArrayHandleOffsetsToNumComponents` to represent both the offsets and num
components while storing only one array.
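The arithmetic behind this is simple enough to sketch with plain standard-library types (the real class performs the equivalent lookup lazily rather than materializing a second array):
``` cpp
#include <cstddef>
#include <vector>

// Derive per-entry sizes from a sorted offsets array that has one extra
// entry, e.g. sizes {3, 4, 4} are stored as offsets {0, 3, 7, 11}.
std::vector<std::size_t> NumComponentsFromOffsets(const std::vector<std::size_t>& offsets)
{
  std::vector<std::size_t> numComponents;
  for (std::size_t i = 0; i + 1 < offsets.size(); ++i)
  {
    numComponents.push_back(offsets[i + 1] - offsets[i]);
  }
  return numComponents;
}
```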
This replaces the use of `ArrayHandleDecorator` in `CellSetExplicit`.
The two implementations should do the same thing, but the new
`ArrayHandleOffsetsToNumComponents` should be less complex for
compilers.

@ -0,0 +1,18 @@
# `ArrayRangeCompute` works on any array type without compiling device code
Originally, `ArrayRangeCompute` required you to know specifically the
`ArrayHandle` type (value type and storage type) and to compile using a
device compiler. The method is changed to include only overloads that have
precompiled versions of `ArrayRangeCompute`.
Additionally, an `ArrayRangeCompute` overload that takes an
`UnknownArrayHandle` has been added. In addition to allowing you to compute
the range of arrays of unknown types, this implementation of
`ArrayRangeCompute` serves as a fallback for `ArrayHandle` types that are
not otherwise explicitly supported.
If you really want to make sure that you compute the range directly on an
`ArrayHandle` of a particular type, you can include
`ArrayRangeComputeTemplate.h`, which contains a templated overload of
`ArrayRangeCompute` that directly computes the range of an `ArrayHandle`.
Including this header requires compiling for device code.
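Based on that description, using the `UnknownArrayHandle` overload might look like the following sketch (the exact header and return type are assumptions):
``` cpp
#include <vtkm/Range.h>
#include <vtkm/cont/ArrayRangeCompute.h>
#include <vtkm/cont/UnknownArrayHandle.h>

vtkm::Range FirstComponentRange(const vtkm::cont::UnknownArrayHandle& array)
{
  // One vtkm::Range per component of the array's value type; no device
  // compiler is needed in this translation unit.
  vtkm::cont::ArrayHandle<vtkm::Range> ranges = vtkm::cont::ArrayRangeCompute(array);
  return ranges.ReadPortal().Get(0);
}
```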

@ -50,7 +50,7 @@ As explained below, `ArrayHandle` holds some fixed number of `Buffer`
objects. (The number can be zero for implicit `ArrayHandle`s.) Because all
the interaction with the devices happen through `Buffer`, it will no longer
be necessary to compile any reference to `ArrayHandle` for devices (e.g.
you wont have to use nvcc just because the code links `ArrayHandle.h`).
you won't have to use nvcc just because the code links `ArrayHandle.h`).
## Storage
@ -121,12 +121,12 @@ With the current version of `ArrayHandle`, if you want to take data from
one `ArrayHandle` you pretty much have to create a special template to wrap
another `ArrayHandle` around that. With this new design, it is possible to
take data from one `ArrayHandle` and give it to another `ArrayHandle` of a
completely different type. You cant do this willy-nilly since different
completely different type. You can't do this willy-nilly since different
`ArrayHandle` types will interpret buffers differently. But there can be
some special important use cases.
One such case could be an `ArrayHandle` that provides strided access to a
buffer. (Lets call it `ArrayHandleStride`.) The idea is that it interprets
buffer. (Let's call it `ArrayHandleStride`.) The idea is that it interprets
the buffer as an array for a particular type (like a basic `ArrayHandle`)
but also defines a stride, skip, and repeat so that given an index it looks
up the value `((index / skip) % repeat) * stride`. The point is that it can
@ -165,7 +165,7 @@ this redesign, the data need to be stored in some fixed number of memory
buffers.
This is a pretty open requirement. I suspect most data formats will be
storable in this. The users guide has an example of data stored in a
storable in this. The user's guide has an example of data stored in a
`std::deque` that will not be representable. But that is probably not a
particularly practical example.

@ -0,0 +1,9 @@
# Precompiled `ArrayCopy` for `UnknownArrayHandle`
Previously, in order to copy an `UnknownArrayHandle`, you had to specify
some subset of types and then specially compile a copy for each potential
type. With the new ability to extract a component from an
`UnknownArrayHandle`, it is now feasible to precompile copying an
`UnknownArrayHandle` to another array. This greatly reduces the overhead of
using `ArrayCopy` to copy `UnknownArrayHandle`s while simultaneously
increasing the likelihood that the copy will be successful.
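A hedged sketch of what this enables (the header path is assumed):
``` cpp
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/UnknownArrayHandle.h>

void CopyUnknown(const vtkm::cont::UnknownArrayHandle& source,
                 vtkm::cont::UnknownArrayHandle& destination)
{
  // No CastAndCall over value/storage type lists is needed in this
  // translation unit; the copy is resolved inside the precompiled library.
  vtkm::cont::ArrayCopy(source, destination);
}
```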

@ -0,0 +1,26 @@
# Recombine extracted component arrays from unknown arrays
Building on the recent capability to [extract component arrays from unknown
arrays](array-extract-component.md), there is now also the ability to
recombine these extracted arrays to a single `ArrayHandle`. It might seem
counterintuitive to break an `ArrayHandle` into component arrays and then
combine the component arrays back into a single `ArrayHandle`, but this is
a very handy way to run algorithms without knowing the exact `ArrayHandle`
type.
Recall that when extracting a component array from an `UnknownArrayHandle`
you only need to know the base component type of the value type of the
contained `ArrayHandle`. That makes extracting a component array independent
of both the size of any `Vec` value type and the storage type.
The added `UnknownArrayHandle::ExtractArrayFromComponents` method allows
you to use the functionality to transform the unknown array handle to a
form of `ArrayHandle` that depends only on this base component type. This
method internally uses a new `ArrayHandleRecombineVec` class, but that
class is mostly intended for internal use by this method.
As an added convenience, `UnknownArrayHandle` now also provides the
`CastAndCallWithExtractedArray` method. This method works like other
`CastAndCall`s except that it uses the `ExtractArrayFromComponents` feature
to allow you to handle most `ArrayHandle` types with few template
instances.
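For instance, a sketch of the convenience method (the functor and its body are hypothetical; only the method name comes from the description above):
``` cpp
#include <iostream>
#include <vtkm/cont/UnknownArrayHandle.h>

struct PrintSizeFunctor
{
  template <typename ArrayType>
  void operator()(const ArrayType& concreteArray) const
  {
    std::cout << concreteArray.GetNumberOfValues() << " values\n";
  }
};

void Report(const vtkm::cont::UnknownArrayHandle& array)
{
  // The functor is instantiated once per base component type rather than
  // once per (value type, storage type) combination.
  array.CastAndCallWithExtractedArray(PrintSizeFunctor{});
}
```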

@ -0,0 +1,19 @@
# Removed old `ArrayHandle` transfer mechanism
Deleted the default implementation of `ArrayTransfer`. `ArrayTransfer` is
used with the old `ArrayHandle` style to move data between host and device.
The new version of `ArrayHandle` does not use `ArrayTransfer` at all
because this functionality is wrapped in `Buffer` (where it can exist in a
precompiled library).
Once all the old `ArrayHandle` classes are gone, this class will be removed
completely. Although all the remaining `ArrayHandle` classes provide their
own versions of `ArrayTransfer`, they still need the prototype to be
defined to specialize. Thus, the guts of the default `ArrayTransfer` are
removed and replaced with a compile error if you try to compile it.
Also removed `ArrayManagerExecution`. This class was used indirectly by the
old `ArrayHandle`, through `ArrayHandleTransfer`, to move data to and from
a device. This functionality has been replaced in the new `ArrayHandle`s
through the `Buffer` class (which can be compiled into libraries rather
than make every translation unit compile their own template).

@ -0,0 +1,22 @@
# Support `ArrayHandleSOA` as a "default" array
Many programs, particularly simulations, store fields of vectors in
separate arrays for each component. This maps to the storage of
`ArrayHandleSOA`. The VTK-m code tends to prefer the AOS storage (which is
what is implemented in `ArrayHandleBasic`, and the behavior of which is
inherited from VTK). VTK-m should better support adding `ArrayHandleSOA` as
one of the types.
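As a sketch of what this storage looks like in use (the `make_ArrayHandleSOA` overload taking a copy flag plus per-component vectors is an assumption):
``` cpp
#include <vector>
#include <vtkm/cont/ArrayHandleSOA.h>

void Example()
{
  // Simulation-style layout: one contiguous array per vector component.
  std::vector<vtkm::FloatDefault> xs = { 0.0f, 1.0f, 2.0f };
  std::vector<vtkm::FloatDefault> ys = { 3.0f, 4.0f, 5.0f };
  std::vector<vtkm::FloatDefault> zs = { 6.0f, 7.0f, 8.0f };

  // Present the three component arrays as a single array of Vec3f values
  // without interleaving (copying) them into AOS order.
  auto soa = vtkm::cont::make_ArrayHandleSOA(vtkm::CopyFlag::Off, xs, ys, zs);
  (void)soa;
}
```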
We now have a set of default types for Ascent that uses SOA as one of the
basic types.
Part of this change includes an intentional feature regression of
`ArrayHandleSOA` to only support value types of `Vec`. Previously, scalar
types were supported. However, the behavior of `ArrayHandleSOA` is exactly
the same as `ArrayHandleBasic`, except a lot more template code has to be
generated. That itself is not a huge deal, but because you have 2 types
that essentially do the same thing, a lot of template code in VTK-m would
unwind to create two separate code paths that do the same thing with the
same data. To avoid creating those code paths, we simply make any use of
`ArrayHandleSOA` without a `Vec` value invalid. This will prevent VTK-m
from creating those code paths.

docs/changelog/vecflat.md (new file, 19 lines)

@ -0,0 +1,19 @@
# Added VecFlat class
`vtkm::VecFlat` is a wrapper around a `Vec`-like class that may be a nested
series of vectors. For example, if you run a gradient operation on a vector
field, you are probably going to get a `Vec` of `Vec`s that looks something
like `vtkm::Vec<vtkm::Vec<vtkm::Float32, 3>, 3>`. That is fine, but what if
you want to treat the result simply as a `Vec` of size 9?
The `VecFlat` wrapper class allows you to do this. Simply place the nested
`Vec` as an argument to `VecFlat` and it will behave as a flat `Vec` class.
(In fact, `VecFlat` is a subclass of `Vec`.) The `VecFlat` class can be
copied to and from the nested `Vec` it is wrapping.
There is a `vtkm::make_VecFlat` convenience function that takes an object
and returns a `vtkm::VecFlat` wrapped around it.
`VecFlat` works with any `Vec`-like object as well as scalar values.
However, any type used with `VecFlat` must have `VecTraits` defined and the
number of components must be static (i.e. known at compile time).
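A short sketch of the usage described above (the header path and flattening order are assumptions):
``` cpp
#include <vtkm/Types.h>
#include <vtkm/VecFlat.h>

void Example()
{
  // A nested Vec, e.g. the gradient of a vector field at a point.
  vtkm::Vec<vtkm::Vec<vtkm::Float32, 3>, 3> nested = { { 1.f, 2.f, 3.f },
                                                       { 4.f, 5.f, 6.f },
                                                       { 7.f, 8.f, 9.f } };

  // Treat it as a flat Vec of 9 components.
  auto flat = vtkm::make_VecFlat(nested);
  vtkm::Float32 middle = flat[4]; // assuming row-major flattening: nested[1][1] == 5
  (void)middle;
}
```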

@ -24,6 +24,7 @@ if(VTKm_ENABLE_EXAMPLES)
add_subdirectory(histogram)
add_subdirectory(ising)
add_subdirectory(lagrangian)
add_subdirectory(logistic_map)
add_subdirectory(mesh_quality)
add_subdirectory(multi_backend)
add_subdirectory(oscillator)

@ -112,7 +112,7 @@ int main(int argc, char* argv[])
vtkm::cont::Field resultField = output.GetField("saddlePeak");
;
vtkm::cont::ArrayHandle<vtkm::Pair<vtkm::Id, vtkm::Id>> saddlePeak;
resultField.GetData().CopyTo(saddlePeak);
resultField.GetData().AsArrayHandle(saddlePeak);
return 0;
}

@ -113,7 +113,7 @@ int main(int argc, char* argv[])
vtkm::cont::Field resultField = output.GetField("saddlePeak");
vtkm::cont::ArrayHandle<vtkm::Pair<vtkm::Id, vtkm::Id>> saddlePeak;
resultField.GetData().CopyTo(saddlePeak);
resultField.GetData().AsArrayHandle(saddlePeak);
return 0;
}

@ -762,14 +762,13 @@ int main(int argc, char* argv[])
if (numLevels > 0) // if compute isovalues
{
// Get the data values for computing the explicit branch decomposition
// TODO Can we cast the handle we get from GetData() instead of doing a CopyTo?
#ifdef WITH_MPI
vtkm::cont::ArrayHandle<ValueType> dataField;
result.GetPartitions()[0].GetField(0).GetData().CopyTo(dataField);
result.GetPartitions()[0].GetField(0).GetData().AsArrayHandle(dataField);
bool dataFieldIsSorted = true;
#else
vtkm::cont::ArrayHandle<ValueType> dataField;
useDataSet.GetField(0).GetData().CopyTo(dataField);
useDataSet.GetField(0).GetData().AsArrayHandle(dataField);
bool dataFieldIsSorted = false;
#endif
@ -844,7 +843,7 @@ int main(int argc, char* argv[])
//vtkm::cont::Field resultField = result.GetField();
//vtkm::cont::ArrayHandle<vtkm::Pair<vtkm::Id, vtkm::Id> > saddlePeak;
//resultField.GetData().CopyTo(saddlePeak);
//resultField.GetData().AsArrayHandle(saddlePeak);
// Dump out contour tree for comparison
if (rank == 0 && printContourTree)
@ -854,7 +853,7 @@ int main(int argc, char* argv[])
ctaug_ns::EdgePairArray saddlePeak;
ctaug_ns::ProcessContourTree::CollectSortedSuperarcs(
filter.GetContourTree(), filter.GetSortOrder(), saddlePeak);
ctaug_ns::PrintEdgePairArray(saddlePeak);
ctaug_ns::PrintEdgePairArrayColumnLayout(saddlePeak, std::cout);
}
#ifdef WITH_MPI
@ -889,24 +888,7 @@ int main(int argc, char* argv[])
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " ---------------- Contour Tree Array Sizes ---------------------" << std::endl
<< std::setw(42) << std::left << " #Nodes"
<< ": " << ct.Nodes.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Arcs"
<< ": " << ct.Arcs.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Superparents"
<< ": " << ct.Superparents.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Superarcs"
<< ": " << ct.Superarcs.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Supernodes"
<< ": " << ct.Supernodes.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Hyperparents"
<< ": " << ct.Hyperparents.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #WhenTransferred"
<< ": " << ct.WhenTransferred.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Hypernodes"
<< ": " << ct.Hypernodes.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Hyperarcs"
<< ": " << ct.Hyperarcs.GetNumberOfValues() << std::endl);
<< ct.PrintArraySizes());
// Print hyperstructure statistics
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl

@ -50,7 +50,7 @@
## Hamish Carr (University of Leeds), Gunther H. Weber (LBNL), and
## Oliver Ruebel (LBNL)
##==============================================================================
cmake_minimum_required(VERSION 3.8...3.15 FATAL_ERROR)
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
# Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
@ -60,19 +60,29 @@ find_package(VTKm REQUIRED QUIET)
####################################
if (VTKm_ENABLE_MPI)
add_executable(ContourTree_Distributed ContourTreeApp.cxx)
target_link_libraries(ContourTree_Distributed vtkm_filter MPI::MPI_CXX)
target_link_libraries(ContourTree_Distributed vtkm_filter vtkm_io MPI::MPI_CXX)
vtkm_add_target_information(ContourTree_Distributed
MODIFY_CUDA_FLAGS
DEVICE_SOURCES ContourTreeApp.cxx)
target_compile_definitions(ContourTree_Distributed PRIVATE)
target_compile_definitions(ContourTree_Distributed PRIVATE "WITH_MPI")
option (VTKM_EXAMPLE_CONTOURTREE_ENABLE_DEBUG_PRINT Off)
mark_as_advanced(VTKM_EXAMPLE_CONTOURTREE_ENABLE_DEBUG_PRINT)
if (VTKM_EXAMPLE_CONTOURTREE_ENABLE_DEBUG_PRINT)
target_compile_definitions(ContourTree_Distributed PRIVATE "DEBUG_PRINT")
target_compile_definitions(ContourTree_Distributed PRIVATE "DEBUG_PRINT_CTUD")
endif()
if (TARGET vtkm::tbb)
target_compile_definitions(ContourTree_Distributed PRIVATE "ENABLE_SET_NUM_THREADS")
endif()
add_executable(TreeCompiler TreeCompilerApp.cxx)
target_link_libraries(TreeCompiler vtkm_filter)
vtkm_add_target_information(TreeCompiler DROP_UNUSED_SYMBOLS)
configure_file(split_data_2d.py split_data_2d.py COPYONLY)
configure_file(split_data_3d.py split_data_3d.py COPYONLY)
configure_file(hact_test.sh hact_test.sh COPYONLY)
configure_file(testrun.sh testrun.sh COPYONLY)
endif()

File diff suppressed because it is too large

@ -60,90 +60,35 @@
// Oliver Ruebel (LBNL)
//==============================================================================
#ifndef vtk_m_worklet_contourtree_augmented_contourtree_mesh_inc_combined_vector_h
#define vtk_m_worklet_contourtree_augmented_contourtree_mesh_inc_combined_vector_h
#include <stdio.h>
#include <vtkm/worklet/contourtree_distributed/TreeCompiler.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/worklet/contourtree_augmented/Types.h>
// main routine
int main(int argc, char** argv)
{ // main()
// the compiler for putting them together
vtkm::worklet::contourtree_distributed::TreeCompiler compiler;
namespace vtkm
{
namespace worklet
{
namespace contourtree_augmented
{
namespace mesh_dem_contourtree_mesh_inc
{
// we just loop through the arguments, reading them in and adding them
for (int argument = 1; argument < argc; argument++)
{ // per argument
// create a temporary file
FILE* inFile = fopen(argv[argument], "r");
// if it's bad, choke
if (inFile == NULL)
{ // bad filename
printf("Bad filename %s\n", argv[argument]);
return EXIT_FAILURE;
} // bad filename
template <typename T, typename DeviceAdapter>
class CombinedVector
{
public:
typedef typename vtkm::cont::ArrayHandle<T>::template ExecutionTypes<DeviceAdapter>::PortalConst
TArrayPortalType;
VTKM_CONT
CombinedVector(const vtkm::cont::ArrayHandle<T>& ThisVector,
const vtkm::cont::ArrayHandle<T>& OtherVector,
vtkm::cont::Token& token)
{
this->ThisVectorPortal = ThisVector.PrepareForInput(DeviceAdapter(), token);
this->OtherVectorPortal = OtherVector.PrepareForInput(DeviceAdapter(), token);
}
// read and append
compiler.ReadBinary(inFile);
// See contourtree_augmented/Types.h for definitions of IsThis() and CV_OTHER_FLAG
} // per argument
VTKM_EXEC_CONT
T operator[](vtkm::Id idx) const
{
return IsThis(idx) ? this->ThisVectorPortal.Get(MaskedIndex(idx))
: this->OtherVectorPortal.Get(MaskedIndex(idx));
}
VTKM_EXEC_CONT
vtkm::Id GetNumberOfValues() const
{
return ThisVectorPortal.GetNumberOfValues() + OtherVectorPortal.GetNumberOfValues();
}
private:
TArrayPortalType ThisVectorPortal;
TArrayPortalType OtherVectorPortal;
}; // class CombinedVector
template <typename T>
class CombinedVectorExecObj : public vtkm::cont::ExecutionObjectBase
{
public:
CombinedVectorExecObj(const vtkm::cont::ArrayHandle<T>& tV, const vtkm::cont::ArrayHandle<T>& oV)
: ThisVector(tV)
, OtherVector(oV)
{
}
template <typename DeviceTag>
CombinedVector<T, DeviceTag> PrepareForExecution(DeviceTag, vtkm::cont::Token& token) const
{
return CombinedVector<T, DeviceTag>(this->ThisVector, this->OtherVector, token);
}
vtkm::Id GetNumberOfValues() const
{
return ThisVector.GetNumberOfValues() + OtherVector.GetNumberOfValues();
}
private:
const vtkm::cont::ArrayHandle<T>& ThisVector;
const vtkm::cont::ArrayHandle<T>& OtherVector;
}; // class CombinedVectorExecObj
} // namespace mesh_dem_contourtree_mesh_inc
} // namespace contourtree_augmented
} // namespace worklet
} // namespace vtkm
#endif
// now compile and print
compiler.ComputeSuperarcs();
compiler.PrintSuperarcs(true);
return EXIT_SUCCESS;
} // main()

@ -0,0 +1,39 @@
#!/bin/sh
GTCT_DIR=${GTCT_DIR:-${HOME}/devel/parallel-peak-pruning/ContourTree/SweepAndMergeSerial/out}
RED=""
GREEN=""
NC=""
if [ -t 1 ]; then
# If stdout is a terminal, color Pass and FAIL green and red, respectively
RED=$(tput setaf 1)
GREEN=$(tput setaf 2)
NC=$(tput sgr0)
fi
echo "Removing previously generated files"
rm *.log *.dat
echo "Copying target file "$1 "into current directory"
filename=${1##*/}
fileroot=${filename%.txt}
cp $1 ${filename}
echo "Splitting data into "$2" x "$2" parts"
./split_data_2d.py ${filename} $2
rm ${filename}
echo "Running HACT"
n_parts=$(($2*$2))
echo mpirun -np 4 ./ContourTree_Distributed -d Any --preSplitFiles --saveTreeCompilerData --numBlocks=${n_parts} ${fileroot}_part_%d_of_${n_parts}.txt
mpirun -np 4 ./ContourTree_Distributed -d Any --preSplitFiles --saveTreeCompilerData --numBlocks=${n_parts} ${fileroot}_part_%d_of_${n_parts}.txt
rm ${fileroot}_part_*_of_${n_parts}.txt
echo "Compiling Outputs"
./TreeCompiler TreeCompilerOutput_*.dat | sort > outsort${fileroot}_$2x$2.txt
echo "Diffing"
diff outsort${fileroot}_$2x$2.txt ${GTCT_DIR}/outsort${fileroot}.txt
if test $? -eq 0; then echo "${GREEN}Pass${NC}"; rm outsort${fileroot}_$2x$2.txt; else echo "${RED}FAIL${NC}"; fi;
# echo "Generating Dot files"
# ./makedot.sh

@ -0,0 +1,59 @@
#!/usr/bin/env python3
import numpy as np
import math
import os
import sys
# Read a 2D text file from disk into a NumPy array
def read_file(fn):
data = np.fromfile(fn, dtype=np.int, sep=" ")
data = data[2:].reshape(tuple(data[0:2]))
return data
# Save a block from a 2D NumPy array to disk
def save_piece(fn, array, offset, n_blocks, block_index, size):
with open(fn, 'w') as f:
f.write('#GLOBAL_EXTENTS ' + ' '.join(map(str, array.shape)) + '\n')
f.write('#OFFSET ' + ' '.join(map(str, offset))+'\n')
f.write('#BLOCKS_PER_DIM ' + ' '.join(map(str, n_blocks))+'\n')
f.write('#BLOCK_INDEX ' + ' '.join(map(str, block_index))+'\n')
f.write(' '.join(map(str, size)) + '\n')
np.savetxt(f, array[offset[0]:offset[0]+size[0],offset[1]:offset[1]+size[1]], fmt='%.16g')
# Compute split points for splitting into n blocks
def split_points(shape, nblocks):
dx = float(shape-1) / nblocks
return [ math.floor(i*dx) for i in range(nblocks)] + [ shape - 1 ]
if len(sys.argv) < 2:
print("Error: Usage split_data_2d.py <filename> [<n_blocks_per_axis>|<n_blocks_x> <n_blocks_y>]", file=sys.stderr)
sys.exit(1)
# Parse parameters
in_filename = sys.argv[1]
n_blocks = (2, 2)
if len(sys.argv) > 2:
if len(sys.argv) >= 4:
n_blocks = (int(sys.argv[2]), int(sys.argv[3]))
else:
n_blocks = (int(sys.argv[2]), int(sys.argv[2]))
name, ext = os.path.splitext(in_filename)
out_filename_pattern = name + '_part_%d_of_' + str(n_blocks[0]*n_blocks[1]) + ext
# Read data
data = read_file(in_filename)
# Compute split points
split_points_x = split_points(data.shape[0], n_blocks[0])
split_points_y = split_points(data.shape[1], n_blocks[1])
# Save blocks
block_no = 0
for block_index_x, (x_start, x_stop) in enumerate(zip(split_points_x, split_points_x[1:])):
for block_index_y, (y_start, y_stop) in enumerate(zip(split_points_y, split_points_y[1:])):
n_x = x_stop - x_start + 1
n_y = y_stop - y_start + 1
save_piece(out_filename_pattern % block_no, data, (x_start, y_start), n_blocks, (block_index_x, block_index_y), (n_x, n_y))
# print("Wrote block %d, origin %d %d, size %d %d" % (block_no, x_start, y_start, n_x, n_y))
block_no += 1

@ -0,0 +1,101 @@
#!/usr/bin/env python3
import numpy as np
import math
import os
import sys
# For readBOV
from functools import reduce
import operator
# Read a 3D text file from disk into a NumPy array
# ... Plain text
def read_file(fn):
data = np.fromfile(fn, dtype=np.float, sep=" ")
data = data[3:].reshape((int(data[2]),int(data[0]),int(data[1])))
return data
# ... VisItBOV
def readBOV(filename):
with open(filename, 'r') as f:
header = dict([(lambda x: (x[0].strip().lower(), x[1].strip()))(l.strip().split(':')) for l in f.readlines()])
if 'data_endian' in header:
if header['data_endian'].lower() != sys.byteorder:
print('Unsupported endianness ' + header['data_endian'].lower())
return None
shape = tuple([int(x) for x in header['data_size'].split(' ')])
count = reduce(operator.mul, shape, 1)
dtype_map = { 'float': 'float32', 'double': 'float64', 'char': 'uint8' }
dtype = np.dtype(dtype_map[header['data_format'].lower()])
dataname = os.path.realpath(os.path.join(os.path.dirname(filename), header['data_file']))
if 'variable' not in header:
header['variable'] = 'val'
return (header['variable'], header['centering'].lower(), np.fromfile(dataname, dtype, count).reshape(tuple(reversed(shape))))
return None
# Save a block from a 3D NumPy array to disk
# Python order is slice, row, col
# TXT file order is row, col, slice
# offset and size are in file order
def save_piece(fn, array, offset, n_blocks, block_index, size):
with open(fn, 'w') as f:
perm = [1, 2, 0]
f.write('#GLOBAL_EXTENTS ' + ' '.join(map(str, [array.shape[i] for i in perm])) + '\n')
f.write('#OFFSET ' + ' '.join(map(str, offset))+'\n')
f.write('#BLOCKS_PER_DIM ' + ' '.join(map(str, n_blocks))+'\n')
f.write('#BLOCK_INDEX ' + ' '.join(map(str, block_index))+'\n')
f.write(' '.join(map(str, size)) + '\n')
if fn[-5:]=='.bdem':
array[offset[2]:offset[2]+size[2],offset[0]:offset[0]+size[0],offset[1]:offset[1]+size[1]].astype(np.double).tofile(f)
else:
for s in range(offset[2], offset[2]+size[2]):
np.savetxt(f, array[s, offset[0]:offset[0]+size[0],offset[1]:offset[1]+size[1]], fmt='%.16g')
f.write('\n')
# Compute split points for splitting into n blocks
def split_points(shape, nblocks):
dx = float(shape-1) / nblocks
return [ math.floor(i*dx) for i in range(nblocks)] + [ shape - 1 ]
if len(sys.argv) < 3:
print("Error: Usage split_data_3d.py <filename> <outfilepattern> [<n_blocks_per_axis>|<n_blocks_x> <n_blocks_y> <n_blocks_z>]", file=sys.stderr)
sys.exit(1)
# Parse parameters
in_filename = sys.argv[1]
name, ext = os.path.splitext(in_filename)
#out_filename_pattern = name + '_split_%d.txt'
out_filename_pattern = sys.argv[2]
n_blocks = (2, 2, 2)
if len(sys.argv) > 3:
if len(sys.argv) >= 6:
n_blocks = (int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5]))
else:
n_blocks = (int(sys.argv[3]), int(sys.argv[3]), int(sys.argv[3]))
# Read data
if ext == '.bov':
data = readBOV(in_filename)[2]
else:
data = read_file(in_filename)
# Python order is slice, row, col
# Compute split points
split_points_s = split_points(data.shape[0], n_blocks[2])
split_points_r = split_points(data.shape[1], n_blocks[0])
split_points_c = split_points(data.shape[2], n_blocks[1])
# Create the file that records the slice values
slice_filename = name + '_slices.txt'
# Save blocks
block_no = 0
for block_index_s, (s_start, s_stop) in enumerate(zip(split_points_s, split_points_s[1:])):
for block_index_r, (r_start, r_stop) in enumerate(zip(split_points_r, split_points_r[1:])):
for block_index_c, (c_start, c_stop) in enumerate(zip(split_points_c, split_points_c[1:])):
n_s = s_stop - s_start + 1
n_r = r_stop - r_start + 1
n_c = c_stop - c_start + 1
save_piece(out_filename_pattern % block_no, data, (r_start, c_start, s_start), n_blocks, (block_index_r, block_index_c, block_index_s), (n_r, n_c, n_s))
block_no += 1

@ -0,0 +1,99 @@
#!/bin/sh
mkdir -p out
DATA_DIR=${DATA_DIR:-${HOME}/devel/parallel-peak-pruning/Data/2D}
if [ ! -d $DATA_DIR ]; then
echo "Error: Directory $DATA_DIR does not exist!"
exit 1;
fi;
echo
echo "Starting Timing Runs"
echo
echo "8x9 Test Set"
./hact_test.sh $DATA_DIR/8x9test.txt 2
./hact_test.sh $DATA_DIR/8x9test.txt 4
# ./hact_test.sh $DATA_DIR/8x9test.txt 8
echo
echo "Vancouver Test Set"
./hact_test.sh $DATA_DIR/vanc.txt 2
./hact_test.sh $DATA_DIR/vanc.txt 4
# ./hact_test.sh $DATA_DIR/vanc.txt 8
# ./hact_test.sh $DATA_DIR/vanc.txt 16
echo
echo "Vancouver SWSW Test Set"
./hact_test.sh $DATA_DIR/vancouverSWSW.txt 2
./hact_test.sh $DATA_DIR/vancouverSWSW.txt 4
./hact_test.sh $DATA_DIR/vancouverSWSW.txt 8
# ./hact_test.sh $DATA_DIR/vancouverSWSW.txt 16
echo
echo "Vancouver SWNW Test Set"
./hact_test.sh $DATA_DIR/vancouverSWNW.txt 2
./hact_test.sh $DATA_DIR/vancouverSWNW.txt 4
./hact_test.sh $DATA_DIR/vancouverSWNW.txt 8
# ./hact_test.sh $DATA_DIR/vancouverSWNW.txt 16
echo
echo "Vancouver SWSE Test Set"
./hact_test.sh $DATA_DIR/vancouverSWSE.txt 2
./hact_test.sh $DATA_DIR/vancouverSWSE.txt 4
./hact_test.sh $DATA_DIR/vancouverSWSE.txt 8
# ./hact_test.sh $DATA_DIR/vancouverSWSE.txt 16
echo
echo "Vancouver SWNE Test Set"
./hact_test.sh $DATA_DIR/vancouverSWNE.txt 2
./hact_test.sh $DATA_DIR/vancouverSWNE.txt 4
./hact_test.sh $DATA_DIR/vancouverSWNE.txt 8
# ./hact_test.sh $DATA_DIR/vancouverSWNE.txt 16
echo
echo "Vancouver NE Test Set"
./hact_test.sh $DATA_DIR/vancouverNE.txt 2
./hact_test.sh $DATA_DIR/vancouverNE.txt 4
./hact_test.sh $DATA_DIR/vancouverNE.txt 8
# ./hact_test.sh $DATA_DIR/vancouverNE.txt 16
echo
echo "Vancouver NW Test Set"
./hact_test.sh $DATA_DIR/vancouverNW.txt 2
./hact_test.sh $DATA_DIR/vancouverNW.txt 4
./hact_test.sh $DATA_DIR/vancouverNW.txt 8
# ./hact_test.sh $DATA_DIR/vancouverNW.txt 16
echo
echo "Vancouver SE Test Set"
./hact_test.sh $DATA_DIR/vancouverSE.txt 2
./hact_test.sh $DATA_DIR/vancouverSE.txt 4
./hact_test.sh $DATA_DIR/vancouverSE.txt 8
# ./hact_test.sh $DATA_DIR/vancouverSE.txt 16
echo
echo "Vancouver SW Test Set"
./hact_test.sh $DATA_DIR/vancouverSW.txt 2
./hact_test.sh $DATA_DIR/vancouverSW.txt 4
./hact_test.sh $DATA_DIR/vancouverSW.txt 8
# ./hact_test.sh $DATA_DIR/vancouverSW.txt 16
echo
echo "Icefields Test Set"
./hact_test.sh $DATA_DIR/icefield.txt 2
./hact_test.sh $DATA_DIR/icefield.txt 4
./hact_test.sh $DATA_DIR/icefield.txt 8
# ./hact_test.sh $DATA_DIR/icefield.txt 16
# ./hact_test.sh $DATA_DIR/icefield.txt 32
# ./hact_test.sh $DATA_DIR/icefield.txt 64
echo
echo "GTOPO30 Full Tiny Test Set"
./hact_test.sh $DATA_DIR/gtopo_full_tiny.txt 2
./hact_test.sh $DATA_DIR/gtopo_full_tiny.txt 4
./hact_test.sh $DATA_DIR/gtopo_full_tiny.txt 8
# ./hact_test.sh $DATA_DIR/gtopo_full_tiny.txt 16
# ./hact_test.sh $DATA_DIR/gtopo_full_tiny.txt 32
# ./hact_test.sh $DATA_DIR/gtopo_full_tiny.txt 64
echo
echo "GTOPO30 UK Tile Test Set"
./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 2
./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 4
./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 8
# ./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 16
# ./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 32
# ./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 64
# ./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 128
# ./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 256
# ./hact_test.sh $DATA_DIR/gtopo30w020n40.txt 512
echo "Done"

@ -15,6 +15,7 @@
/// Reference: Computational Physics 2nd Edition, Nicholas Giordano & Hisao Nakanishi
#include <iomanip>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayHandleRandomUniformReal.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
@ -96,7 +97,7 @@ int main(int argc, char** argv)
auto dataSet = SpinField({ 5, 5 });
vtkm::cont::ArrayHandle<vtkm::Float32> spins;
dataSet.GetCellField("spins").GetData().CopyTo(spins);
dataSet.GetCellField("spins").GetData().AsArrayHandle(spins);
vtkm::rendering::Scene scene;
vtkm::rendering::Actor actor(dataSet.GetCellSet(),

@ -12,6 +12,19 @@ cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
if ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
# CUDA architecture has a limited amount of memory available for constants. The CUDA
# compiler uses this space to hold constants for some optimizations. However, for large
# kernels, the number of constants needed might be larger than the constant space
# available. For these conditions, you have to disable this form of optimization with
# the -Xptxas --disable-optimizer-constants flags.
# TODO: Find a more elegant way to do this. Either figure out a way around this problem
# or add more general flags to vtkm_library/vtkm_unit_tests for sources with "large" kernels.
set_source_files_properties(lagrangian.cxx PROPERTIES
COMPILE_OPTIONS "-Xptxas;--disable-optimizer-constants"
)
endif()
add_executable(Lagrangian lagrangian.cxx ABCfield.h)
target_link_libraries(Lagrangian PRIVATE vtkm_filter)
vtkm_add_target_information(Lagrangian

@ -0,0 +1,20 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
cmake_minimum_required(VERSION 3.12...3.19 FATAL_ERROR)
project(LogisticMap CXX)
find_package(VTKm REQUIRED QUIET)
add_executable(LogisticMap LogisticMap.cxx)
target_link_libraries(LogisticMap PRIVATE vtkm_io)
vtkm_add_target_information(LogisticMap
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES LogisticMap.cxx)

@ -0,0 +1,65 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <cassert>
#include <cmath>
#include <iostream>
#include <vector>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/io/ImageWriterPNG.h>
int main()
{
size_t height = 1800;
size_t width = height * 1.618;
vtkm::cont::DataSetBuilderUniform dsb;
vtkm::cont::DataSet ds = dsb.Create(vtkm::Id2(width, height));
std::vector<double> x(width, 0.5);
double rmin = 2.9;
for (size_t i = 0; i < width; ++i)
{
double r = rmin + (4.0 - rmin) * i / (width - 1);
int n = 0;
// 2048 should be enough iterations to be "converged";
// though of course not all of the iterations actually converge; many cycle or are chaotic.
while (n++ < 2048)
{
x[i] = r * x[i] * (1 - x[i]);
}
}
vtkm::Vec4f v(1.0, 0.5, 0.0, 0.0);
std::vector<vtkm::Vec4f> pixelValues(width * height, vtkm::Vec4f(0, 0, 0, 0));
size_t iterates = 0;
// We don't need more iterates than pixels of height,
// by the pigeonhole principle.
while (iterates++ < height)
{
for (size_t i = 0; i < width; ++i)
{
double r = rmin + (4.0 - rmin) * i / (width - 1);
double y = x[i];
assert(y >= 0 && y <= 1);
size_t j = std::round(y * (height - 1));
pixelValues[j * width + i] = v;
x[i] = r * x[i] * (1 - x[i]);
}
}
std::string colorFieldName = "pixels";
ds.AddPointField(colorFieldName, pixelValues);
std::string filename = "logistic.png";
vtkm::io::ImageWriterPNG writer(filename);
writer.WriteDataSet(ds, colorFieldName);
std::cout << "Now open " << filename << "\n";
}

@ -66,10 +66,11 @@ void TubeThatSpiral(vtkm::FloatDefault radius, vtkm::Id numLineSegments, vtkm::I
// This generates a new pointset, and new cell set.
vtkm::cont::ArrayHandle<vtkm::Vec3f> tubePoints;
vtkm::cont::CellSetSingleType<> tubeCells;
tubeWorklet.Run(ds.GetCoordinateSystem().GetData().Cast<vtkm::cont::ArrayHandle<vtkm::Vec3f>>(),
ds.GetCellSet(),
tubePoints,
tubeCells);
tubeWorklet.Run(
ds.GetCoordinateSystem().GetData().AsArrayHandle<vtkm::cont::ArrayHandle<vtkm::Vec3f>>(),
ds.GetCellSet(),
tubePoints,
tubeCells);
vtkm::cont::DataSet tubeDataset;
tubeDataset.AddCoordinateSystem(vtkm::cont::CoordinateSystem("coords", tubePoints));

@ -52,7 +52,7 @@ class Redistributor
vtkm::filter::ExtractPoints extractor;
extractor.SetCompactPoints(true);
extractor.SetImplicitFunction(vtkm::cont::make_ImplicitFunctionHandle(box));
extractor.SetImplicitFunction(box);
return extractor.Execute(input);
}
@ -71,8 +71,12 @@ class Redistributor
if (this->Field.GetNumberOfValues() == 0)
{
// Copy metadata
this->Field = field;
field.GetData().CastAndCall(Allocator{}, this->Field, this->TotalSize);
// Reset array
this->Field.SetData(field.GetData().NewInstanceBasic());
// Preallocate array
this->Field.GetData().Allocate(this->TotalSize);
}
else
{
@ -80,26 +84,14 @@ class Redistributor
this->Field.GetAssociation() == field.GetAssociation());
}
field.GetData().CastAndCall(Appender{}, this->Field, this->CurrentIdx);
field.GetData().CastAndCallForTypes<VTKM_DEFAULT_TYPE_LIST, VTKM_DEFAULT_STORAGE_LIST>(
Appender{}, this->Field, this->CurrentIdx);
this->CurrentIdx += field.GetNumberOfValues();
}
const vtkm::cont::Field& GetResult() const { return this->Field; }
private:
struct Allocator
{
template <typename T, typename S>
void operator()(const vtkm::cont::ArrayHandle<T, S>&,
vtkm::cont::Field& field,
vtkm::Id totalSize) const
{
vtkm::cont::ArrayHandle<T> init;
init.Allocate(totalSize);
field.SetData(init);
}
};
struct Appender
{
template <typename T, typename S>
@ -108,7 +100,7 @@ class Redistributor
vtkm::Id currentIdx) const
{
vtkm::cont::ArrayHandle<T> farray =
field.GetData().template Cast<vtkm::cont::ArrayHandle<T>>();
field.GetData().template AsArrayHandle<vtkm::cont::ArrayHandle<T>>();
vtkm::cont::Algorithm::CopySubRange(data, 0, data.GetNumberOfValues(), farray, currentIdx);
}
};

@ -225,6 +225,46 @@ VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl(T* addr,
return false;
}
}
#if __CUDA_ARCH__ < 200
VTKM_EXEC_CONT inline vtkm::Float32 vtkmAtomicAddImpl(vtkm::Float32* address,
vtkm::Float32 value,
vtkm::MemoryOrder order)
{
union {
vtkm::UInt32 i;
vtkm::Float32 f;
} expected{ .f = *address }, desired{};
do
{
desired.f = expected.f + value;
} while (!AtomicCompareExchangeImpl(
reinterpret_cast<vtkm::UInt32*>(address), &expected.i, desired.i, order));
// return the "old" value that was in the memory.
return expected.f;
}
#endif
#if __CUDA_ARCH__ < 600
VTKM_EXEC_CONT inline vtkm::Float64 vtkmAtomicAddImpl(vtkm::Float64* address,
vtkm::Float64 value,
vtkm::MemoryOrder order)
{
union {
vtkm::UInt64 i;
vtkm::Float64 f;
} expected{ .f = *address }, desired{};
do
{
desired.f = expected.f + value;
} while (!AtomicCompareExchangeImpl(
reinterpret_cast<vtkm::UInt64*>(address), &expected.i, desired.i, order));
// return the "old" value that was in the memory.
return expected.f;
}
#endif
}
} // namespace vtkm::detail
@ -463,7 +503,7 @@ VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(vtkm::UInt64* const addr, vtkm
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr,
vtkm::UInt8 val,
vtkm::MemoryOrder order)
vtkm::MemoryOrder vtkmNotUsed(order))
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
@ -471,7 +511,7 @@ VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr,
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr,
vtkm::UInt16 val,
vtkm::MemoryOrder order)
vtkm::MemoryOrder vtkmNotUsed(order))
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
@ -499,31 +539,31 @@ VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
}
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix) \
VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
{ \
return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
} \
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder order) \
{ \
vtkmType result = BitCast<vtkmType>( \
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
BitCast<winType>(desired), \
BitCast<winType>(*expected))); \
if (result == *expected) \
{ \
return true; \
} \
else \
{ \
*expected = result; \
return false; \
} \
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix) \
VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix) \
VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
{ \
return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
} \
VTKM_EXEC_CONT inline bool AtomicCompareExchangeImpl( \
vtkmType* addr, vtkmType* expected, vtkmType desired, vtkm::MemoryOrder vtkmNotUsed(order)) \
{ \
vtkmType result = BitCast<vtkmType>( \
_InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
BitCast<winType>(desired), \
BitCast<winType>(*expected))); \
if (result == *expected) \
{ \
return true; \
} \
else \
{ \
*expected = result; \
return false; \
} \
}
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
@ -532,6 +572,51 @@ VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
#undef VTKM_ATOMIC_OPS_FOR_TYPE
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr,
vtkm::Float32 arg,
vtkm::MemoryOrder order)
{
union {
vtkm::UInt32 i;
vtkm::Float32 f;
} expected{}, desired{};
expected.f = *addr;
do
{
desired.f = expected.f + arg;
} while (
!AtomicCompareExchangeImpl(reinterpret_cast<vtkm::UInt32*>(addr),
&expected.i, // reloads expected with *addr prior to the operation
desired.i,
order));
// return the "old" value that was in the memory.
return expected.f;
}
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr,
vtkm::Float64 arg,
vtkm::MemoryOrder order)
{
union {
vtkm::UInt64 i;
vtkm::Float64 f;
} expected{}, desired{};
expected.f = *addr;
do
{
desired.f = expected.f + arg;
} while (
!AtomicCompareExchangeImpl(reinterpret_cast<vtkm::UInt64*>(addr),
&expected.i, // reloads expected with *addr prior to the operation
desired.i,
order));
// return the "old" value that was in the memory.
return expected.f;
}
}
} // namespace vtkm::detail
@ -584,6 +669,52 @@ VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
return __atomic_fetch_add(addr, arg, GccAtomicMemOrder(order));
}
// TODO: Use enable_if to write one version for both Float32 and Float64.
VTKM_EXEC_CONT inline vtkm::Float32 AtomicAddImpl(vtkm::Float32* addr,
vtkm::Float32 arg,
vtkm::MemoryOrder order)
{
union {
vtkm::UInt32 i;
vtkm::Float32 f;
} expected{ .f = *addr }, desired{};
do
{
desired.f = expected.f + arg;
} while (
!__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt32*>(addr),
&expected.i, // reloads expected with *addr prior to the operation
desired.i,
false,
GccAtomicMemOrder(order),
GccAtomicMemOrder(order)));
// return the "old" value that was in the memory.
return expected.f;
}
// TODO: Use enable_if to write one version for both Float32 and Float64.
VTKM_EXEC_CONT inline vtkm::Float64 AtomicAddImpl(vtkm::Float64* addr,
vtkm::Float64 arg,
vtkm::MemoryOrder order)
{
union {
vtkm::UInt64 i;
vtkm::Float64 f;
} expected{ .f = *addr }, desired{};
do
{
desired.f = expected.f + arg;
} while (
!__atomic_compare_exchange_n(reinterpret_cast<vtkm::UInt64*>(addr),
&expected.i, // reloads expected with *addr prior to the operation
desired.i,
false,
GccAtomicMemOrder(order),
GccAtomicMemOrder(order)));
// return the "old" value that was in the memory.
return expected.f;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
@ -706,7 +837,7 @@ VTKM_EXEC_CONT inline T AtomicAdd(
{
return detail::AtomicAddImpl(pointer, operand, order);
}
template <typename T>
template <typename T, typename std::enable_if<std::is_integral<T>::value>::type* = nullptr>
VTKM_EXEC_CONT inline T AtomicAdd(
T* pointer,
detail::OppositeSign<T> operand,
@ -846,7 +977,7 @@ VTKM_EXEC_CONT inline T AtomicNot(
/// pointing to an object on the stack).
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareExchange(
VTKM_EXEC_CONT inline bool AtomicCompareExchange(
T* shared,
T* expected,
T desired,

@ -58,6 +58,7 @@ set(headers
VecFromPortalPermute.h
VecFromVirtPortal.h
VectorAnalysis.h
VecFlat.h
VecTraits.h
VecVariable.h
VirtualObjectBase.h

@ -62,7 +62,7 @@
#if defined(__NVCC__)
// Currently nvcc has zero support for deprecated attributes
#elif __cplusplus >= 201402L
#elif __cplusplus >= 201402L && !defined(VTKM_GCC)
// C++14 and better supports [[deprecated]]
// Except in these cases:
@ -133,7 +133,14 @@
// Only actually use the [[deprecated]] attribute if the compiler supports it AND
// we know how to suppress deprecations when necessary.
#if defined(VTK_M_DEPRECATED_ATTRIBUTE_SUPPORTED) && defined(VTKM_DEPRECATED_SUPPRESS_SUPPORTED)
#ifdef VTKM_MSVC
#define VTKM_DEPRECATED(...) [[deprecated(VTK_M_DEPRECATED_MAKE_MESSAGE(__VA_ARGS__))]]
#else // !MSVC
// GCC and other compilers support the C++14 attribute [[deprecated]], but there appears to be a
// bug (or other undesirable behavior) where if you mix [[deprecated]] with __attribute__(()) you
// get compile errors. To get around this, use __attribute((deprecated)) where supported.
#define VTKM_DEPRECATED(...) __attribute__((deprecated(VTK_M_DEPRECATED_MAKE_MESSAGE(__VA_ARGS__))))
#endif // !MSVC
#else
#define VTKM_DEPRECATED(...)
#endif

@ -11,15 +11,26 @@
#define vtk_m_ImplicitFunction_h
#include <vtkm/Bounds.h>
#include <vtkm/Deprecated.h>
#include <vtkm/Math.h>
#include <vtkm/VectorAnalysis.h>
#include <vtkm/exec/internal/Variant.h>
// For interface class only.
#include <vtkm/cont/ExecutionAndControlObjectBase.h>
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
#include <vtkm/VirtualObjectBase.h>
#endif // VTKM_NO_DEPRECATED_VIRTUAL
namespace vtkm
{
//============================================================================
class VTKM_ALWAYS_EXPORT ImplicitFunction : public vtkm::VirtualObjectBase
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
class VTKM_DEPRECATED(1.6, "ImplicitFunction with virtual methods no longer supported.")
VTKM_ALWAYS_EXPORT ImplicitFunction : public vtkm::VirtualObjectBase
{
public:
using Scalar = vtkm::FloatDefault;
@ -28,6 +39,7 @@ public:
VTKM_EXEC_CONT virtual Scalar Value(const Vector& point) const = 0;
VTKM_EXEC_CONT virtual Vector Gradient(const Vector& point) const = 0;
VTKM_DEPRECATED_SUPPRESS_BEGIN
VTKM_EXEC_CONT Scalar Value(Scalar x, Scalar y, Scalar z) const
{
return this->Value(Vector(x, y, z));
@ -37,14 +49,60 @@ public:
{
return this->Gradient(Vector(x, y, z));
}
VTKM_DEPRECATED_SUPPRESS_END
};
#endif // VTKM_NO_DEPRECATED_VIRTUAL
//============================================================================
namespace internal
{
/// \brief Base class for all `ImplicitFunction` classes.
///
/// `ImplicitFunctionBase` uses the curiously recurring template pattern (CRTP). Subclasses
/// must provide their own type for the template parameter. Subclasses must implement
/// `Value` and `Gradient` methods.
///
/// Also, all subclasses must be trivially copyable. This is so they can be copied between
/// host and devices.
///
template <typename Derived>
class ImplicitFunctionBase : public vtkm::cont::ExecutionAndControlObjectBase
{
public:
using Scalar = vtkm::FloatDefault;
using Vector = vtkm::Vec<Scalar, 3>;
VTKM_EXEC_CONT Scalar Value(Scalar x, Scalar y, Scalar z) const
{
return reinterpret_cast<const Derived*>(this)->Value(Vector(x, y, z));
}
VTKM_EXEC_CONT Vector Gradient(Scalar x, Scalar y, Scalar z) const
{
return reinterpret_cast<const Derived*>(this)->Gradient(Vector(x, y, z));
}
VTKM_CONT Derived PrepareForExecution(vtkm::cont::DeviceAdapterId, vtkm::cont::Token&) const
{
return *reinterpret_cast<const Derived*>(this);
}
VTKM_CONT Derived PrepareForControl() const { return *reinterpret_cast<const Derived*>(this); }
};
} // namespace vtkm::internal
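// The class below is an illustrative sketch only (HalfSpaceX is hypothetical, not
// part of VTK-m): a custom implicit function derives from ImplicitFunctionBase with
// itself as the template parameter and supplies the required Value and Gradient methods.
class HalfSpaceX : public internal::ImplicitFunctionBase<HalfSpaceX>
{
public:
  // Signed value relative to the y-z plane: negative for x < 0, positive for x > 0.
  VTKM_EXEC_CONT Scalar Value(const Vector& point) const { return point[0]; }
  // The gradient of f(x, y, z) = x is the constant unit vector along x.
  VTKM_EXEC_CONT Vector Gradient(const Vector&) const { return Vector(1, 0, 0); }
};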
//============================================================================
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
VTKM_DEPRECATED_SUPPRESS_BEGIN
/// A helpful functor that calls the (virtual) value method of a given ImplicitFunction. Can be
/// passed to things that expect a functor instead of an ImplicitFunction class (like an array
/// transform).
///
class VTKM_ALWAYS_EXPORT ImplicitFunctionValue
class VTKM_DEPRECATED(1.6,
"Use ImplicitFunctionValueFunctor.") VTKM_ALWAYS_EXPORT ImplicitFunctionValue
{
public:
using Scalar = vtkm::ImplicitFunction::Scalar;
@ -73,7 +131,8 @@ private:
/// passed to things that expect a functor instead of an ImplicitFunction class (like an array
/// transform).
///
class VTKM_ALWAYS_EXPORT ImplicitFunctionGradient
class VTKM_DEPRECATED(1.6, "Use ImplicitFunctionGradientFunctor.")
VTKM_ALWAYS_EXPORT ImplicitFunctionGradient
{
public:
using Scalar = vtkm::ImplicitFunction::Scalar;
@ -98,6 +157,76 @@ private:
const vtkm::ImplicitFunction* Function;
};
VTKM_DEPRECATED_SUPPRESS_END
#endif // VTKM_NO_DEPRECATED_VIRTUAL
//============================================================================
/// A helpful functor that calls the value method of a given `ImplicitFunction`. Can be
/// passed to things that expect a functor instead of an `ImplicitFunction` class (like an array
/// transform).
///
template <typename FunctionType>
class ImplicitFunctionValueFunctor
{
public:
using Scalar = typename FunctionType::Scalar;
using Vector = typename FunctionType::Vector;
ImplicitFunctionValueFunctor() = default;
VTKM_EXEC_CONT ImplicitFunctionValueFunctor(
const vtkm::internal::ImplicitFunctionBase<FunctionType>& function)
: Function(reinterpret_cast<const FunctionType&>(function))
{
}
VTKM_EXEC_CONT ImplicitFunctionValueFunctor(const FunctionType& function)
: Function(function)
{
}
VTKM_EXEC_CONT Scalar operator()(const Vector& point) const
{
return this->Function.Value(point);
}
private:
FunctionType Function;
};
/// A helpful functor that calls the gradient method of a given `ImplicitFunction`. Can be
/// passed to things that expect a functor instead of an `ImplicitFunction` class (like an array
/// transform).
///
template <typename FunctionType>
class ImplicitFunctionGradientFunctor
{
public:
using Scalar = typename FunctionType::Scalar;
using Vector = typename FunctionType::Vector;
ImplicitFunctionGradientFunctor() = default;
VTKM_EXEC_CONT ImplicitFunctionGradientFunctor(
const vtkm::internal::ImplicitFunctionBase<FunctionType>& function)
: Function(reinterpret_cast<const FunctionType&>(function))
{
}
VTKM_EXEC_CONT ImplicitFunctionGradientFunctor(const FunctionType& function)
: Function(function)
{
}
VTKM_EXEC_CONT Vector operator()(const Vector& point) const
{
return this->Function.Gradient(point);
}
private:
FunctionType Function;
};
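// Illustrative sketch only; EvaluateImplicitValueSketch is a hypothetical helper,
// not part of VTK-m. It shows the functor being built from a concrete implicit
// function and then invoked like any unary functor (the same way an array
// transform or a worklet would call it).
template <typename FunctionType>
VTKM_EXEC_CONT typename FunctionType::Scalar EvaluateImplicitValueSketch(
  const FunctionType& function,
  const typename FunctionType::Vector& point)
{
  // Wrap the implicit function and evaluate it at the given point.
  vtkm::ImplicitFunctionValueFunctor<FunctionType> valueFunctor(function);
  return valueFunctor(point);
}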
//============================================================================
/// \brief Implicit function for a box
///
@ -106,7 +235,7 @@ private:
/// meeting along shared edges and all faces are orthogonal to the x-y-z
/// coordinate axes.
class VTKM_ALWAYS_EXPORT Box : public ImplicitFunction
class VTKM_ALWAYS_EXPORT Box : public internal::ImplicitFunctionBase<Box>
{
public:
/// \brief Construct box with center at (0,0,0) and each side of length 1.0.
@ -130,17 +259,9 @@ public:
VTKM_CONT Box(const vtkm::Bounds& bounds) { this->SetBounds(bounds); }
VTKM_CONT void SetMinPoint(const Vector& point)
{
this->MinPoint = point;
this->Modified();
}
VTKM_CONT void SetMinPoint(const Vector& point) { this->MinPoint = point; }
VTKM_CONT void SetMaxPoint(const Vector& point)
{
this->MaxPoint = point;
this->Modified();
}
VTKM_CONT void SetMaxPoint(const Vector& point) { this->MaxPoint = point; }
VTKM_EXEC_CONT const Vector& GetMinPoint() const { return this->MinPoint; }
@ -159,7 +280,7 @@ public:
vtkm::Range(this->MinPoint[2], this->MaxPoint[2]));
}
VTKM_EXEC_CONT Scalar Value(const Vector& point) const final
VTKM_EXEC_CONT Scalar Value(const Vector& point) const
{
Scalar minDistance = vtkm::NegativeInfinity32();
Scalar diff, t, dist;
@ -227,7 +348,7 @@ public:
}
}
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const final
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const
{
vtkm::IdComponent minAxis = 0;
Scalar dist = 0.0;
@ -349,6 +470,11 @@ public:
return normal;
}
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC Box* operator->() { return this; }
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC const Box* operator->() const { return this; }
private:
Vector MinPoint;
Vector MaxPoint;
@ -365,7 +491,7 @@ private:
///
/// Note that the cylinder is infinite in extent.
///
class VTKM_ALWAYS_EXPORT Cylinder final : public vtkm::ImplicitFunction
class VTKM_ALWAYS_EXPORT Cylinder : public vtkm::internal::ImplicitFunctionBase<Cylinder>
{
public:
/// Construct cylinder radius of 0.5; centered at origin with axis
@ -391,32 +517,20 @@ public:
{
}
VTKM_CONT void SetCenter(const Vector& center)
{
this->Center = center;
this->Modified();
}
VTKM_CONT void SetCenter(const Vector& center) { this->Center = center; }
VTKM_CONT void SetAxis(const Vector& axis)
{
this->Axis = vtkm::Normal(axis);
this->Modified();
}
VTKM_CONT void SetAxis(const Vector& axis) { this->Axis = vtkm::Normal(axis); }
VTKM_CONT void SetRadius(Scalar radius)
{
this->Radius = radius;
this->Modified();
}
VTKM_CONT void SetRadius(Scalar radius) { this->Radius = radius; }
VTKM_EXEC_CONT Scalar Value(const Vector& point) const final
VTKM_EXEC_CONT Scalar Value(const Vector& point) const
{
Vector x2c = point - this->Center;
FloatDefault proj = vtkm::Dot(this->Axis, x2c);
return vtkm::Dot(x2c, x2c) - (proj * proj) - (this->Radius * this->Radius);
}
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const final
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const
{
Vector x2c = point - this->Center;
FloatDefault t = this->Axis[0] * x2c[0] + this->Axis[1] * x2c[1] + this->Axis[2] * x2c[2];
@ -424,6 +538,10 @@ public:
return (point - closestPoint) * FloatDefault(2);
}
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC Cylinder* operator->() { return this; }
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC const Cylinder* operator->() const { return this; }
private:
Vector Center;
@ -433,7 +551,7 @@ private:
//============================================================================
/// \brief Implicit function for a frustum
class VTKM_ALWAYS_EXPORT Frustum final : public vtkm::ImplicitFunction
class VTKM_ALWAYS_EXPORT Frustum : public vtkm::internal::ImplicitFunctionBase<Frustum>
{
public:
/// \brief Construct axis-aligned frustum with center at (0,0,0) and each side of length 1.0.
@ -456,7 +574,6 @@ public:
{
this->Normals[index] = normals[index];
}
this->Modified();
}
VTKM_EXEC void SetPlane(int idx, const Vector& point, const Vector& normal)
@ -464,7 +581,6 @@ public:
VTKM_ASSERT((idx >= 0) && (idx < 6));
this->Points[idx] = point;
this->Normals[idx] = normal;
this->Modified();
}
VTKM_EXEC_CONT void GetPlanes(Vector points[6], Vector normals[6]) const
@ -502,10 +618,9 @@ public:
this->Points[i] = v0;
this->Normals[i] = vtkm::Normal(vtkm::TriangleNormal(v0, v1, v2));
}
this->Modified();
}
VTKM_EXEC_CONT Scalar Value(const Vector& point) const final
VTKM_EXEC_CONT Scalar Value(const Vector& point) const
{
Scalar maxVal = vtkm::NegativeInfinity<Scalar>();
for (vtkm::Id index : { 0, 1, 2, 3, 4, 5 })
@ -518,7 +633,7 @@ public:
return maxVal;
}
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const final
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const
{
Scalar maxVal = vtkm::NegativeInfinity<Scalar>();
vtkm::Id maxValIdx = 0;
@ -536,6 +651,11 @@ public:
return this->Normals[maxValIdx];
}
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC Frustum* operator->() { return this; }
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC const Frustum* operator->() const { return this; }
private:
Vector Points[6] = { { -0.5f, 0.0f, 0.0f }, { 0.5f, 0.0f, 0.0f }, { 0.0f, -0.5f, 0.0f },
{ 0.0f, 0.5f, 0.0f }, { 0.0f, 0.0f, -0.5f }, { 0.0f, 0.0f, 0.5f } };
@ -550,7 +670,7 @@ private:
/// The normal does not have to be a unit vector. The implicit function will
/// still evaluate to 0 at the plane, but the values outside the plane
/// (and the gradient) will be scaled by the length of the normal vector.
class VTKM_ALWAYS_EXPORT Plane final : public vtkm::ImplicitFunction
class VTKM_ALWAYS_EXPORT Plane : public vtkm::internal::ImplicitFunctionBase<Plane>
{
public:
/// Construct plane passing through origin and normal to z-axis.
@ -574,27 +694,24 @@ public:
{
}
VTKM_CONT void SetOrigin(const Vector& origin)
{
this->Origin = origin;
this->Modified();
}
VTKM_CONT void SetOrigin(const Vector& origin) { this->Origin = origin; }
VTKM_CONT void SetNormal(const Vector& normal)
{
this->Normal = normal;
this->Modified();
}
VTKM_CONT void SetNormal(const Vector& normal) { this->Normal = normal; }
VTKM_EXEC_CONT const Vector& GetOrigin() const { return this->Origin; }
VTKM_EXEC_CONT const Vector& GetNormal() const { return this->Normal; }
VTKM_EXEC_CONT Scalar Value(const Vector& point) const final
VTKM_EXEC_CONT Scalar Value(const Vector& point) const
{
return vtkm::Dot(point - this->Origin, this->Normal);
}
VTKM_EXEC_CONT Vector Gradient(const Vector&) const final { return this->Normal; }
VTKM_EXEC_CONT Vector Gradient(const Vector&) const { return this->Normal; }
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC Plane* operator->() { return this; }
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC const Plane* operator->() const { return this; }
private:
Vector Origin;
@ -609,7 +726,7 @@ private:
/// The value of the sphere implicit function is the square of the distance
/// from the center biased by the radius (so the surface of the sphere is
/// at value 0).
class VTKM_ALWAYS_EXPORT Sphere final : public vtkm::ImplicitFunction
class VTKM_ALWAYS_EXPORT Sphere : public vtkm::internal::ImplicitFunctionBase<Sphere>
{
public:
/// Construct sphere with center at (0,0,0) and radius = 0.5.
@ -632,51 +749,143 @@ public:
{
}
VTKM_CONT void SetRadius(Scalar radius)
{
this->Radius = radius;
this->Modified();
}
VTKM_CONT void SetRadius(Scalar radius) { this->Radius = radius; }
VTKM_CONT void SetCenter(const Vector& center)
{
this->Center = center;
this->Modified();
}
VTKM_CONT void SetCenter(const Vector& center) { this->Center = center; }
VTKM_EXEC_CONT Scalar GetRadius() const { return this->Radius; }
VTKM_EXEC_CONT const Vector& GetCenter() const { return this->Center; }
VTKM_EXEC_CONT Scalar Value(const Vector& point) const final
VTKM_EXEC_CONT Scalar Value(const Vector& point) const
{
return vtkm::MagnitudeSquared(point - this->Center) - (this->Radius * this->Radius);
}
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const final
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const
{
return Scalar(2) * (point - this->Center);
}
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC Sphere* operator->() { return this; }
VTKM_DEPRECATED(1.6, "ImplicitFunctions are no longer pointers. Use . operator.")
VTKM_EXEC const Sphere* operator->() const { return this; }
private:
Scalar Radius;
Vector Center;
};
namespace detail
{
struct ImplicitFunctionValueFunctor
{
template <typename ImplicitFunctionType>
VTKM_EXEC_CONT typename ImplicitFunctionType::Scalar operator()(
const ImplicitFunctionType& function,
const typename ImplicitFunctionType::Vector& point) const
{
return function.Value(point);
}
};
struct ImplicitFunctionGradientFunctor
{
template <typename ImplicitFunctionType>
VTKM_EXEC_CONT typename ImplicitFunctionType::Vector operator()(
const ImplicitFunctionType& function,
const typename ImplicitFunctionType::Vector& point) const
{
return function.Gradient(point);
}
};
} // namespace detail
//============================================================================
/// \brief Implicit function that can switch among different types.
///
/// An `ImplicitFunctionMultiplexer` is a templated `ImplicitFunction` that takes
/// as template arguments any number of other `ImplicitFunction`s that it can
/// behave as. This allows you to decide at runtime which of these implicit
/// functions to define and compute.
///
/// For example, let's say you want a filter that finds points either inside
/// a sphere or inside a box. Rather than create 2 different filters, one for
/// each type of implicit function, you can use `ImplicitFunctionMultiplexer<Sphere, Box>`
/// and then set either a `Sphere` or a `Box` at runtime.
///
/// To use `ImplicitFunctionMultiplexer`, simply create the actual implicit
/// function that you want to use, and then set the `ImplicitFunctionMultiplexer`
/// to that concrete implicit function object.
///
template <typename... ImplicitFunctionTypes>
class ImplicitFunctionMultiplexer
: public vtkm::internal::ImplicitFunctionBase<
ImplicitFunctionMultiplexer<ImplicitFunctionTypes...>>
{
vtkm::exec::internal::Variant<ImplicitFunctionTypes...> Variant;
using Superclass =
vtkm::internal::ImplicitFunctionBase<ImplicitFunctionMultiplexer<ImplicitFunctionTypes...>>;
public:
using Scalar = typename Superclass::Scalar;
using Vector = typename Superclass::Vector;
ImplicitFunctionMultiplexer() = default;
template <typename FunctionType>
VTKM_EXEC_CONT ImplicitFunctionMultiplexer(
const vtkm::internal::ImplicitFunctionBase<FunctionType>& function)
: Variant(reinterpret_cast<const FunctionType&>(function))
{
}
VTKM_EXEC_CONT Scalar Value(const Vector& point) const
{
return this->Variant.CastAndCall(detail::ImplicitFunctionValueFunctor{}, point);
}
VTKM_EXEC_CONT Vector Gradient(const Vector& point) const
{
return this->Variant.CastAndCall(detail::ImplicitFunctionGradientFunctor{}, point);
}
};
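// Illustrative usage sketch only; MultiplexerSketch is a hypothetical helper, not
// part of VTK-m. The concrete implicit function is chosen at runtime, and
// Value/Gradient on the returned object dispatch to whichever function is stored,
// e.g. MultiplexerSketch(true).Value(vtkm::Vec3f(0.25f, 0.25f, 0.25f)).
VTKM_CONT inline vtkm::ImplicitFunctionMultiplexer<vtkm::Sphere, vtkm::Box> MultiplexerSketch(
  bool useSphere)
{
  if (useSphere)
  {
    return vtkm::Sphere{}; // default sphere: center (0,0,0), radius 0.5
  }
  else
  {
    return vtkm::Box{}; // default box: sides of length 1 centered at (0,0,0)
  }
}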
//============================================================================
/// \brief Implicit function that can switch among known implicit function types.
///
/// `ImplicitFunctionGeneral` can behave as any of the predefined implicit functions
/// provided by VTK-m. This is helpful when the type of implicit function is not
/// known at compile time. For example, say you want a filter that can operate on
/// an implicit function. Rather than compile separate versions of the filter, one
/// for each type of implicit function, you can compile the filter once for
/// `ImplicitFunctionGeneral` and then set the desired implicit function at runtime.
///
/// To use `ImplicitFunctionGeneral`, simply create the actual implicit
/// function that you want to use, and then set the `ImplicitFunctionGeneral`
/// to that concrete implicit function object.
///
class ImplicitFunctionGeneral
: public vtkm::ImplicitFunctionMultiplexer<vtkm::Box,
vtkm::Cylinder,
vtkm::Frustum,
vtkm::Plane,
vtkm::Sphere>
{
using Superclass = vtkm::ImplicitFunctionMultiplexer<vtkm::Box,
vtkm::Cylinder,
vtkm::Frustum,
vtkm::Plane,
vtkm::Sphere>;
public:
using Superclass::Superclass;
};
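// Illustrative usage sketch only; MakeGeneralPlaneSketch is a hypothetical helper,
// not part of VTK-m. Any of the predefined implicit functions can be stored in an
// ImplicitFunctionGeneral, so code compiled once against it can be handed a Box,
// Cylinder, Frustum, Plane, or Sphere at runtime.
VTKM_CONT inline vtkm::ImplicitFunctionGeneral MakeGeneralPlaneSketch()
{
  vtkm::Plane plane; // default plane: through the origin, normal along the z axis
  return plane;      // converts because Plane is one of the multiplexed types
}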
} // namespace vtkm
// Cuda seems to have a bug where it expects the template class VirtualObjectTransfer
// to be instantiated in a consistent order among all the translation units of an
// executable. Failing to do so results in random crashes and incorrect results.
// We work around this issue by explicitly instantiating VirtualObjectTransfer for
// all the implicit functions here.
#ifdef VTKM_CUDA
#include <vtkm/cont/internal/VirtualObjectTransferInstantiate.h>
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Box);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Cylinder);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Frustum);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Plane);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Sphere);
#endif
#endif //vtk_m_ImplicitFunction_h

@ -1722,7 +1722,7 @@ static inline VTKM_EXEC_CONT vtkm::Float64 Max(vtkm::Float64 x, vtkm::Float64 y)
///
template <typename T>
static inline VTKM_EXEC_CONT T Min(const T& x, const T& y);
#ifdef VTKM_USE_STL
#if defined(VTKM_USE_STL) && !defined(VTKM_HIP)
static inline VTKM_EXEC_CONT vtkm::Float32 Min(vtkm::Float32 x, vtkm::Float32 y)
{
return (std::min)(x, y);
@ -1731,7 +1731,7 @@ static inline VTKM_EXEC_CONT vtkm::Float64 Min(vtkm::Float64 x, vtkm::Float64 y)
{
return (std::min)(x, y);
}
#else // !VTKM_USE_STL
#else // !VTKM_USE_STL OR HIP
static inline VTKM_EXEC_CONT vtkm::Float32 Min(vtkm::Float32 x, vtkm::Float32 y)
{
#ifdef VTKM_CUDA
@ -2562,7 +2562,8 @@ static inline VTKM_EXEC_CONT vtkm::Vec<T, N> CopySign(const vtkm::Vec<T, N>& x,
inline VTKM_EXEC_CONT vtkm::Float32 Frexp(vtkm::Float32 x, vtkm::Int32 *exponent)
{
#ifdef VTKM_CUDA
// See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
return VTKM_CUDA_MATH_FUNCTION_32(frexp)(x, exponent);
#else
return std::frexp(x, exponent);

@ -564,10 +564,10 @@ $#
///
template <typename T>
static inline VTKM_EXEC_CONT T Min(const T& x, const T& y);
#ifdef VTKM_USE_STL
#if defined(VTKM_USE_STL) && !defined(VTKM_HIP)
$binary_template_function('Min', '(std::min)(x, y)')\
$#
#else // !VTKM_USE_STL
#else // !VTKM_USE_STL OR HIP
$binary_math_function('Min', 'fmin')\
$#
#endif // !VTKM_USE_STL
@ -1164,7 +1164,8 @@ static inline VTKM_EXEC_CONT vtkm::Vec<T, N> CopySign(const vtkm::Vec<T, N>& x,
inline VTKM_EXEC_CONT vtkm::Float32 Frexp(vtkm::Float32 x, vtkm::Int32 *exponent)
{
#ifdef VTKM_CUDA
// See: https://github.com/ROCm-Developer-Tools/HIP/issues/2169
#if defined(VTKM_CUDA) || defined(VTKM_HIP)
return VTKM_CUDA_MATH_FUNCTION_32(frexp)(x, exponent);
#else
return std::frexp(x, exponent);

@ -298,9 +298,23 @@ VTKM_EXEC_CONT void MatrixLUPFactorFindUpperTriangleElements(vtkm::Matrix<T, Siz
vtkm::IdComponent topCornerIndex)
{
// Compute values for upper triangle on row topCornerIndex
for (vtkm::IdComponent colIndex = topCornerIndex + 1; colIndex < Size; colIndex++)
if (A(topCornerIndex, topCornerIndex) == 0)
{
A(topCornerIndex, colIndex) /= A(topCornerIndex, topCornerIndex);
for (vtkm::IdComponent colIndex = topCornerIndex + 1; colIndex < Size; colIndex++)
{
A(topCornerIndex, colIndex) = std::numeric_limits<T>::quiet_NaN();
}
}
else
{
// Let's make the reciprocal approximation here.
// Doesn't make things much faster for small 'Size',
// but definitely improves performance as 'Size' gets large.
T rAdiag = 1 / A(topCornerIndex, topCornerIndex);
for (vtkm::IdComponent colIndex = topCornerIndex + 1; colIndex < Size; colIndex++)
{
A(topCornerIndex, colIndex) *= rAdiag;
}
}
// Update the rest of the matrix for calculations on subsequent rows
@ -314,7 +328,7 @@ VTKM_EXEC_CONT void MatrixLUPFactorFindUpperTriangleElements(vtkm::Matrix<T, Siz
}
/// Performs an LUP-factorization on the given matrix using Crout's method. The
/// LU-factorization takes a matrix A and decomposes it into a lower triangular
/// LU-factorization takes a matrix A and decomposes it into a lower triangular
/// matrix L and upper triangular matrix U such that A = LU. The
/// LUP-factorization also allows permutation of A, which makes the
/// decomposition always possible so long as A is not singular. In addition to
@ -389,7 +403,14 @@ VTKM_EXEC_CONT vtkm::Vec<T, Size> MatrixLUPSolve(
{
y[rowIndex] -= LU(rowIndex, colIndex) * y[colIndex];
}
y[rowIndex] /= LU(rowIndex, rowIndex);
if (LU(rowIndex, rowIndex) == 0)
{
y[rowIndex] = std::numeric_limits<T>::quiet_NaN();
}
else
{
y[rowIndex] /= LU(rowIndex, rowIndex);
}
}
// Now that we have y, we can easily solve Ux = y for x.

@ -50,79 +50,30 @@ public:
VTKM_EXEC_CONT void ClearTookAnySteps() { this->reset(this->TOOK_ANY_STEPS_BIT); }
VTKM_EXEC_CONT bool CheckTookAnySteps() const { return this->test(this->TOOK_ANY_STEPS_BIT); }
VTKM_EXEC_CONT void SetInGhostCell() { this->set(this->IN_GHOST_CELL_BIT); }
VTKM_EXEC_CONT void ClearInGhostCell() { this->reset(this->IN_GHOST_CELL_BIT); }
VTKM_EXEC_CONT bool CheckInGhostCell() const { return this->test(this->IN_GHOST_CELL_BIT); }
private:
static constexpr vtkm::Id SUCCESS_BIT = 0;
static constexpr vtkm::Id TERMINATE_BIT = 1;
static constexpr vtkm::Id SPATIAL_BOUNDS_BIT = 2;
static constexpr vtkm::Id TEMPORAL_BOUNDS_BIT = 3;
static constexpr vtkm::Id TOOK_ANY_STEPS_BIT = 4;
static constexpr vtkm::Id IN_GHOST_CELL_BIT = 5;
};
inline VTKM_CONT std::ostream& operator<<(std::ostream& s, const vtkm::ParticleStatus& status)
{
s << "[" << status.CheckOk() << " " << status.CheckTerminate() << " "
<< status.CheckSpatialBounds() << " " << status.CheckTemporalBounds() << "]";
s << "[ok= " << status.CheckOk();
s << " term= " << status.CheckTerminate();
s << " spat= " << status.CheckSpatialBounds();
s << " temp= " << status.CheckTemporalBounds();
s << " ghst= " << status.CheckInGhostCell();
s << "]";
return s;
}
class ParticleBase
{
public:
VTKM_EXEC_CONT
ParticleBase() {}
VTKM_EXEC_CONT virtual ~ParticleBase() noexcept
{
// This must not be defaulted, since defaulted virtual destructors are
// troublesome with CUDA __host__ __device__ markup.
}
VTKM_EXEC_CONT
ParticleBase(const vtkm::Vec3f& p,
const vtkm::Id& id,
const vtkm::Id& numSteps = 0,
const vtkm::ParticleStatus& status = vtkm::ParticleStatus(),
const vtkm::FloatDefault& time = 0)
: Pos(p)
, ID(id)
, NumSteps(numSteps)
, Status(status)
, Time(time)
{
}
VTKM_EXEC_CONT
ParticleBase(const vtkm::ParticleBase& p)
: Pos(p.Pos)
, ID(p.ID)
, NumSteps(p.NumSteps)
, Status(p.Status)
, Time(p.Time)
{
}
vtkm::ParticleBase& operator=(const vtkm::ParticleBase&) = default;
// The basic particle is only meant to be advected in a velocity
// field. In that case it is safe to assume that the velocity value
// will always be stored in the first location of vectors
VTKM_EXEC_CONT
virtual vtkm::Vec3f Next(const vtkm::VecVariable<vtkm::Vec3f, 2>&, const vtkm::FloatDefault&) = 0;
// The basic particle is only meant to be advected in a velocity
// field. In that case it is safe to assume that the velocity value
// will always be stored in the first location of vectors
VTKM_EXEC_CONT
virtual vtkm::Vec3f Velocity(const vtkm::VecVariable<vtkm::Vec3f, 2>&,
const vtkm::FloatDefault&) = 0;
vtkm::Vec3f Pos;
vtkm::Id ID = -1;
vtkm::Id NumSteps = 0;
vtkm::ParticleStatus Status;
vtkm::FloatDefault Time = 0;
};
class Particle
{
public:
@ -186,7 +137,7 @@ public:
vtkm::FloatDefault Time = 0;
};
class Electron : public vtkm::ParticleBase
class Electron
{
public:
VTKM_EXEC_CONT

@ -13,7 +13,7 @@
#include <vtkm/internal/ExportMacros.h>
#ifdef __CUDACC__
#ifdef VTKM_CUDA
#include <thrust/swap.h>
#else
#include <algorithm>
@ -23,13 +23,27 @@ namespace vtkm
{
/// Performs a swap operation. Safe to call from cuda code.
#ifdef __CUDACC__
#if defined(VTKM_CUDA)
template <typename T>
VTKM_EXEC_CONT void Swap(T& a, T& b)
{
using namespace thrust;
swap(a, b);
}
#elif defined(VTKM_HIP)
template <typename T>
__host__ void Swap(T& a, T& b)
{
using namespace std;
swap(a, b);
}
template <typename T>
__device__ void Swap(T& a, T& b)
{
T temp = a;
a = b;
b = temp;
}
#else
template <typename T>
VTKM_EXEC_CONT void Swap(T& a, T& b)
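
As an illustrative aside (not taken from this changeset), the following minimal sketch shows how `vtkm::Swap` is typically called from host code; it assumes the header shown in the hunk above is `vtkm/Swap.h` and uses arbitrary values.

```cpp
#include <vtkm/Swap.h>
#include <vtkm/Types.h>

#include <cassert>

int main()
{
  vtkm::Id a = 1;
  vtkm::Id b = 2;
  // Dispatches to the appropriate implementation above (thrust::swap under CUDA,
  // std::swap or the manual swap on other backends).
  vtkm::Swap(a, b);
  assert(a == 2 && b == 1);
  return 0;
}
```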

@ -86,7 +86,12 @@ using TypeListField = vtkm::List<vtkm::Float32,
vtkm::Vec4f_64>;
/// A list of all scalars defined in vtkm/Types.h. A scalar is a type that
/// holds a single number.
/// holds a single number. This list should contain all true variations of
/// scalars, but some arithmetic C types are not included. For example, this
/// list contains `signed char` and `unsigned char`, but not `char`, since
/// `char` behaves identically to one of those two types. Even though two of
/// the three types behave the same, be aware that template resolution will
/// treat them as distinct types.
///
using TypeListScalarAll = vtkm::List<vtkm::Int8,
vtkm::UInt8,
@ -99,6 +104,15 @@ using TypeListScalarAll = vtkm::List<vtkm::Int8,
vtkm::Float32,
vtkm::Float64>;
// A list that contains all the base arithmetic C types (i.e. char, int, float, etc.).
// The list contains C types that are functionally equivalent but considered different
// types by the compiler (e.g. it contains both `char` and `signed char`).
using TypeListBaseC = vtkm::ListAppend<
vtkm::TypeListScalarAll,
// Other base C types that are the same as above but
// recognized as different by the compiler
vtkm::List<bool, char, signed VTKM_UNUSED_INT_TYPE, unsigned VTKM_UNUSED_INT_TYPE>>;
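
To make the distinction above concrete, here is a small compile-time sketch (not part of this changeset); it assumes `vtkm::ListHas` from `vtkm/List.h` and the list names defined in this header.

```cpp
#include <vtkm/List.h>
#include <vtkm/TypeList.h>

#include <type_traits>

// `char` is a distinct type from both `signed char` and `unsigned char` as far as
// template resolution is concerned, even though it behaves like one of them.
static_assert(!std::is_same<char, signed char>::value, "distinct types");
static_assert(!std::is_same<char, unsigned char>::value, "distinct types");

// Therefore `char` appears in TypeListBaseC but not in TypeListScalarAll.
static_assert(vtkm::ListHas<vtkm::TypeListBaseC, char>::value, "char is in the base C list");
static_assert(!vtkm::ListHas<vtkm::TypeListScalarAll, char>::value, "but not in the scalar list");
```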
/// A list of the most commonly use Vec classes. Specifically, these are
/// vectors of size 2, 3, or 4 containing either unsigned bytes, signed
/// integers of 32 or 64 bits, or floating point values of 32 or 64 bits.

@ -110,6 +110,7 @@ struct TypeTraits<const T> : TypeTraits<T>
VTKM_BASIC_REAL_TYPE(float)
VTKM_BASIC_REAL_TYPE(double)
VTKM_BASIC_INTEGER_TYPE(bool)
VTKM_BASIC_INTEGER_TYPE(char)
VTKM_BASIC_INTEGER_TYPE(signed char)
VTKM_BASIC_INTEGER_TYPE(unsigned char)

@ -173,11 +173,13 @@ using WordTypeDefault = vtkm::UInt32;
//In this order so that we exactly match the logic that exists in VTK
#if VTKM_SIZE_LONG_LONG == 8
using Int64 = long long;
using Int64 = signed long long;
using UInt64 = unsigned long long;
#define VTKM_UNUSED_INT_TYPE long
#elif VTKM_SIZE_LONG == 8
using Int64 = signed long;
using UInt64 = unsigned long;
#define VTKM_UNUSED_INT_TYPE long long
#else
#error Could not find a 64-bit integer.
#endif
@ -341,15 +343,14 @@ public:
}
}
template <typename OtherComponentType, typename OtherVecType>
VTKM_EXEC_CONT DerivedClass& operator=(
const vtkm::detail::VecBaseCommon<OtherComponentType, OtherVecType>& src)
// Only works with Vec-like objects with operator[] and GetNumberOfComponents().
template <typename OtherVecType>
VTKM_EXEC_CONT DerivedClass& operator=(const OtherVecType& src)
{
const OtherVecType& srcDerived = static_cast<const OtherVecType&>(src);
VTKM_ASSERT(this->NumComponents() == srcDerived.GetNumberOfComponents());
VTKM_ASSERT(this->NumComponents() == src.GetNumberOfComponents());
for (vtkm::IdComponent i = 0; i < this->NumComponents(); ++i)
{
this->Component(i) = OtherComponentType(srcDerived[i]);
this->Component(i) = src[i];
}
return this->Derived();
}
@ -411,14 +412,12 @@ public:
}
template <typename OtherClass>
inline VTKM_EXEC_CONT DerivedClass& operator+=(
const VecBaseCommon<ComponentType, OtherClass>& other)
inline VTKM_EXEC_CONT DerivedClass& operator+=(const OtherClass& other)
{
const OtherClass& other_derived = static_cast<const OtherClass&>(other);
VTKM_ASSERT(this->NumComponents() == other_derived.GetNumberOfComponents());
VTKM_ASSERT(this->NumComponents() == other.GetNumberOfComponents());
for (vtkm::IdComponent i = 0; i < this->NumComponents(); ++i)
{
this->Component(i) += other_derived[i];
this->Component(i) += other[i];
}
return this->Derived();
}
@ -437,14 +436,12 @@ public:
}
template <typename OtherClass>
inline VTKM_EXEC_CONT DerivedClass& operator-=(
const VecBaseCommon<ComponentType, OtherClass>& other)
inline VTKM_EXEC_CONT DerivedClass& operator-=(const OtherClass& other)
{
const OtherClass& other_derived = static_cast<const OtherClass&>(other);
VTKM_ASSERT(this->NumComponents() == other_derived.GetNumberOfComponents());
VTKM_ASSERT(this->NumComponents() == other.GetNumberOfComponents());
for (vtkm::IdComponent i = 0; i < this->NumComponents(); ++i)
{
this->Component(i) -= other_derived[i];
this->Component(i) -= other[i];
}
return this->Derived();
}
@ -462,14 +459,12 @@ public:
}
template <typename OtherClass>
inline VTKM_EXEC_CONT DerivedClass& operator*=(
const VecBaseCommon<ComponentType, OtherClass>& other)
inline VTKM_EXEC_CONT DerivedClass& operator*=(const OtherClass& other)
{
const OtherClass& other_derived = static_cast<const OtherClass&>(other);
VTKM_ASSERT(this->NumComponents() == other_derived.GetNumberOfComponents());
VTKM_ASSERT(this->NumComponents() == other.GetNumberOfComponents());
for (vtkm::IdComponent i = 0; i < this->NumComponents(); ++i)
{
this->Component(i) *= other_derived[i];
this->Component(i) *= other[i];
}
return this->Derived();
}
@ -487,13 +482,12 @@ public:
}
template <typename OtherClass>
VTKM_EXEC_CONT DerivedClass& operator/=(const VecBaseCommon<ComponentType, OtherClass>& other)
VTKM_EXEC_CONT DerivedClass& operator/=(const OtherClass& other)
{
const OtherClass& other_derived = static_cast<const OtherClass&>(other);
VTKM_ASSERT(this->NumComponents() == other_derived.GetNumberOfComponents());
VTKM_ASSERT(this->NumComponents() == other.GetNumberOfComponents());
for (vtkm::IdComponent i = 0; i < this->NumComponents(); ++i)
{
this->Component(i) /= other_derived[i];
this->Component(i) /= other[i];
}
return this->Derived();
}
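
As a hedged illustration of the relaxed operator signatures above (any right-hand side that provides `operator[]` and `GetNumberOfComponents()` is now accepted), here is a sketch using `vtkm::VecVariable`; the example is not taken from this changeset, and whether this exact combination compiled before the change is not asserted here.

```cpp
#include <vtkm/Types.h>
#include <vtkm/VecVariable.h>

void AccumulateExample()
{
  vtkm::Vec3f sum(0.0f);

  vtkm::VecVariable<vtkm::FloatDefault, 3> increment;
  increment.Append(1.0f);
  increment.Append(2.0f);
  increment.Append(3.0f);

  // The compound assignment only requires operator[] and GetNumberOfComponents()
  // on the right-hand side; the component counts must still match at runtime.
  sum += increment;
}
```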

@ -167,7 +167,7 @@ struct VecTraits<vtkm::VecAxisAlignedPointCoordinates<NumDimensions>>
template <typename NewComponentType>
using ReplaceComponentType = vtkm::Vec<NewComponentType, NUM_COMPONENTS>;
template <typename NewComponentType>
using ReplaceBaseComponenttype = vtkm::Vec<vtkm::Vec<NewComponentType, 3>, NUM_COMPONENTS>;
using ReplaceBaseComponentType = vtkm::Vec<vtkm::Vec<NewComponentType, 3>, NUM_COMPONENTS>;
template <vtkm::IdComponent destSize>
VTKM_EXEC_CONT static void CopyInto(const VecType& src, vtkm::Vec<ComponentType, destSize>& dest)
@ -176,6 +176,21 @@ struct VecTraits<vtkm::VecAxisAlignedPointCoordinates<NumDimensions>>
}
};
/// Helper function for printing out vectors during testing.
///
template <vtkm::IdComponent NumDimensions>
inline VTKM_CONT std::ostream& operator<<(
std::ostream& stream,
const vtkm::VecAxisAlignedPointCoordinates<NumDimensions>& vec)
{
stream << "[";
for (vtkm::IdComponent component = 0; component < vec.NUM_COMPONENTS - 1; component++)
{
stream << vec[component] << ",";
}
return stream << vec[vec.NUM_COMPONENTS - 1] << "]";
}
} // namespace vtkm
#endif //vtk_m_VecAxisAlignedPointCoordinates_h

vtkm/VecFlat.h (new file, 298 lines)

@ -0,0 +1,298 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_VecFlat_h
#define vtk_m_VecFlat_h
#include <vtkm/StaticAssert.h>
#include <vtkm/TypeTraits.h>
#include <vtkm/Types.h>
#include <vtkm/VecTraits.h>
namespace vtkm
{
namespace internal
{
template <typename T,
typename MultipleComponents = typename vtkm::VecTraits<T>::HasMultipleComponents>
struct TotalNumComponents;
template <typename T>
struct TotalNumComponents<T, vtkm::VecTraitsTagMultipleComponents>
{
VTKM_STATIC_ASSERT_MSG(
(std::is_same<typename vtkm::VecTraits<T>::IsSizeStatic, vtkm::VecTraitsTagSizeStatic>::value),
"vtkm::VecFlat can only be used with Vec types with a static number of components.");
using ComponentType = typename vtkm::VecTraits<T>::ComponentType;
static constexpr vtkm::IdComponent value =
vtkm::VecTraits<T>::NUM_COMPONENTS * TotalNumComponents<ComponentType>::value;
};
template <typename T>
struct TotalNumComponents<T, vtkm::VecTraitsTagSingleComponent>
{
static constexpr vtkm::IdComponent value = 1;
};
template <typename T>
using FlattenVec = vtkm::Vec<typename vtkm::VecTraits<T>::BaseComponentType,
vtkm::internal::TotalNumComponents<T>::value>;
template <typename T>
using IsFlatVec = typename std::is_same<T, FlattenVec<T>>::type;
namespace detail
{
template <typename T>
VTKM_EXEC_CONT T GetFlatVecComponentImpl(const T& component,
vtkm::IdComponent index,
std::true_type vtkmNotUsed(isBase))
{
VTKM_ASSERT(index == 0);
return component;
}
template <typename T>
VTKM_EXEC_CONT typename vtkm::VecTraits<T>::BaseComponentType
GetFlatVecComponentImpl(const T& vec, vtkm::IdComponent index, std::false_type vtkmNotUsed(isBase))
{
using Traits = vtkm::VecTraits<T>;
using ComponentType = typename Traits::ComponentType;
using BaseComponentType = typename Traits::BaseComponentType;
constexpr vtkm::IdComponent subSize = TotalNumComponents<ComponentType>::value;
return GetFlatVecComponentImpl(Traits::GetComponent(vec, index / subSize),
index % subSize,
typename std::is_same<ComponentType, BaseComponentType>::type{});
}
} // namespace detail
template <typename T>
VTKM_EXEC_CONT typename vtkm::VecTraits<T>::BaseComponentType GetFlatVecComponent(
const T& vec,
vtkm::IdComponent index)
{
return detail::GetFlatVecComponentImpl(vec, index, std::false_type{});
}
namespace detail
{
template <typename T, vtkm::IdComponent N>
VTKM_EXEC_CONT void CopyVecNestedToFlatImpl(T nestedVec,
vtkm::Vec<T, N>& flatVec,
vtkm::IdComponent flatOffset)
{
flatVec[flatOffset] = nestedVec;
}
template <typename T, vtkm::IdComponent NFlat, vtkm::IdComponent NNest>
VTKM_EXEC_CONT void CopyVecNestedToFlatImpl(const vtkm::Vec<T, NNest>& nestedVec,
vtkm::Vec<T, NFlat>& flatVec,
vtkm::IdComponent flatOffset)
{
for (vtkm::IdComponent nestedIndex = 0; nestedIndex < NNest; ++nestedIndex)
{
flatVec[nestedIndex + flatOffset] = nestedVec[nestedIndex];
}
}
template <typename T, vtkm::IdComponent N, typename NestedVecType>
VTKM_EXEC_CONT void CopyVecNestedToFlatImpl(const NestedVecType& nestedVec,
vtkm::Vec<T, N>& flatVec,
vtkm::IdComponent flatOffset)
{
using Traits = vtkm::VecTraits<NestedVecType>;
using ComponentType = typename Traits::ComponentType;
constexpr vtkm::IdComponent subSize = TotalNumComponents<ComponentType>::value;
vtkm::IdComponent flatIndex = flatOffset;
for (vtkm::IdComponent nestIndex = 0; nestIndex < Traits::NUM_COMPONENTS; ++nestIndex)
{
CopyVecNestedToFlatImpl(Traits::GetComponent(nestedVec, nestIndex), flatVec, flatIndex);
flatIndex += subSize;
}
}
} // namespace detail
template <typename T, vtkm::IdComponent N, typename NestedVecType>
VTKM_EXEC_CONT void CopyVecNestedToFlat(const NestedVecType& nestedVec, vtkm::Vec<T, N>& flatVec)
{
detail::CopyVecNestedToFlatImpl(nestedVec, flatVec, 0);
}
namespace detail
{
template <typename T, vtkm::IdComponent N>
VTKM_EXEC_CONT void CopyVecFlatToNestedImpl(const vtkm::Vec<T, N>& flatVec,
vtkm::IdComponent flatOffset,
T& nestedVec)
{
nestedVec = flatVec[flatOffset];
}
template <typename T, vtkm::IdComponent NFlat, vtkm::IdComponent NNest>
VTKM_EXEC_CONT void CopyVecFlatToNestedImpl(const vtkm::Vec<T, NFlat>& flatVec,
vtkm::IdComponent flatOffset,
vtkm::Vec<T, NNest>& nestedVec)
{
for (vtkm::IdComponent nestedIndex = 0; nestedIndex < NNest; ++nestedIndex)
{
nestedVec[nestedIndex] = flatVec[nestedIndex + flatOffset];
}
}
template <typename T, vtkm::IdComponent NFlat, typename ComponentType, vtkm::IdComponent NNest>
VTKM_EXEC_CONT void CopyVecFlatToNestedImpl(const vtkm::Vec<T, NFlat>& flatVec,
vtkm::IdComponent flatOffset,
vtkm::Vec<ComponentType, NNest>& nestedVec)
{
constexpr vtkm::IdComponent subSize = TotalNumComponents<ComponentType>::value;
vtkm::IdComponent flatIndex = flatOffset;
for (vtkm::IdComponent nestIndex = 0; nestIndex < NNest; ++nestIndex)
{
CopyVecFlatToNestedImpl(flatVec, flatIndex, nestedVec[nestIndex]);
flatIndex += subSize;
}
}
template <typename T, vtkm::IdComponent N, typename NestedVecType>
VTKM_EXEC_CONT void CopyVecFlatToNestedImpl(const vtkm::Vec<T, N>& flatVec,
vtkm::IdComponent flatOffset,
NestedVecType& nestedVec)
{
using Traits = vtkm::VecTraits<NestedVecType>;
using ComponentType = typename Traits::ComponentType;
constexpr vtkm::IdComponent subSize = TotalNumComponents<ComponentType>::value;
vtkm::IdComponent flatIndex = flatOffset;
for (vtkm::IdComponent nestIndex = 0; nestIndex < Traits::NUM_COMPONENTS; ++nestIndex)
{
ComponentType component;
CopyVecFlatToNestedImpl(flatVec, flatIndex, component);
Traits::SetComponent(nestedVec, nestIndex, component);
flatIndex += subSize;
}
}
} // namespace detail
template <typename T, vtkm::IdComponent N, typename NestedVecType>
VTKM_EXEC_CONT void CopyVecFlatToNested(const vtkm::Vec<T, N>& flatVec, NestedVecType& nestedVec)
{
detail::CopyVecFlatToNestedImpl(flatVec, 0, nestedVec);
}
} // namespace internal
/// \brief Treat a `Vec` or `Vec`-like object as a flat `Vec`.
///
/// The `VecFlat` template wraps around another object that is a nested `Vec` object
/// (that is, a vector of vectors) and treats it like a flat, 1 dimensional `Vec`.
/// For example, let's say that you have a `Vec` of size 3 holding `Vec`s of size 2.
///
/// ```cpp
/// void Foo(const vtkm::Vec<vtkm::Vec<vtkm::Id, 2>, 3>& nestedVec)
/// {
/// auto flatVec = vtkm::make_VecFlat(nestedVec);
/// ```
///
/// `flatVec` is now of type `vtkm::VecFlat<vtkm::Vec<vtkm::Vec<vtkm::Id, 2>, 3>>`.
/// `flatVec::NUM_COMPONENTS` is 6 (3 * 2). The `[]` operator takes an index between
/// 0 and 5 and returns a value of type `vtkm::Id`. The indices are explored in
/// depth-first order. So `flatVec[0] == nestedVec[0][0]`, `flatVec[1] == nestedVec[0][1]`,
/// `flatVec[2] == nestedVec[1][0]`, and so on.
///
/// Note that `VecFlat` only works with types that have `VecTraits` defined where
/// the `IsSizeStatic` field is `vtkm::VecTraitsTagSizeStatic` (that is, the `NUM_COMPONENTS`
/// constant is defined).
///
template <typename T, bool = internal::IsFlatVec<T>::value>
class VecFlat;
// Case where T is not already a flat Vec (i.e., T is not a vtkm::Vec of a non-Vec component type).
template <typename T>
class VecFlat<T, false> : public internal::FlattenVec<T>
{
using Superclass = internal::FlattenVec<T>;
public:
using Superclass::Superclass;
VecFlat() = default;
VTKM_EXEC_CONT VecFlat(const T& src) { *this = src; }
VTKM_EXEC_CONT VecFlat& operator=(const T& src)
{
internal::CopyVecNestedToFlat(src, *this);
return *this;
}
VTKM_EXEC_CONT operator T() const
{
T nestedVec;
internal::CopyVecFlatToNested(*this, nestedVec);
return nestedVec;
}
};
// Specialization of VecFlat where the Vec is already a flat Vec.
template <typename T>
class VecFlat<T, true> : public T
{
public:
using T::T;
VecFlat() = default;
VTKM_EXEC_CONT VecFlat(const T& src)
: T(src)
{
}
VTKM_EXEC_CONT VecFlat& operator=(const T& src)
{
this->T::operator=(src);
return *this;
}
VTKM_EXEC_CONT VecFlat& operator=(T&& src)
{
this->T::operator=(std::move(src));
return *this;
}
};
/// \brief Converts a `Vec`-like object to a `VecFlat`.
///
template <typename T>
VTKM_EXEC_CONT vtkm::VecFlat<T> make_VecFlat(const T& vec)
{
return vtkm::VecFlat<T>(vec);
}
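
A short usage sketch of `make_VecFlat` following the depth-first ordering described in the class documentation; the values are illustrative and the example is not part of this changeset.

```cpp
#include <vtkm/Assert.h>
#include <vtkm/VecFlat.h>

void VecFlatExample()
{
  vtkm::Vec<vtkm::Vec<vtkm::Id, 2>, 3> nested(
    vtkm::Vec<vtkm::Id, 2>(0, 1), vtkm::Vec<vtkm::Id, 2>(2, 3), vtkm::Vec<vtkm::Id, 2>(4, 5));

  auto flat = vtkm::make_VecFlat(nested);

  // Indices are visited depth first: flat[0] == nested[0][0], flat[1] == nested[0][1], ...
  VTKM_ASSERT(flat[2] == nested[1][0]);

  // Writes go into the flat copy; converting back restores the nested layout.
  flat[5] = 42;
  vtkm::Vec<vtkm::Vec<vtkm::Id, 2>, 3> restored = flat;
  VTKM_ASSERT(restored[2][1] == 42);
}
```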
template <typename T>
struct TypeTraits<vtkm::VecFlat<T>> : TypeTraits<internal::FlattenVec<T>>
{
};
template <typename T>
struct VecTraits<vtkm::VecFlat<T>> : VecTraits<internal::FlattenVec<T>>
{
};
} // namespace vtkm
#endif //vtk_m_VecFlat_h

@ -12,11 +12,11 @@
#include <vtkm/Types.h>
#include <vtkm/cont/BitField.h>
#include <vtkm/cont/DeviceAdapter.h>
#include <vtkm/cont/ExecutionObjectBase.h>
#include <vtkm/cont/Token.h>
#include <vtkm/cont/TryExecute.h>
#include <vtkm/cont/internal/ArrayManagerExecution.h>
namespace vtkm

vtkm/cont/ArrayCopy.cxx (new file, 133 lines)

@ -0,0 +1,133 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/DeviceAdapterList.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/worklet/WorkletMapField.h>
namespace
{
// Use a worklet because device adapter copies often have an issue with casting the values from the
// `ArrayHandleRecombineVec` that comes from `UnknownArrayHandle::CastAndCallWithExtractedArray`.
struct CopyWorklet : vtkm::worklet::WorkletMapField
{
using ControlSignature = void(FieldIn, FieldOut);
using ExecutionSignature = void(_1, _2);
using InputDomain = _1;
template <typename InType, typename OutType>
VTKM_EXEC void operator()(const InType& in, OutType& out) const
{
out = in;
}
};
struct UnknownCopyOnDevice
{
bool Called = false;
template <typename InArrayType, typename OutArrayType>
void operator()(vtkm::cont::DeviceAdapterId device,
const InArrayType& in,
const OutArrayType& out)
{
if (!this->Called && ((device == vtkm::cont::DeviceAdapterTagAny{}) || (in.IsOnDevice(device))))
{
vtkm::cont::Invoker invoke(device);
invoke(CopyWorklet{}, in, out);
this->Called = true;
}
}
};
struct UnknownCopyFunctor2
{
template <typename OutArrayType, typename InArrayType>
void operator()(const OutArrayType& out, const InArrayType& in) const
{
UnknownCopyOnDevice doCopy;
// Try to copy on a device that the data are already on.
vtkm::ListForEach(doCopy, VTKM_DEFAULT_DEVICE_ADAPTER_LIST{}, in, out);
// If it was not on any device, call one more time with any adapter to copy wherever.
doCopy(vtkm::cont::DeviceAdapterTagAny{}, in, out);
}
};
struct UnknownCopyFunctor1
{
template <typename InArrayType>
void operator()(const InArrayType& in, vtkm::cont::UnknownArrayHandle& out) const
{
out.Allocate(in.GetNumberOfValues());
this->DoIt(in,
out,
typename std::is_same<vtkm::FloatDefault,
typename InArrayType::ValueType::ComponentType>::type{});
}
template <typename InArrayType>
void DoIt(const InArrayType& in, vtkm::cont::UnknownArrayHandle& out, std::false_type) const
{
// Source is not float.
using BaseComponentType = typename InArrayType::ValueType::ComponentType;
if (out.IsBaseComponentType<BaseComponentType>())
{
// Arrays have the same base component type. Copy directly.
UnknownCopyFunctor2{}(out.ExtractArrayFromComponents<BaseComponentType>(), in);
}
else if (out.IsBaseComponentType<vtkm::FloatDefault>())
{
// Can copy anything to default float.
UnknownCopyFunctor2{}(out.ExtractArrayFromComponents<vtkm::FloatDefault>(), in);
}
else
{
// Arrays have different base types. To reduce the number of template paths from nxn to 3n,
// copy first to a temp array of default float.
vtkm::cont::UnknownArrayHandle temp = out.NewInstanceFloatBasic();
(*this)(in, temp);
vtkm::cont::ArrayCopy(temp, out);
}
}
template <typename InArrayType>
void DoIt(const InArrayType& in, vtkm::cont::UnknownArrayHandle& out, std::true_type) const
{
// Source array is FloatDefault. That should be copiable to anything.
out.CastAndCallWithExtractedArray(UnknownCopyFunctor2{}, in);
}
};
} // anonymous namespace
namespace vtkm
{
namespace cont
{
void ArrayCopy(const vtkm::cont::UnknownArrayHandle& source,
vtkm::cont::UnknownArrayHandle& destination)
{
if (!destination.IsValid())
{
destination = source.NewInstanceBasic();
}
source.CastAndCallWithExtractedArray(UnknownCopyFunctor1{}, destination);
}
}
} // namespace vtkm::cont
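
A hedged usage sketch of the `UnknownArrayHandle` overloads of `ArrayCopy` introduced above; the value types chosen here are arbitrary and the example is not part of this changeset.

```cpp
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/UnknownArrayHandle.h>

void ArrayCopyExample(const vtkm::cont::ArrayHandle<vtkm::Float32>& input)
{
  // Type-erase the source.
  vtkm::cont::UnknownArrayHandle source(input);

  // Copy into another unknown array. If `destination` is invalid, ArrayCopy first
  // allocates a basic instance with the source's value type.
  vtkm::cont::UnknownArrayHandle destination;
  vtkm::cont::ArrayCopy(source, destination);

  // Copy into an array of a known (possibly different) value type.
  vtkm::cont::ArrayHandle<vtkm::Float64> knownDestination;
  vtkm::cont::ArrayCopy(source, knownDestination);
}
```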

@ -15,6 +15,9 @@
#include <vtkm/cont/DeviceAdapterTag.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/UnknownArrayHandle.h>
#include <vtkm/cont/internal/ArrayHandleDeprecated.h>
#include <vtkm/cont/vtkm_cont_export.h>
@ -149,45 +152,36 @@ VTKM_CONT void ArrayCopy(const vtkm::cont::ArrayHandle<InValueType, InStorage>&
"Cannot copy to a read-only array with a different "
"type than the source.");
using IsNewStyle =
std::is_base_of<vtkm::cont::ArrayHandleNewStyle<InValueType, InStorage>, InArrayType>;
using IsOldStyle =
std::is_base_of<vtkm::cont::internal::ArrayHandleDeprecated<InValueType, InStorage>,
InArrayType>;
// Static dispatch cases 1 & 2
detail::ArrayCopyImpl(source, destination, std::integral_constant<bool, IsNewStyle::value>{});
detail::ArrayCopyImpl(source, destination, std::integral_constant<bool, !IsOldStyle::value>{});
}
// Forward declaration
// Cannot include VariantArrayHandle.h here due to circular dependency.
template <typename TypeList>
class VariantArrayHandleBase;
namespace detail
{
VTKM_CONT_EXPORT void ArrayCopy(const vtkm::cont::UnknownArrayHandle& source,
vtkm::cont::UnknownArrayHandle& destination);
struct ArrayCopyFunctor
template <typename T, typename S>
VTKM_CONT void ArrayCopy(const vtkm::cont::UnknownArrayHandle& source,
vtkm::cont::ArrayHandle<T, S>& destination)
{
template <typename InValueType, typename InStorage, typename OutValueType, typename OutStorage>
VTKM_CONT void operator()(const vtkm::cont::ArrayHandle<InValueType, InStorage>& source,
vtkm::cont::ArrayHandle<OutValueType, OutStorage>& destination) const
using DestType = vtkm::cont::ArrayHandle<T, S>;
if (source.IsType<DestType>())
{
vtkm::cont::ArrayCopy(source, destination);
ArrayCopy(source.AsArrayHandle<DestType>(), destination);
}
else
{
vtkm::cont::UnknownArrayHandle destWrapper(destination);
ArrayCopy(source, destWrapper);
// Destination array should not change, but just in case.
destWrapper.AsArrayHandle(destination);
}
};
} // namespace detail
/// \brief Deep copies data in a `VariantArrayHandle` to an array of a known type.
///
/// This form of `ArrayCopy` can be used to copy data from an unknown array type to
/// an array of a known type. Note that regardless of the source type, the data will
/// be deep copied.
///
template <typename InTypeList, typename OutValueType, typename OutStorage>
VTKM_CONT void ArrayCopy(const vtkm::cont::VariantArrayHandleBase<InTypeList>& source,
vtkm::cont::ArrayHandle<OutValueType, OutStorage>& destination)
{
source.CastAndCall(detail::ArrayCopyFunctor{}, destination);
}
}
} // namespace vtkm::cont

@ -0,0 +1,189 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_ArrayExtractComponent_h
#define vtk_m_cont_ArrayExtractComponent_h
#include <vtkm/cont/ArrayHandleBasic.h>
#include <vtkm/cont/ArrayHandleStride.h>
#include <vtkm/cont/ErrorBadValue.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/TypeTraits.h>
#include <vtkm/VecFlat.h>
#include <vtkm/VecTraits.h>
#include <vtkm/cont/vtkm_cont_export.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
// Note: Using partial template specialization instead of function overloading to
// specialize ArrayExtractComponent for different types of array handles. This is
// because function overloading from a templated function is done when the template
// is defined rather than where it is resolved. This causes problems when extracting
// components of, say, an ArrayHandleMultiplexer holding an ArrayHandleSOA.
template <typename T, typename S>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType>
ArrayExtractComponentFallback(const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy)
{
if (allowCopy != vtkm::CopyFlag::On)
{
throw vtkm::cont::ErrorBadValue("Cannot extract component of " +
vtkm::cont::TypeToString<vtkm::cont::ArrayHandle<T, S>>() +
" without copying");
}
VTKM_LOG_S(vtkm::cont::LogLevel::Warn,
"Extracting component " << componentIndex << " of "
<< vtkm::cont::TypeToString<vtkm::cont::ArrayHandle<T, S>>()
<< " requires an inefficient memory copy.");
using BaseComponentType = typename vtkm::VecTraits<T>::BaseComponentType;
vtkm::Id numValues = src.GetNumberOfValues();
vtkm::cont::ArrayHandleBasic<BaseComponentType> dest;
dest.Allocate(numValues);
auto srcPortal = src.ReadPortal();
auto destPortal = dest.WritePortal();
for (vtkm::Id arrayIndex = 0; arrayIndex < numValues; ++arrayIndex)
{
destPortal.Set(arrayIndex,
vtkm::internal::GetFlatVecComponent(srcPortal.Get(arrayIndex), componentIndex));
}
return vtkm::cont::ArrayHandleStride<BaseComponentType>(dest, numValues, 1, 0);
}
template <typename S>
struct ArrayExtractComponentImpl
{
template <typename T>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> operator()(
const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
{
// This is the slow "default" implementation. ArrayHandle implementations should provide
// more efficient overloads where applicable.
return vtkm::cont::internal::ArrayExtractComponentFallback(src, componentIndex, allowCopy);
}
};
template <>
struct ArrayExtractComponentImpl<vtkm::cont::StorageTagStride>
{
template <typename T>
vtkm::cont::ArrayHandleStride<T> operator()(
const vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagStride>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag vtkmNotUsed(allowCopy)) const
{
VTKM_ASSERT(componentIndex == 0);
return src;
}
template <typename T, vtkm::IdComponent N>
auto operator()(const vtkm::cont::ArrayHandle<vtkm::Vec<T, N>, vtkm::cont::StorageTagStride>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
-> decltype((*this)(vtkm::cont::ArrayHandleStride<T>{}, componentIndex, allowCopy))
{
constexpr vtkm::IdComponent subStride = vtkm::internal::TotalNumComponents<T>::value;
vtkm::cont::ArrayHandleStride<vtkm::Vec<T, N>> array(src);
vtkm::cont::ArrayHandleStride<T> tmpIn(array.GetBuffers()[1],
array.GetNumberOfValues(),
array.GetStride() * N,
(array.GetOffset() * N) + (componentIndex / subStride),
array.GetModulo() * N,
array.GetDivisor());
return (*this)(tmpIn, componentIndex % subStride, allowCopy);
}
};
template <>
struct ArrayExtractComponentImpl<vtkm::cont::StorageTagBasic>
{
template <typename T>
auto operator()(const vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
-> decltype(
ArrayExtractComponentImpl<vtkm::cont::StorageTagStride>{}(vtkm::cont::ArrayHandleStride<T>{},
componentIndex,
allowCopy))
{
return ArrayExtractComponentImpl<vtkm::cont::StorageTagStride>{}(
vtkm::cont::ArrayHandleStride<T>(src, src.GetNumberOfValues(), 1, 0),
componentIndex,
allowCopy);
}
};
} // namespace internal
/// \brief Pulls a component out of an `ArrayHandle`.
///
/// Given an `ArrayHandle` of any type, `ArrayExtractComponent` returns an
/// `ArrayHandleStride` of the base component type that contains the data for the
/// specified array component. This function can be used to apply an operation on
/// an `ArrayHandle` one component at a time. Because the array type is always
/// `ArrayHandleStride`, you can drastically cut down on the number of templates
/// to instantiate (at a possible cost to performance).
///
/// Note that `ArrayExtractComponent` will flatten out the indices of any vec value
/// type and return an `ArrayHandleStride` of the base component type. For
/// example, if you call `ArrayExtractComponent` on an `ArrayHandle` with a value
/// type of `vtkm::Vec<vtkm::Vec<vtkm::Float32, 2>, 3>`, you will get an
/// `ArrayHandleStride<vtkm::Float32>` returned. The `componentIndex` provided
/// will be applied to the nested vector in depth first order. So in the previous
/// example, a `componentIndex` of 0 gets the values at [0][0], `componentIndex`
/// of 1 gets [0][1], `componentIndex` of 2 gets [1][0], and so on.
///
/// Some `ArrayHandle`s allow this method to return an `ArrayHandleStride` that
/// shares the same memory as the original `ArrayHandle`. This form will be
/// used if possible. In this case, if data are written into the `ArrayHandleStride`,
/// they are also written into the original `ArrayHandle`. However, other forms will
/// require copies into a new array. In this case, writes into `ArrayHandleStride`
/// will not affect the original `ArrayHandle`.
///
/// For some operations, such as writing into an output array, this shared-array
/// behavior is necessary. For those cases, the optional argument `allowCopy` can
/// be set to `vtkm::CopyFlag::Off` to prevent a copy from being made for the
/// returned `ArrayHandleStride`. If a copy would have been required, an
/// `ErrorBadValue` is thrown instead. If the arrays can be shared, they always
/// will be, regardless of the value of `allowCopy`.
///
/// Many forms of `ArrayHandle` have optimized versions to pull out a component.
/// Some, however, do not. In these cases, a fallback array copy, done in serial,
/// will be performed. A warning will be logged to alert users of this likely
/// performance bottleneck.
///
/// As an implementation note, this function should not be overloaded directly.
/// Instead, `ArrayHandle` implementations should provide a specialization of
/// `vtkm::cont::internal::ArrayExtractComponentImpl`.
///
template <typename T, typename S>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> ArrayExtractComponent(
const vtkm::cont::ArrayHandle<T, S>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy = vtkm::CopyFlag::On)
{
return internal::ArrayExtractComponentImpl<S>{}(src, componentIndex, allowCopy);
}
}
} // namespace vtkm::cont
#endif //vtk_m_cont_ArrayExtractComponent_h
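
A short usage sketch of `ArrayExtractComponent` as documented above; the array value type and component index are illustrative and the example is not part of this changeset.

```cpp
#include <vtkm/cont/ArrayExtractComponent.h>
#include <vtkm/cont/ArrayHandle.h>

void ExtractExample(const vtkm::cont::ArrayHandle<vtkm::Vec3f>& points)
{
  // Pull out the Y component (flat index 1) of every point. For a basic array this
  // shares memory with `points`: stride 3, offset 1 into the underlying float buffer.
  vtkm::cont::ArrayHandleStride<vtkm::FloatDefault> yValues =
    vtkm::cont::ArrayExtractComponent(points, 1);

  // Disallow copying; if sharing is impossible for the storage, ErrorBadValue is thrown.
  auto ySharedOnly = vtkm::cont::ArrayExtractComponent(points, 1, vtkm::CopyFlag::Off);

  (void)yValues;
  (void)ySharedOnly;
}
```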

@ -17,7 +17,6 @@
#include <vtkm/Flags.h>
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/DeviceAdapterList.h>
#include <vtkm/cont/ErrorBadValue.h>
#include <vtkm/cont/ErrorInternal.h>
@ -33,9 +32,6 @@
#include <mutex>
#include <vector>
#include <vtkm/cont/internal/ArrayHandleExecutionManager.h>
#include <vtkm/cont/internal/ArrayPortalCheck.h>
#include <vtkm/cont/internal/ArrayPortalFromIterators.h>
#include <vtkm/cont/internal/Buffer.h>
namespace vtkm
@ -256,630 +252,6 @@ struct GetTypeInParentheses<void(T)>
#define VTKM_ARRAY_HANDLE_SUBCLASS_NT(classname, superclass) \
VTK_M_ARRAY_HANDLE_SUBCLASS_IMPL(classname, (classname), superclass, )
/// \brief Manages an array-worth of data.
///
/// \c ArrayHandle manages an array of data that can be manipulated by VTKm
/// algorithms. The \c ArrayHandle may have up to two copies of the array, one
/// for the control environment and one for the execution environment, although
/// depending on the device and how the array is being used, the \c ArrayHandle
/// will only have one copy when possible.
///
/// An ArrayHandle can be constructed one of two ways. Its default construction
/// creates an empty, unallocated array that can later be allocated and filled
/// either by the user or a VTKm algorithm. The \c ArrayHandle can also be
/// constructed with iterators to a user's array. In this case the \c
/// ArrayHandle will keep a reference to this array but will throw an exception
/// if asked to re-allocate to a larger size.
///
/// \c ArrayHandle behaves like a shared smart pointer in that when it is copied
/// each copy holds a reference to the same array. These copies are reference
/// counted so that when all copies of the \c ArrayHandle are destroyed, any
/// allocated memory is released.
///
///
template <typename T, typename StorageTag_ = VTKM_DEFAULT_STORAGE_TAG>
class VTKM_ALWAYS_EXPORT ArrayHandle : public internal::ArrayHandleBase
{
private:
// Basic storage is specialized; this template should not be instantiated
// for it. Specialization is in ArrayHandleBasicImpl.h
static_assert(!std::is_same<StorageTag_, StorageTagBasic>::value,
"StorageTagBasic should not use this implementation.");
using ExecutionManagerType =
vtkm::cont::internal::ArrayHandleExecutionManagerBase<T, StorageTag_>;
using MutexType = std::mutex;
using LockType = std::unique_lock<MutexType>;
public:
using StorageType = vtkm::cont::internal::Storage<T, StorageTag_>;
using ValueType = T;
using StorageTag = StorageTag_;
using WritePortalType = vtkm::cont::internal::ArrayPortalCheck<typename StorageType::PortalType>;
using ReadPortalType =
vtkm::cont::internal::ArrayPortalCheck<typename StorageType::PortalConstType>;
template <typename DeviceAdapterTag>
struct ExecutionTypes
{
using Portal = typename ExecutionManagerType::template ExecutionTypes<DeviceAdapterTag>::Portal;
using PortalConst =
typename ExecutionManagerType::template ExecutionTypes<DeviceAdapterTag>::PortalConst;
};
using PortalControl VTKM_DEPRECATED(1.6, "Use ArrayHandle::WritePortalType instead.") =
typename StorageType::PortalType;
using PortalConstControl VTKM_DEPRECATED(1.6, "Use ArrayHandle::ReadPortalType instead.") =
typename StorageType::PortalConstType;
/// Constructs an empty ArrayHandle. Typically used for output or
/// intermediate arrays that will be filled by a VTKm algorithm.
///
VTKM_CONT ArrayHandle();
/// Copy constructor.
///
/// Implemented so that it is defined exclusively in the control environment.
/// If there is a separate device for the execution environment (for example,
/// with CUDA), then the automatically generated copy constructor could be
/// created for all devices, and it would not be valid for all devices.
///
ArrayHandle(const vtkm::cont::ArrayHandle<ValueType, StorageTag>& src);
/// Move constructor.
///
/// Implemented so that it is defined exclusively in the control environment.
/// If there is a separate device for the execution environment (for example,
/// with CUDA), then the automatically generated move constructor could be
/// created for all devices, and it would not be valid for all devices.
///
ArrayHandle(vtkm::cont::ArrayHandle<ValueType, StorageTag>&& src) noexcept;
/// Special constructor for subclass specializations that need to set the
/// initial state of the control array. When this constructor is used, it
/// is assumed that the control array is valid.
///
ArrayHandle(const StorageType& storage);
/// Special constructor for subclass specializations that need to set the
/// initial state of the control array. When this constructor is used, it
/// is assumed that the control array is valid.
///
ArrayHandle(StorageType&& storage) noexcept;
/// Destructs an empty ArrayHandle.
///
/// Implemented so that it is defined exclusively in the control environment.
/// If there is a separate device for the execution environment (for example,
/// with CUDA), then the automatically generated destructor could be
/// created for all devices, and it would not be valid for all devices.
///
~ArrayHandle();
/// \brief Copies an ArrayHandle
///
VTKM_CONT
vtkm::cont::ArrayHandle<ValueType, StorageTag>& operator=(
const vtkm::cont::ArrayHandle<ValueType, StorageTag>& src);
/// \brief Move and Assignment of an ArrayHandle
///
VTKM_CONT
vtkm::cont::ArrayHandle<ValueType, StorageTag>& operator=(
vtkm::cont::ArrayHandle<ValueType, StorageTag>&& src) noexcept;
/// Like a pointer, two \c ArrayHandles are considered equal if they point
/// to the same location in memory.
///
VTKM_CONT
bool operator==(const ArrayHandle<ValueType, StorageTag>& rhs) const
{
return (this->Internals == rhs.Internals);
}
VTKM_CONT
bool operator!=(const ArrayHandle<ValueType, StorageTag>& rhs) const
{
return (this->Internals != rhs.Internals);
}
template <typename VT, typename ST>
VTKM_CONT bool operator==(const ArrayHandle<VT, ST>&) const
{
return false; // different valuetype and/or storage
}
template <typename VT, typename ST>
VTKM_CONT bool operator!=(const ArrayHandle<VT, ST>&) const
{
return true; // different valuetype and/or storage
}
/// Get the storage.
///
VTKM_CONT StorageType& GetStorage();
/// Get the storage.
///
VTKM_CONT const StorageType& GetStorage() const;
/// Get the array portal of the control array.
/// Since worklet invocations are asynchronous and this routine is a synchronization point,
/// exceptions may be thrown for errors from previously executed worklets.
///
/// \deprecated Use `WritePortal` instead. Note that the portal returned from `WritePortal`
/// will disallow any other reads or writes to the array while it is in scope.
///
VTKM_CONT
VTKM_DEPRECATED(1.6,
"Use ArrayHandle::WritePortal() instead. "
"Note that the returned portal will lock the array while it is in scope.")
/// \cond NOPE
typename StorageType::PortalType GetPortalControl();
/// \endcond
/// Get the array portal of the control array.
/// Since worklet invocations are asynchronous and this routine is a synchronization point,
/// exceptions may be thrown for errors from previously executed worklets.
///
/// \deprecated Use `ReadPortal` instead. Note that the portal returned from `ReadPortal`
/// will disallow any writes to the array while it is in scope.
///
VTKM_CONT
VTKM_DEPRECATED(1.6,
"Use ArrayHandle::ReadPortal() instead. "
"Note that the returned portal will lock the array while it is in scope.")
/// \cond NOPE
typename StorageType::PortalConstType GetPortalConstControl() const;
/// \endcond
/// \@{
/// \brief Get an array portal that can be used in the control environment.
///
/// The returned array can be used in the control environment to read values from the array. (It
/// is not possible to write to the returned portal. That is `Get` will work on the portal, but
/// `Set` will not.)
///
/// **Note:** The returned portal cannot be used in the execution environment. This is because
/// the portal will not work on some devices like GPUs. To get a portal that will work in the
/// execution environment, use `PrepareForInput`.
///
VTKM_CONT ReadPortalType ReadPortal() const;
/// \@}
/// \@{
/// \brief Get an array portal that can be used in the control environment.
///
/// The returned array can be used in the control environment to read and write values to the
/// array.
///
///
/// **Note:** The returned portal cannot be used in the execution environment. This is because
/// the portal will not work on some devices like GPUs. To get a portal that will work in the
/// execution environment, use `PrepareForInput`.
///
VTKM_CONT WritePortalType WritePortal() const;
/// \@}
/// Returns the number of entries in the array.
///
VTKM_CONT vtkm::Id GetNumberOfValues() const
{
LockType lock = this->GetLock();
return this->GetNumberOfValues(lock);
}
/// \brief Allocates an array large enough to hold the given number of values.
///
/// The allocation may be done on an already existing array, but can wipe out
/// any data already in the array. This method can throw
/// ErrorBadAllocation if the array cannot be allocated or
/// ErrorBadValue if the allocation is not feasible (for example, the
/// array storage is read-only).
///
VTKM_CONT
void Allocate(vtkm::Id numberOfValues)
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->WaitToWrite(lock, token);
this->ReleaseResourcesExecutionInternal(lock, token);
this->Internals->GetControlArray(lock)->Allocate(numberOfValues);
// Set to false and then to true to ensure anything pointing to an array before the allocate
// is invalidated.
this->Internals->SetControlArrayValid(lock, false);
this->Internals->SetControlArrayValid(lock, true);
}
}
/// \brief Reduces the size of the array without changing its values.
///
/// This method allows you to resize the array without reallocating it. The
/// number of entries in the array is changed to \c numberOfValues. The data
/// in the array (from indices 0 to \c numberOfValues - 1) are the same, but
/// \c numberOfValues must be equal or less than the preexisting size
/// (returned from GetNumberOfValues). That is, this method can only be used
/// to shorten the array, not lengthen.
void Shrink(vtkm::Id numberOfValues);
/// Releases any resources being used in the execution environment (that are
/// not being shared by the control environment).
///
VTKM_CONT void ReleaseResourcesExecution()
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->WaitToWrite(lock, token);
// Save any data in the execution environment by making sure it is synced
// with the control environment.
this->SyncControlArray(lock, token);
this->ReleaseResourcesExecutionInternal(lock, token);
}
}
/// Releases all resources in both the control and execution environments.
///
VTKM_CONT void ReleaseResources()
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->ReleaseResourcesExecutionInternal(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
this->Internals->GetControlArray(lock)->ReleaseResources();
this->Internals->SetControlArrayValid(lock, false);
}
}
}
/// Prepares this array to be used as an input to an operation in the
/// execution environment. If necessary, copies data to the execution
/// environment. Can throw an exception if this array does not yet contain
/// any data. Returns a portal that can be used in code running in the
/// execution environment.
///
/// The `Token` object provided will be attached to this `ArrayHandle`.
/// The returned portal is guaranteed to be valid while the `Token` is
/// still attached and in scope. Other operations on this `ArrayHandle`
/// that would invalidate the returned portal will block until the `Token`
/// is released. Likewise, this method will block if another `Token` is
/// already attached. This can potentially lead to deadlocks.
///
template <typename DeviceAdapterTag>
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(
DeviceAdapterTag,
vtkm::cont::Token& token) const;
/// Prepares (allocates) this array to be used as an output from an operation
/// in the execution environment. The internal state of this class is set to
/// have valid data in the execution array with the assumption that the array
/// will be filled soon (i.e. before any other methods of this object are
/// called). Returns a portal that can be used in code running in the
/// execution environment.
///
/// The `Token` object provided will be attached to this `ArrayHandle`.
/// The returned portal is guaranteed to be valid while the `Token` is
/// still attached and in scope. Other operations on this `ArrayHandle`
/// that would invalidate the returned portal will block until the `Token`
/// is released. Likewise, this method will block if another `Token` is
/// already attached. This can potentially lead to deadlocks.
///
template <typename DeviceAdapterTag>
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal
PrepareForOutput(vtkm::Id numberOfValues, DeviceAdapterTag, vtkm::cont::Token& token);
/// Prepares this array to be used in an in-place operation (both as input
/// and output) in the execution environment. If necessary, copies data to
/// the execution environment. Can throw an exception if this array does not
/// yet contain any data. Returns a portal that can be used in code running
/// in the execution environment.
///
/// The `Token` object provided will be attached to this `ArrayHandle`.
/// The returned portal is guaranteed to be valid while the `Token` is
/// still attached and in scope. Other operations on this `ArrayHandle`
/// that would invalidate the returned portal will block until the `Token`
/// is released. Likewise, this method will block if another `Token` is
/// already attached. This can potentially lead to deadlocks.
///
template <typename DeviceAdapterTag>
VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(
DeviceAdapterTag,
vtkm::cont::Token& token);
template <typename DeviceAdapterTag>
VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInput now requires a vtkm::cont::Token object.")
typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(DeviceAdapterTag) const
{
vtkm::cont::Token token;
return this->PrepareForInput(DeviceAdapterTag{}, token);
}
template <typename DeviceAdapterTag>
VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForOutput now requires a vtkm::cont::Token object.")
typename ExecutionTypes<DeviceAdapterTag>::Portal
PrepareForOutput(vtkm::Id numberOfValues, DeviceAdapterTag)
{
vtkm::cont::Token token;
return this->PrepareForOutput(numberOfValues, DeviceAdapterTag{}, token);
}
template <typename DeviceAdapterTag>
VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInPlace now requires a vtkm::cont::Token object.")
typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(DeviceAdapterTag)
{
vtkm::cont::Token token;
return this->PrepareForInPlace(DeviceAdapterTag{}, token);
}
/// Returns the DeviceAdapterId for the current device. If there is no device
/// with an up-to-date copy of the data, VTKM_DEVICE_ADAPTER_UNDEFINED is
/// returned.
///
/// Note that in a multithreaded environment the validity of this result can
/// change.
VTKM_CONT
DeviceAdapterId GetDeviceAdapterId() const
{
LockType lock = this->GetLock();
return this->Internals->IsExecutionArrayValid(lock)
? this->Internals->GetExecutionArray(lock)->GetDeviceAdapterId()
: DeviceAdapterTagUndefined{};
}
/// Synchronizes the control array with the execution array. If either the
/// user array or control array is already valid, this method does nothing
/// (because the data is already available in the control environment).
/// Although the internal state of this class can change, the method is
/// declared const because logically the data does not.
///
VTKM_CONT void SyncControlArray() const
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->SyncControlArray(lock, token);
}
}
/// \brief Enqueue a token for access to this ArrayHandle.
///
/// This method places the given `Token` into the queue of `Token`s waiting for
/// access to this `ArrayHandle` and then returns immediately. When this token
/// is later used to get data from this `ArrayHandle` (for example, in a call to
/// `PrepareForInput`), it will use this place in the queue while waiting for
/// access.
///
/// This method is to be used to ensure that a set of accesses to an `ArrayHandle`
/// that happen on multiple threads occur in a specified order. For example, if
/// you spawn off a job to modify data in an `ArrayHandle` and then spawn off a job
/// that reads that same data, you need to make sure that the first job gets
/// access to the `ArrayHandle` before the second. If they both just attempt to call
/// their respective `Prepare` methods, there is no guarantee which order they
/// will occur. Having the spawning thread first call this method will ensure the order.
///
/// \warning After calling this method it is required to subsequently
/// call a method like one of the `Prepare` methods that attaches the token
/// to this `ArrayHandle`. Otherwise, the enqueued token will block any subsequent
/// access to the `ArrayHandle`, even if the `Token` is destroyed.
///
VTKM_CONT void Enqueue(const vtkm::cont::Token& token) const;
private:
/// Acquires a lock on the internals of this `ArrayHandle`. The calling
/// function should keep the returned lock and let it go out of scope
/// when the lock is no longer needed.
///
LockType GetLock() const { return LockType(this->Internals->Mutex); }
/// Returns true if read operations can currently be performed.
///
VTKM_CONT bool CanRead(const LockType& lock, const vtkm::cont::Token& token) const;
//// Returns true if write operations can currently be performed.
///
VTKM_CONT bool CanWrite(const LockType& lock, const vtkm::cont::Token& token) const;
//// Will block the current thread until a read can be performed.
///
VTKM_CONT void WaitToRead(LockType& lock, vtkm::cont::Token& token) const;
//// Will block the current thread until a write can be performed.
///
VTKM_CONT void WaitToWrite(LockType& lock, vtkm::cont::Token& token, bool fakeRead = false) const;
/// Gets this array handle ready to interact with the given device. If the
/// array handle has already interacted with this device, then this method
/// does nothing. Although the internal state of this class can change, the
/// method is declared const because logically the data does not.
///
template <typename DeviceAdapterTag>
VTKM_CONT void PrepareForDevice(LockType& lock, vtkm::cont::Token& token, DeviceAdapterTag) const;
/// Synchronizes the control array with the execution array. If either the
/// user array or control array is already valid, this method does nothing
/// (because the data is already available in the control environment).
/// Although the internal state of this class can change, the method is
/// declared const because logically the data does not.
///
VTKM_CONT void SyncControlArray(LockType& lock, vtkm::cont::Token& token) const;
vtkm::Id GetNumberOfValues(LockType& lock) const;
VTKM_CONT
void ReleaseResourcesExecutionInternal(LockType& lock, vtkm::cont::Token& token) const
{
if (this->Internals->IsExecutionArrayValid(lock))
{
this->WaitToWrite(lock, token);
// Note that it is possible that while waiting someone else deleted the execution array.
// That is why we check again.
}
if (this->Internals->IsExecutionArrayValid(lock))
{
this->Internals->GetExecutionArray(lock)->ReleaseResources();
this->Internals->SetExecutionArrayValid(lock, false);
}
}
VTKM_CONT void Enqueue(const LockType& lock, const vtkm::cont::Token& token) const;
class VTKM_ALWAYS_EXPORT InternalStruct
{
mutable StorageType ControlArray;
mutable std::shared_ptr<bool> ControlArrayValid;
mutable std::unique_ptr<ExecutionManagerType> ExecutionArray;
mutable bool ExecutionArrayValid = false;
mutable vtkm::cont::Token::ReferenceCount ReadCount = 0;
mutable vtkm::cont::Token::ReferenceCount WriteCount = 0;
mutable std::deque<vtkm::cont::Token::Reference> Queue;
VTKM_CONT void CheckLock(const LockType& lock) const
{
VTKM_ASSERT((lock.mutex() == &this->Mutex) && (lock.owns_lock()));
}
public:
MutexType Mutex;
std::condition_variable ConditionVariable;
InternalStruct() = default;
InternalStruct(const StorageType& storage);
InternalStruct(StorageType&& storage);
~InternalStruct()
{
// It should not be possible to destroy this array if any tokens are still attached to it.
LockType lock(this->Mutex);
VTKM_ASSERT((*this->GetReadCount(lock) == 0) && (*this->GetWriteCount(lock) == 0));
this->SetControlArrayValid(lock, false);
}
// To access any feature in InternalStruct, you must have locked the mutex. You have
// to prove it by passing in a reference to a std::unique_lock.
VTKM_CONT bool IsControlArrayValid(const LockType& lock) const
{
this->CheckLock(lock);
if (!this->ControlArrayValid)
{
return false;
}
else
{
return *this->ControlArrayValid;
}
}
VTKM_CONT void SetControlArrayValid(const LockType& lock, bool value)
{
this->CheckLock(lock);
if (IsControlArrayValid(lock) == value)
{
return;
}
if (value) // ControlArrayValid == false or nullptr
{
// If we are changing the valid flag from false to true, then refresh the pointer.
// There may be array portals that already have a reference to the flag. Those portals
// will stay in an invalid state whereas new portals will go to a valid state. To
// handle both conditions, drop the old reference and create a new one.
this->ControlArrayValid.reset(new bool(true));
}
else // value == false and ControlArrayValid == true
{
*this->ControlArrayValid = false;
}
}
VTKM_CONT std::shared_ptr<bool> GetControlArrayValidPointer(const LockType& lock) const
{
this->CheckLock(lock);
return this->ControlArrayValid;
}
VTKM_CONT StorageType* GetControlArray(const LockType& lock) const
{
this->CheckLock(lock);
return &this->ControlArray;
}
VTKM_CONT bool IsExecutionArrayValid(const LockType& lock) const
{
this->CheckLock(lock);
return this->ExecutionArrayValid;
}
VTKM_CONT void SetExecutionArrayValid(const LockType& lock, bool value)
{
this->CheckLock(lock);
this->ExecutionArrayValid = value;
}
VTKM_CONT ExecutionManagerType* GetExecutionArray(const LockType& lock) const
{
this->CheckLock(lock);
return this->ExecutionArray.get();
}
VTKM_CONT void DeleteExecutionArray(const LockType& lock)
{
this->CheckLock(lock);
this->ExecutionArray.reset();
this->ExecutionArrayValid = false;
}
template <typename DeviceAdapterTag>
VTKM_CONT void NewExecutionArray(const LockType& lock, DeviceAdapterTag)
{
VTKM_IS_DEVICE_ADAPTER_TAG(DeviceAdapterTag);
this->CheckLock(lock);
VTKM_ASSERT(this->ExecutionArray == nullptr);
VTKM_ASSERT(!this->ExecutionArrayValid);
this->ExecutionArray.reset(
new vtkm::cont::internal::ArrayHandleExecutionManager<T, StorageTag, DeviceAdapterTag>(
&this->ControlArray));
}
VTKM_CONT vtkm::cont::Token::ReferenceCount* GetReadCount(const LockType& lock) const
{
this->CheckLock(lock);
return &this->ReadCount;
}
VTKM_CONT vtkm::cont::Token::ReferenceCount* GetWriteCount(const LockType& lock) const
{
this->CheckLock(lock);
return &this->WriteCount;
}
VTKM_CONT std::deque<vtkm::cont::Token::Reference>& GetQueue(const LockType& lock) const
{
this->CheckLock(lock);
return this->Queue;
}
};
VTKM_CONT
ArrayHandle(const std::shared_ptr<InternalStruct>& i)
: Internals(i)
{
}
std::shared_ptr<InternalStruct> Internals;
};
namespace detail
{
@ -895,91 +267,31 @@ VTKM_CONT_EXPORT VTKM_CONT vtkm::cont::DeviceAdapterId ArrayHandleGetDeviceAdapt
} // namespace detail
// This macro is used to declare an ArrayHandle that uses the new style of Storage
// that leverages Buffer objects. This macro will go away once ArrayHandle
// is replaced with ArrayHandleNewStyle. To use this macro, first have a declaration
// of the template and then put the macro like this:
//
// template <typename T>
// VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::StorageTagFoo);
//
// Don't forget to use VTKM_PASS_COMMAS if one of the macro arguments contains
// a template with multiple parameters.
#define VTKM_ARRAY_HANDLE_NEW_STYLE(ValueType_, StorageType_) \
class VTKM_ALWAYS_EXPORT ArrayHandle<ValueType_, StorageType_> \
: public ArrayHandleNewStyle<ValueType_, StorageType_> \
{ \
using Superclass = ArrayHandleNewStyle<ValueType_, StorageType_>; \
\
public: \
VTKM_CONT \
ArrayHandle() \
: Superclass() \
{ \
} \
\
VTKM_CONT \
ArrayHandle(const ArrayHandle<ValueType_, StorageType_>& src) \
: Superclass(src) \
{ \
} \
\
VTKM_CONT \
ArrayHandle(ArrayHandle<ValueType_, StorageType_>&& src) noexcept \
: Superclass(std::move(src)) \
{ \
} \
\
VTKM_CONT \
ArrayHandle(const ArrayHandleNewStyle<ValueType_, StorageType_>& src) \
: Superclass(src) \
{ \
} \
\
VTKM_CONT \
ArrayHandle(ArrayHandleNewStyle<ValueType_, StorageType_>&& src) noexcept \
: Superclass(std::move(src)) \
{ \
} \
\
VTKM_CONT ArrayHandle(const vtkm::cont::internal::Buffer* buffers) \
: Superclass(buffers) \
{ \
} \
\
VTKM_CONT ArrayHandle(const std::vector<vtkm::cont::internal::Buffer>& buffers) \
: Superclass(buffers) \
{ \
} \
\
VTKM_CONT ArrayHandle(std::vector<vtkm::cont::internal::Buffer>&& buffers) \
: Superclass(std::move(buffers)) \
{ \
} \
\
VTKM_CONT \
ArrayHandle<ValueType_, StorageType_>& operator=( \
const ArrayHandle<ValueType_, StorageType_>& src) \
{ \
this->Superclass::operator=(src); \
return *this; \
} \
\
VTKM_CONT \
ArrayHandle<ValueType_, StorageType_>& operator=( \
ArrayHandle<ValueType_, StorageType_>&& src) noexcept \
{ \
this->Superclass::operator=(std::move(src)); \
return *this; \
} \
\
VTKM_CONT ~ArrayHandle() {} \
}
/// This new style of ArrayHandle will eventually replace the classic ArrayHandle
/// \brief Manages an array-worth of data.
///
/// `ArrayHandle` manages an array of data that can be manipulated by VTKm
/// algorithms. The `ArrayHandle` may have up to two copies of the array, one
/// for the control environment and one for the execution environment, although
/// depending on the device and how the array is being used, the `ArrayHandle`
/// will only have one copy when possible.
///
/// An `ArrayHandle` is often constructed by instantiating one of the `ArrayHandle`
/// subclasses. Several basic `ArrayHandle` types can also be constructed directly
/// and then allocated. The `ArrayHandleBasic` subclass provides mechanisms for
/// importing user arrays into an `ArrayHandle`.
///
/// `ArrayHandle` behaves like a shared smart pointer in that when it is copied
/// each copy holds a reference to the same array. These copies are reference
/// counted so that when all copies of the `ArrayHandle` are destroyed, any
/// allocated memory is released.
///
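/// A minimal usage sketch (illustrative only; the array is filled through its
/// write portal in the control environment):
///
///   vtkm::cont::ArrayHandle<vtkm::Float32> array;
///   array.Allocate(10);
///   auto writePortal = array.WritePortal();
///   for (vtkm::Id i = 0; i < array.GetNumberOfValues(); ++i)
///   {
///     writePortal.Set(i, static_cast<vtkm::Float32>(i));
///   }
///   // Copies are shallow and reference counted; both handles see the same data.
///   vtkm::cont::ArrayHandle<vtkm::Float32> sameArray = array;
///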
template <typename T, typename StorageTag_ = VTKM_DEFAULT_STORAGE_TAG>
class VTKM_ALWAYS_EXPORT ArrayHandleNewStyle : public internal::ArrayHandleBase
class VTKM_ALWAYS_EXPORT ArrayHandle : public internal::ArrayHandleBase
{
VTKM_STATIC_ASSERT_MSG(
(internal::IsValidArrayHandle<T, StorageTag_>::value),
"Attempted to create an ArrayHandle with an invalid type/storage combination.");
public:
using ValueType = T;
using StorageTag = StorageTag_;
@ -990,7 +302,7 @@ public:
// TODO: Deprecate this
template <typename Device>
struct ExecutionTypes
struct VTKM_DEPRECATED(1.6, "Use ReadPortalType and WritePortalType.") ExecutionTypes
{
using Portal = WritePortalType;
using PortalConst = ReadPortalType;
@ -1003,7 +315,7 @@ public:
/// Constructs an empty ArrayHandle.
///
VTKM_CONT ArrayHandleNewStyle()
VTKM_CONT ArrayHandle()
: Buffers(static_cast<std::size_t>(StorageType::GetNumberOfBuffers()))
{
}
@ -1015,7 +327,7 @@ public:
/// with CUDA), then the automatically generated copy constructor could be
/// created for all devices, and it would not be valid for all devices.
///
VTKM_CONT ArrayHandleNewStyle(const vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& src)
VTKM_CONT ArrayHandle(const vtkm::cont::ArrayHandle<ValueType, StorageTag>& src)
: Buffers(src.Buffers)
{
}
@ -1027,8 +339,7 @@ public:
/// with CUDA), then the automatically generated move constructor could be
/// created for all devices, and it would not be valid for all devices.
///
VTKM_CONT ArrayHandleNewStyle(
vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>&& src) noexcept
VTKM_CONT ArrayHandle(vtkm::cont::ArrayHandle<ValueType, StorageTag>&& src) noexcept
: Buffers(std::move(src.Buffers))
{
}
@ -1037,19 +348,19 @@ public:
/// Special constructor for subclass specializations that need to set the
/// initial state array. Used when pulling data from other sources.
///
VTKM_CONT ArrayHandleNewStyle(const std::vector<vtkm::cont::internal::Buffer>& buffers)
VTKM_CONT ArrayHandle(const std::vector<vtkm::cont::internal::Buffer>& buffers)
: Buffers(buffers)
{
VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Buffers.size()) == this->GetNumberOfBuffers());
}
VTKM_CONT ArrayHandleNewStyle(std::vector<vtkm::cont::internal::Buffer>&& buffers) noexcept
VTKM_CONT ArrayHandle(std::vector<vtkm::cont::internal::Buffer>&& buffers) noexcept
: Buffers(std::move(buffers))
{
VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Buffers.size()) == this->GetNumberOfBuffers());
}
VTKM_CONT ArrayHandleNewStyle(const vtkm::cont::internal::Buffer* buffers)
VTKM_CONT ArrayHandle(const vtkm::cont::internal::Buffer* buffers)
: Buffers(buffers, buffers + StorageType::GetNumberOfBuffers())
{
}
@ -1062,13 +373,13 @@ public:
/// with CUDA), then the automatically generated destructor could be
/// created for all devices, and it would not be valid for all devices.
///
VTKM_CONT ~ArrayHandleNewStyle() {}
VTKM_CONT ~ArrayHandle() {}
/// \brief Copies an ArrayHandle
///
VTKM_CONT
vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& operator=(
const vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& src)
vtkm::cont::ArrayHandle<ValueType, StorageTag>& operator=(
const vtkm::cont::ArrayHandle<ValueType, StorageTag>& src)
{
this->Buffers = src.Buffers;
return *this;
@ -1077,8 +388,8 @@ public:
/// \brief Move and Assignment of an ArrayHandle
///
VTKM_CONT
vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& operator=(
vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>&& src) noexcept
vtkm::cont::ArrayHandle<ValueType, StorageTag>& operator=(
vtkm::cont::ArrayHandle<ValueType, StorageTag>&& src) noexcept
{
this->Buffers = std::move(src.Buffers);
return *this;
@ -1111,7 +422,7 @@ public:
return true; // different valuetype and/or storage
}
VTKM_CONT vtkm::IdComponent GetNumberOfBuffers() const
VTKM_CONT static constexpr vtkm::IdComponent GetNumberOfBuffers()
{
return StorageType::GetNumberOfBuffers();
}
@ -1216,7 +527,7 @@ public:
}
///@}
/// Deprecate this.
VTKM_DEPRECATED(1.6, "Use Allocate(n, vtkm::CopyFlag::On) instead of Shrink(n).")
VTKM_CONT void Shrink(vtkm::Id numberOfValues)
{
this->Allocate(numberOfValues, vtkm::CopyFlag::On);
@ -1389,8 +700,7 @@ public:
///
/// Takes the data that is in \a source and copies that data into this array.
///
VTKM_CONT void DeepCopyFrom(
const vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& source) const
VTKM_CONT void DeepCopyFrom(const vtkm::cont::ArrayHandle<ValueType, StorageTag>& source) const
{
VTKM_ASSERT(this->Buffers.size() == source.Buffers.size());
@ -1532,6 +842,112 @@ VTKM_NEVER_EXPORT VTKM_CONT inline void printSummary_ArrayHandle(
}
out << "]\n";
}
namespace internal
{
namespace detail
{
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>&)
{
// Nothing left to add.
}
template <typename T, typename S, typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const vtkm::cont::ArrayHandle<T, S>& array,
const Args&... args)
{
vtkm::cont::internal::Buffer* arrayBuffers = array.GetBuffers();
buffers.insert(buffers.end(), arrayBuffers, arrayBuffers + array.GetNumberOfBuffers());
CreateBuffersImpl(buffers, args...);
}
template <typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const vtkm::cont::internal::Buffer& buffer,
const Args&... args)
{
buffers.push_back(buffer);
CreateBuffersImpl(buffers, args...);
}
template <typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const std::vector<vtkm::cont::internal::Buffer>& addbuffs,
const Args&... args)
{
buffers.insert(buffers.end(), addbuffs.begin(), addbuffs.end());
CreateBuffersImpl(buffers, args...);
}
template <typename Arg0, typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const Arg0& arg0,
const Args&... args);
template <typename T, typename S, typename... Args>
VTKM_CONT inline void CreateBuffersResolveArrays(std::vector<vtkm::cont::internal::Buffer>& buffers,
std::true_type,
const vtkm::cont::ArrayHandle<T, S>& array,
const Args&... args)
{
CreateBuffersImpl(buffers, array, args...);
}
template <typename MetaData, typename... Args>
VTKM_CONT inline void CreateBuffersResolveArrays(std::vector<vtkm::cont::internal::Buffer>& buffers,
std::false_type,
const MetaData& metadata,
const Args&... args)
{
vtkm::cont::internal::Buffer buffer;
buffer.SetMetaData(metadata);
buffers.push_back(std::move(buffer));
CreateBuffersImpl(buffers, args...);
}
template <typename Arg0, typename... Args>
VTKM_CONT inline void CreateBuffersImpl(std::vector<vtkm::cont::internal::Buffer>& buffers,
const Arg0& arg0,
const Args&... args)
{
// If the argument is a subclass of ArrayHandle, the template resolution will pick this
// overload instead of the correct ArrayHandle overload. To resolve that, check to see
// if the type is an `ArrayHandle` and use `CreateBuffersResolveArrays` to choose the
// right path.
using IsArray = typename vtkm::cont::internal::ArrayHandleCheck<Arg0>::type::type;
CreateBuffersResolveArrays(buffers, IsArray{}, arg0, args...);
}
} // namespace detail
/// \brief Create the buffers for an `ArrayHandle` specialization.
///
/// When creating an `ArrayHandle` specialization, it is important to build a
/// `std::vector` of `Buffer` objects. This function simplifies creating
/// these buffer objects. Simply pass as arguments the things you want in the
/// buffers. The parameters to `CreateBuffers` are added to the `Buffer` `vector`
/// in the order provided. The actual object(s) added depends on the type of
/// parameter:
///
/// - `ArrayHandle`: The buffers from the `ArrayHandle` are added to the list.
/// - `Buffer`: A copy of the buffer is added to the list.
/// - `std::vector<Buffer>`: Copies of all buffers in this vector are added to the list.
/// - Anything else: A buffer with the given object attached as metadata is
///   added to the list.
///
template <typename... Args>
VTKM_CONT inline std::vector<vtkm::cont::internal::Buffer> CreateBuffers(const Args&... args)
{
std::vector<vtkm::cont::internal::Buffer> buffers;
buffers.reserve(sizeof...(args));
detail::CreateBuffersImpl(buffers, args...);
return buffers;
}
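// An illustrative sketch (the names below are hypothetical, not part of this
// header) of how a storage class might assemble its buffer list: the first
// argument contributes the buffers of an existing array, and the second adds
// one buffer carrying `MyMetaData` as its metadata.
//
//   struct MyMetaData
//   {
//     vtkm::Id PaddedLength = 0;
//   };
//
//   std::vector<vtkm::cont::internal::Buffer> buffers =
//     vtkm::cont::internal::CreateBuffers(someArrayHandle, MyMetaData{});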
} // namespace internal
}
} //namespace vtkm::cont
@ -1539,8 +955,4 @@ VTKM_NEVER_EXPORT VTKM_CONT inline void printSummary_ArrayHandle(
#include <vtkm/cont/ArrayHandleBasic.h>
#endif
#ifndef vtk_m_cont_ArrayHandle_hxx
#include <vtkm/cont/ArrayHandle.hxx>
#endif
#endif //vtk_m_cont_ArrayHandle_h

@ -1,603 +0,0 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_cont_ArrayHandle_hxx
#define vtk_m_cont_ArrayHandle_hxx
#include <vtkm/cont/ArrayHandle.h>
namespace vtkm
{
namespace cont
{
template <typename T, typename S>
ArrayHandle<T, S>::InternalStruct::InternalStruct(
const typename ArrayHandle<T, S>::StorageType& storage)
: ControlArray(storage)
, ControlArrayValid(new bool(true))
, ExecutionArrayValid(false)
{
}
template <typename T, typename S>
ArrayHandle<T, S>::InternalStruct::InternalStruct(typename ArrayHandle<T, S>::StorageType&& storage)
: ControlArray(std::move(storage))
, ControlArrayValid(new bool(true))
, ExecutionArrayValid(false)
{
}
template <typename T, typename S>
ArrayHandle<T, S>::ArrayHandle()
: Internals(std::make_shared<InternalStruct>())
{
}
template <typename T, typename S>
ArrayHandle<T, S>::ArrayHandle(const ArrayHandle<T, S>& src)
: Internals(src.Internals)
{
}
template <typename T, typename S>
ArrayHandle<T, S>::ArrayHandle(ArrayHandle<T, S>&& src) noexcept
: Internals(std::move(src.Internals))
{
}
template <typename T, typename S>
ArrayHandle<T, S>::ArrayHandle(const typename ArrayHandle<T, S>::StorageType& storage)
: Internals(std::make_shared<InternalStruct>(storage))
{
}
template <typename T, typename S>
ArrayHandle<T, S>::ArrayHandle(typename ArrayHandle<T, S>::StorageType&& storage) noexcept
: Internals(std::make_shared<InternalStruct>(std::move(storage)))
{
}
template <typename T, typename S>
ArrayHandle<T, S>::~ArrayHandle()
{
}
template <typename T, typename S>
ArrayHandle<T, S>& ArrayHandle<T, S>::operator=(const ArrayHandle<T, S>& src)
{
this->Internals = src.Internals;
return *this;
}
template <typename T, typename S>
ArrayHandle<T, S>& ArrayHandle<T, S>::operator=(ArrayHandle<T, S>&& src) noexcept
{
this->Internals = std::move(src.Internals);
return *this;
}
template <typename T, typename S>
typename ArrayHandle<T, S>::StorageType& ArrayHandle<T, S>::GetStorage()
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->SyncControlArray(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
return *this->Internals->GetControlArray(lock);
}
else
{
throw vtkm::cont::ErrorInternal(
"ArrayHandle::SyncControlArray did not make control array valid.");
}
}
}
template <typename T, typename S>
const typename ArrayHandle<T, S>::StorageType& ArrayHandle<T, S>::GetStorage() const
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->SyncControlArray(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
return *this->Internals->GetControlArray(lock);
}
else
{
throw vtkm::cont::ErrorInternal(
"ArrayHandle::SyncControlArray did not make control array valid.");
}
}
}
template <typename T, typename S>
typename ArrayHandle<T, S>::StorageType::PortalType ArrayHandle<T, S>::GetPortalControl()
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->SyncControlArray(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
// If the user writes into the iterator we return, then the execution
// array will become invalid. Play it safe and release the execution
// resources. (Use the const version to preserve the execution array.)
this->ReleaseResourcesExecutionInternal(lock, token);
return this->Internals->GetControlArray(lock)->GetPortal();
}
else
{
throw vtkm::cont::ErrorInternal(
"ArrayHandle::SyncControlArray did not make control array valid.");
}
}
}
template <typename T, typename S>
typename ArrayHandle<T, S>::StorageType::PortalConstType ArrayHandle<T, S>::GetPortalConstControl()
const
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->SyncControlArray(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
return this->Internals->GetControlArray(lock)->GetPortalConst();
}
else
{
throw vtkm::cont::ErrorInternal(
"ArrayHandle::SyncControlArray did not make control array valid.");
}
}
}
template <typename T, typename S>
typename ArrayHandle<T, S>::ReadPortalType ArrayHandle<T, S>::ReadPortal() const
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->WaitToRead(lock, token);
this->SyncControlArray(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
return ReadPortalType(this->Internals->GetControlArrayValidPointer(lock),
this->Internals->GetControlArray(lock)->GetPortalConst());
}
else
{
throw vtkm::cont::ErrorInternal(
"ArrayHandle::SyncControlArray did not make control array valid.");
}
}
}
template <typename T, typename S>
typename ArrayHandle<T, S>::WritePortalType ArrayHandle<T, S>::WritePortal() const
{
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
{
LockType lock = this->GetLock();
this->WaitToWrite(lock, token);
this->SyncControlArray(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
// If the user writes into the iterator we return, then the execution
// array will become invalid. Play it safe and release the execution
// resources. (Use the const version to preserve the execution array.)
this->ReleaseResourcesExecutionInternal(lock, token);
return WritePortalType(this->Internals->GetControlArrayValidPointer(lock),
this->Internals->GetControlArray(lock)->GetPortal());
}
else
{
throw vtkm::cont::ErrorInternal(
"ArrayHandle::SyncControlArray did not make control array valid.");
}
}
}
template <typename T, typename S>
vtkm::Id ArrayHandle<T, S>::GetNumberOfValues(LockType& lock) const
{
if (this->Internals->IsControlArrayValid(lock))
{
return this->Internals->GetControlArray(lock)->GetNumberOfValues();
}
else if (this->Internals->IsExecutionArrayValid(lock))
{
return this->Internals->GetExecutionArray(lock)->GetNumberOfValues();
}
else
{
return 0;
}
}
template <typename T, typename S>
void ArrayHandle<T, S>::Shrink(vtkm::Id numberOfValues)
{
VTKM_ASSERT(numberOfValues >= 0);
// A Token should not be declared within the scope of a lock. When the token goes out of scope,
// it will attempt to acquire the lock, which is undefined behavior if the thread already has
// the lock.
vtkm::cont::Token token;
if (numberOfValues > 0)
{
LockType lock = this->GetLock();
vtkm::Id originalNumberOfValues = this->GetNumberOfValues(lock);
if (numberOfValues < originalNumberOfValues)
{
this->WaitToWrite(lock, token);
if (this->Internals->IsControlArrayValid(lock))
{
this->Internals->GetControlArray(lock)->Shrink(numberOfValues);
}
if (this->Internals->IsExecutionArrayValid(lock))
{
this->Internals->GetExecutionArray(lock)->Shrink(numberOfValues);
}
}
else if (numberOfValues == originalNumberOfValues)
{
// Nothing to do.
}
else // numberOfValues > originalNumberOfValues
{
throw vtkm::cont::ErrorBadValue("ArrayHandle::Shrink cannot be used to grow array.");
}
VTKM_ASSERT(this->GetNumberOfValues(lock) == numberOfValues);
}
else // numberOfValues == 0
{
// If we are shrinking to 0, there is nothing to save and we might as well
// free up memory. Plus, some storage classes expect that data will be
// deallocated when the size goes to zero.
this->Allocate(0);
}
}
template <typename T, typename S>
template <typename DeviceAdapterTag>
typename ArrayHandle<T, S>::template ExecutionTypes<DeviceAdapterTag>::PortalConst
ArrayHandle<T, S>::PrepareForInput(DeviceAdapterTag device, vtkm::cont::Token& token) const
{
VTKM_IS_DEVICE_ADAPTER_TAG(DeviceAdapterTag);
LockType lock = this->GetLock();
this->WaitToRead(lock, token);
if (!this->Internals->IsControlArrayValid(lock) && !this->Internals->IsExecutionArrayValid(lock))
{
// Want to use an empty array.
// Set up ArrayHandle state so this actually works.
this->Internals->GetControlArray(lock)->Allocate(0);
this->Internals->SetControlArrayValid(lock, true);
}
this->PrepareForDevice(lock, token, device);
auto portal = this->Internals->GetExecutionArray(lock)->PrepareForInput(
!this->Internals->IsExecutionArrayValid(lock), device, token);
this->Internals->SetExecutionArrayValid(lock, true);
return portal;
}
template <typename T, typename S>
template <typename DeviceAdapterTag>
typename ArrayHandle<T, S>::template ExecutionTypes<DeviceAdapterTag>::Portal
ArrayHandle<T, S>::PrepareForOutput(vtkm::Id numberOfValues,
DeviceAdapterTag device,
vtkm::cont::Token& token)
{
VTKM_IS_DEVICE_ADAPTER_TAG(DeviceAdapterTag);
LockType lock = this->GetLock();
this->WaitToWrite(lock, token);
// Invalidate any control arrays.
// Should the control array resource be released? Probably not a good
// idea when shared with execution.
this->Internals->SetControlArrayValid(lock, false);
this->PrepareForDevice(lock, token, device);
auto portal =
this->Internals->GetExecutionArray(lock)->PrepareForOutput(numberOfValues, device, token);
// We are assuming that the calling code will fill the array using the
// iterators we are returning, so go ahead and mark the execution array as
// having valid data. (A previous version of this class had a separate call
// to mark the array as filled, but that was onerous to call at the
// right time and rather pointless since it is basically always the case
// that the array is going to be filled before anything else. In this
// implementation the only access to the array is through the iterators
// returned from this method, so you would have to work to invalidate this
// assumption anyway.)
this->Internals->SetExecutionArrayValid(lock, true);
return portal;
}
template <typename T, typename S>
template <typename DeviceAdapterTag>
typename ArrayHandle<T, S>::template ExecutionTypes<DeviceAdapterTag>::Portal
ArrayHandle<T, S>::PrepareForInPlace(DeviceAdapterTag device, vtkm::cont::Token& token)
{
VTKM_IS_DEVICE_ADAPTER_TAG(DeviceAdapterTag);
LockType lock = this->GetLock();
this->WaitToWrite(lock, token);
if (!this->Internals->IsControlArrayValid(lock) && !this->Internals->IsExecutionArrayValid(lock))
{
// Want to use an empty array.
// Set up ArrayHandle state so this actually works.
this->Internals->GetControlArray(lock)->Allocate(0);
this->Internals->SetControlArrayValid(lock, true);
}
this->PrepareForDevice(lock, token, device);
auto portal = this->Internals->GetExecutionArray(lock)->PrepareForInPlace(
!this->Internals->IsExecutionArrayValid(lock), device, token);
this->Internals->SetExecutionArrayValid(lock, true);
// Invalidate any control arrays since their data will become invalid when
// the execution data is overwritten. Don't actually release the control
// array. It may be shared as the execution array.
this->Internals->SetControlArrayValid(lock, false);
return portal;
}
template <typename T, typename S>
template <typename DeviceAdapterTag>
void ArrayHandle<T, S>::PrepareForDevice(LockType& lock,
vtkm::cont::Token& token,
DeviceAdapterTag device) const
{
if (this->Internals->GetExecutionArray(lock) != nullptr)
{
if (this->Internals->GetExecutionArray(lock)->IsDeviceAdapter(DeviceAdapterTag()))
{
// Already have manager for correct device adapter. Nothing to do.
return;
}
else
{
// Have the wrong manager. Delete the old one and create a new one
// of the right type. (TODO: it would be possible for the array handle
// to hold references to execution arrays on multiple devices. When data
// are written on one device, all the other devices should get cleared.)
// BUG: There is a non-zero chance that while waiting for the write lock, another thread
// could change the ExecutionInterface, which would cause problems. In the future we should
// support multiple devices, in which case we would not have to delete one execution array
// to load another.
// BUG: The current implementation does not allow the ArrayHandle to be on two devices
// at the same time. Thus, it is not possible for two threads to simultaneously read from the same
// ArrayHandle on two different devices. This might cause unexpected deadlocks.
this->WaitToWrite(lock, token, true); // Make sure no one is reading device array
this->SyncControlArray(lock, token);
// Need to change some state that does not change the logical state from
// an external point of view.
this->Internals->DeleteExecutionArray(lock);
}
}
// Need to change some state that does not change the logical state from
// an external point of view.
this->Internals->NewExecutionArray(lock, device);
}
template <typename T, typename S>
void ArrayHandle<T, S>::SyncControlArray(LockType& lock, vtkm::cont::Token& token) const
{
if (!this->Internals->IsControlArrayValid(lock))
{
// It may be the case that `SyncControlArray` is called from a method that has a `Token`.
// However, if we are here, that `Token` should not already be attached to this array.
// If it were, then there should be no reason to move data around (unless the `Token`
// was used when preparing for multiple devices, which it should not be).
this->WaitToRead(lock, token);
// Need to change some state that does not change the logical state from
// an external point of view.
if (this->Internals->IsExecutionArrayValid(lock))
{
this->Internals->GetExecutionArray(lock)->RetrieveOutputData(
this->Internals->GetControlArray(lock));
this->Internals->SetControlArrayValid(lock, true);
}
else
{
// This array is in the null state (there is nothing allocated), but
// the calling function wants to do something with the array. Put this
// class into a valid state by allocating an array of size 0.
this->Internals->GetControlArray(lock)->Allocate(0);
this->Internals->SetControlArrayValid(lock, true);
}
}
}
template <typename T, typename S>
bool ArrayHandle<T, S>::CanRead(const LockType& lock, const vtkm::cont::Token& token) const
{
// If the token is already attached to this array, then we allow reading.
if (token.IsAttached(this->Internals->GetWriteCount(lock)) ||
token.IsAttached(this->Internals->GetReadCount(lock)))
{
return true;
}
// If there is anyone else waiting at the top of the queue, we cannot access this array.
auto& queue = this->Internals->GetQueue(lock);
if (!queue.empty() && (queue.front() != token))
{
return false;
}
// No one else is waiting, so we can read the array as long as no one else is writing.
return (*this->Internals->GetWriteCount(lock) < 1);
}
template <typename T, typename S>
bool ArrayHandle<T, S>::CanWrite(const LockType& lock, const vtkm::cont::Token& token) const
{
// If the token is already attached to this array, then we allow writing.
if (token.IsAttached(this->Internals->GetWriteCount(lock)) ||
token.IsAttached(this->Internals->GetReadCount(lock)))
{
return true;
}
// If there is anyone else waiting at the top of the queue, we cannot access this array.
auto& queue = this->Internals->GetQueue(lock);
if (!queue.empty() && (queue.front() != token))
{
return false;
}
// No one else is waiting, so we can write the array as long as no one else is reading or writing.
return ((*this->Internals->GetWriteCount(lock) < 1) &&
(*this->Internals->GetReadCount(lock) < 1));
}
template <typename T, typename S>
void ArrayHandle<T, S>::WaitToRead(LockType& lock, vtkm::cont::Token& token) const
{
this->Enqueue(lock, token);
// Note that if you deadlocked here, that means that you are trying to do a read operation on an
// array where an object is writing to it.
this->Internals->ConditionVariable.wait(
lock, [&lock, &token, this] { return this->CanRead(lock, token); });
token.Attach(this->Internals,
this->Internals->GetReadCount(lock),
lock,
&this->Internals->ConditionVariable);
// We successfully attached the token. Pop it off the queue.
auto& queue = this->Internals->GetQueue(lock);
if (!queue.empty() && queue.front() == token)
{
queue.pop_front();
}
}
template <typename T, typename S>
void ArrayHandle<T, S>::WaitToWrite(LockType& lock, vtkm::cont::Token& token, bool fakeRead) const
{
this->Enqueue(lock, token);
// Note that if you deadlocked here, that means that you are trying to do a write operation on an
// array where an object is reading or writing to it.
this->Internals->ConditionVariable.wait(
lock, [&lock, &token, this] { return this->CanWrite(lock, token); });
if (!fakeRead)
{
token.Attach(this->Internals,
this->Internals->GetWriteCount(lock),
lock,
&this->Internals->ConditionVariable);
}
else
{
// A current feature limitation of ArrayHandle is that it can only exist on one device at
// a time. Thus, if a read request comes in for a different device, the prepare has to
// get satisfy a write lock to boot the array off the existing device. However, we don't
// want to attach the Token as a write lock because the resulting state is for reading only
// and others might also want to read. So, we have to pretend that this is a read lock even
// though we have to make a change to the array.
//
// The main point is, this condition is a hack that should go away once ArrayHandle supports
// multiple devices at once.
token.Attach(this->Internals,
this->Internals->GetReadCount(lock),
lock,
&this->Internals->ConditionVariable);
}
// We successfully attached the token. Pop it off the queue.
auto& queue = this->Internals->GetQueue(lock);
if (!queue.empty() && queue.front() == token)
{
queue.pop_front();
}
}
template <typename T, typename S>
void ArrayHandle<T, S>::Enqueue(const vtkm::cont::Token& token) const
{
LockType lock = this->GetLock();
this->Enqueue(lock, token);
}
template <typename T, typename S>
void ArrayHandle<T, S>::Enqueue(const LockType& lock, const vtkm::cont::Token& token) const
{
if (token.IsAttached(this->Internals->GetWriteCount(lock)) ||
token.IsAttached(this->Internals->GetReadCount(lock)))
{
// Do not need to enqueue if we are already attached.
return;
}
auto& queue = this->Internals->GetQueue(lock);
if (std::find(queue.begin(), queue.end(), token.GetReference()) != queue.end())
{
// This token is already in the queue.
return;
}
this->Internals->GetQueue(lock).push_back(token.GetReference());
}
}
} // vtkm::cont
#endif //vtk_m_cont_ArrayHandle_hxx

@ -41,11 +41,11 @@ VTKM_STORAGE_INSTANTIATE(vtkm::Float64)
} // namespace internal
#define VTKM_ARRAYHANDLE_INSTANTIATE(Type) \
template class VTKM_CONT_EXPORT ArrayHandleNewStyle<Type, StorageTagBasic>; \
template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 2>, StorageTagBasic>; \
template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 3>, StorageTagBasic>; \
template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 4>, StorageTagBasic>;
#define VTKM_ARRAYHANDLE_INSTANTIATE(Type) \
template class VTKM_CONT_EXPORT ArrayHandle<Type, StorageTagBasic>; \
template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 2>, StorageTagBasic>; \
template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 3>, StorageTagBasic>; \
template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 4>, StorageTagBasic>;
VTKM_ARRAYHANDLE_INSTANTIATE(char)
VTKM_ARRAYHANDLE_INSTANTIATE(vtkm::Int8)

@ -11,6 +11,7 @@
#define vtk_m_cont_ArrayHandleBasic_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/SerializableTypeString.h>
#include <vtkm/cont/Serialization.h>
#include <vtkm/cont/Storage.h>
@ -34,7 +35,7 @@ public:
using ReadPortalType = vtkm::internal::ArrayPortalBasicRead<T>;
using WritePortalType = vtkm::internal::ArrayPortalBasicWrite<T>;
VTKM_CONT static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
@ -47,7 +48,8 @@ public:
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
return static_cast<vtkm::Id>(buffers->GetNumberOfBytes()) / static_cast<vtkm::Id>(sizeof(T));
return static_cast<vtkm::Id>(buffers->GetNumberOfBytes() /
static_cast<vtkm::BufferSizeType>(sizeof(T)));
}
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
@ -69,9 +71,6 @@ public:
} // namespace internal
template <typename T>
VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::StorageTagBasic);
template <typename T>
class VTKM_ALWAYS_EXPORT ArrayHandleBasic : public ArrayHandle<T, vtkm::cont::StorageTagBasic>
{
@ -382,14 +381,13 @@ VTKM_STORAGE_EXPORT(vtkm::Float64)
} // namespace internal
#define VTKM_ARRAYHANDLE_EXPORT(Type) \
extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandleNewStyle<Type, StorageTagBasic>; \
extern template class VTKM_CONT_TEMPLATE_EXPORT \
ArrayHandleNewStyle<vtkm::Vec<Type, 2>, StorageTagBasic>; \
extern template class VTKM_CONT_TEMPLATE_EXPORT \
ArrayHandleNewStyle<vtkm::Vec<Type, 3>, StorageTagBasic>; \
extern template class VTKM_CONT_TEMPLATE_EXPORT \
ArrayHandleNewStyle<vtkm::Vec<Type, 4>, StorageTagBasic>;
#define VTKM_ARRAYHANDLE_EXPORT(Type) \
extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<Type, StorageTagBasic>; \
extern template class VTKM_CONT_TEMPLATE_EXPORT \
ArrayHandle<vtkm::Vec<Type, 2>, StorageTagBasic>; \
extern template class VTKM_CONT_TEMPLATE_EXPORT \
ArrayHandle<vtkm::Vec<Type, 3>, StorageTagBasic>; \
extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<vtkm::Vec<Type, 4>, StorageTagBasic>;
VTKM_ARRAYHANDLE_EXPORT(char)
VTKM_ARRAYHANDLE_EXPORT(vtkm::Int8)

@ -82,7 +82,7 @@ public:
using ReadPortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstType>;
using WritePortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalType>;
VTKM_CONT static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
VTKM_CONT static void ResizeBuffers(vtkm::Id numberOfBits,
vtkm::cont::internal::Buffer* buffers,
@ -99,12 +99,13 @@ public:
vtkm::cont::GetSizeString(static_cast<vtkm::UInt64>(numBytes)).c_str());
buffers[0].SetNumberOfBytes(numBytes, preserve, token);
vtkm::cont::detail::GetBitFieldMetaData(buffers[0])->NumberOfBits = numberOfBits;
buffers[0].GetMetaData<vtkm::cont::internal::BitFieldMetaData>().NumberOfBits = numberOfBits;
}
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
vtkm::Id numberOfBits = vtkm::cont::detail::GetBitFieldMetaData(buffers[0])->NumberOfBits;
vtkm::Id numberOfBits =
buffers[0].GetMetaData<vtkm::cont::internal::BitFieldMetaData>().NumberOfBits;
VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);
return numberOfBits;
}
@ -135,9 +136,6 @@ public:
} // end namespace internal
template <typename T>
VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::internal::StorageTagBitField);
/// The ArrayHandleBitField class is a boolean-valued ArrayHandle that is backed
/// by a BitField.
///
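/// A minimal sketch of typical use (illustrative only):
///
///   vtkm::cont::ArrayHandleBitField bits;
///   bits.Allocate(8);
///   auto portal = bits.WritePortal();
///   for (vtkm::Id i = 0; i < bits.GetNumberOfValues(); ++i)
///   {
///     portal.Set(i, (i % 2) == 0); // values behave as bools backed by the BitField
///   }
///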

@ -12,14 +12,13 @@
#include <vtkm/Assert.h>
#include <vtkm/cont/ArrayExtractComponent.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ErrorBadAllocation.h>
#include <vtkm/cont/Token.h>
namespace vtkm
{
namespace exec
{
namespace internal
{
@ -147,8 +146,7 @@ private:
PortalTypeThird PortalThird;
};
}
}
} // namespace vtkm::exec::internal
} // namespace vtkm::internal
namespace vtkm
{
@ -201,191 +199,88 @@ struct ArrayHandleCartesianProductTraits
template <typename T, typename ST1, typename ST2, typename ST3>
class Storage<vtkm::Vec<T, 3>, vtkm::cont::StorageTagCartesianProduct<ST1, ST2, ST3>>
{
using AH1 = vtkm::cont::ArrayHandle<T, ST1>;
using AH2 = vtkm::cont::ArrayHandle<T, ST2>;
using AH3 = vtkm::cont::ArrayHandle<T, ST3>;
using Storage1 = vtkm::cont::internal::Storage<T, ST1>;
using Storage2 = vtkm::cont::internal::Storage<T, ST2>;
using Storage3 = vtkm::cont::internal::Storage<T, ST3>;
template <typename Buffs>
VTKM_CONT constexpr static Buffs* Buffers1(Buffs* buffers)
{
return buffers;
}
template <typename Buffs>
VTKM_CONT constexpr static Buffs* Buffers2(Buffs* buffers)
{
return buffers + Storage1::GetNumberOfBuffers();
}
template <typename Buffs>
VTKM_CONT constexpr static Buffs* Buffers3(Buffs* buffers)
{
return buffers + Storage1::GetNumberOfBuffers() + Storage2::GetNumberOfBuffers();
}
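// Illustrative note: the combined buffer list is laid out back to back as
// [Storage1 buffers][Storage2 buffers][Storage3 buffers]. For example, if each
// sub-storage reports a single buffer, Buffers1 points at offset 0, Buffers2 at
// offset 1, and Buffers3 at offset 2.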
public:
using ValueType = vtkm::Vec<typename AH1::ValueType, 3>;
VTKM_STORAGE_NO_RESIZE;
using PortalType =
vtkm::exec::internal::ArrayPortalCartesianProduct<ValueType,
typename AH1::WritePortalType,
typename AH2::WritePortalType,
typename AH3::WritePortalType>;
using PortalConstType =
vtkm::exec::internal::ArrayPortalCartesianProduct<ValueType,
typename AH1::ReadPortalType,
typename AH2::ReadPortalType,
typename AH3::ReadPortalType>;
using ReadPortalType =
vtkm::internal::ArrayPortalCartesianProduct<vtkm::Vec<T, 3>,
typename Storage1::ReadPortalType,
typename Storage2::ReadPortalType,
typename Storage3::ReadPortalType>;
using WritePortalType =
vtkm::internal::ArrayPortalCartesianProduct<vtkm::Vec<T, 3>,
typename Storage1::WritePortalType,
typename Storage2::WritePortalType,
typename Storage3::WritePortalType>;
VTKM_CONT
Storage()
: FirstArray()
, SecondArray()
, ThirdArray()
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers()
{
return Storage1::GetNumberOfBuffers() + Storage2::GetNumberOfBuffers() +
Storage3::GetNumberOfBuffers();
}
VTKM_CONT
Storage(const AH1& array1, const AH2& array2, const AH3& array3)
: FirstArray(array1)
, SecondArray(array2)
, ThirdArray(array3)
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
return (Storage1::GetNumberOfValues(Buffers1(buffers)) *
Storage2::GetNumberOfValues(Buffers2(buffers)) *
Storage3::GetNumberOfValues(Buffers3(buffers)));
}
VTKM_CONT
PortalType GetPortal()
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return PortalType(this->FirstArray.WritePortal(),
this->SecondArray.WritePortal(),
this->ThirdArray.WritePortal());
return ReadPortalType(Storage1::CreateReadPortal(Buffers1(buffers), device, token),
Storage2::CreateReadPortal(Buffers2(buffers), device, token),
Storage3::CreateReadPortal(Buffers3(buffers), device, token));
}
VTKM_CONT
PortalConstType GetPortalConst() const
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return PortalConstType(
this->FirstArray.ReadPortal(), this->SecondArray.ReadPortal(), this->ThirdArray.ReadPortal());
return WritePortalType(Storage1::CreateWritePortal(Buffers1(buffers), device, token),
Storage2::CreateWritePortal(Buffers2(buffers), device, token),
Storage3::CreateWritePortal(Buffers3(buffers), device, token));
}
VTKM_CONT
vtkm::Id GetNumberOfValues() const
VTKM_CONT static vtkm::cont::ArrayHandle<T, ST1> GetArrayHandle1(
const vtkm::cont::internal::Buffer* buffers)
{
return this->FirstArray.GetNumberOfValues() * this->SecondArray.GetNumberOfValues() *
this->ThirdArray.GetNumberOfValues();
return vtkm::cont::ArrayHandle<T, ST1>(Buffers1(buffers));
}
VTKM_CONT
void Allocate(vtkm::Id /*numberOfValues*/)
VTKM_CONT static vtkm::cont::ArrayHandle<T, ST2> GetArrayHandle2(
const vtkm::cont::internal::Buffer* buffers)
{
throw vtkm::cont::ErrorBadAllocation("Does not make sense.");
return vtkm::cont::ArrayHandle<T, ST2>(Buffers2(buffers));
}
VTKM_CONT
void Shrink(vtkm::Id /*numberOfValues*/)
VTKM_CONT static vtkm::cont::ArrayHandle<T, ST3> GetArrayHandle3(
const vtkm::cont::internal::Buffer* buffers)
{
throw vtkm::cont::ErrorBadAllocation("Does not make sense.");
return vtkm::cont::ArrayHandle<T, ST3>(Buffers3(buffers));
}
VTKM_CONT
void ReleaseResources()
{
// This request is ignored since it is asking to release the resources
// of the arrays, which may be used elsewhere.
}
VTKM_CONT
const AH1& GetFirstArray() const { return this->FirstArray; }
VTKM_CONT
const AH2& GetSecondArray() const { return this->SecondArray; }
VTKM_CONT
const AH3& GetThirdArray() const { return this->ThirdArray; }
private:
AH1 FirstArray;
AH2 SecondArray;
AH3 ThirdArray;
};
template <typename T, typename ST1, typename ST2, typename ST3, typename Device>
class ArrayTransfer<vtkm::Vec<T, 3>, vtkm::cont::StorageTagCartesianProduct<ST1, ST2, ST3>, Device>
{
public:
using ValueType = vtkm::Vec<T, 3>;
private:
using AH1 = vtkm::cont::ArrayHandle<T, ST1>;
using AH2 = vtkm::cont::ArrayHandle<T, ST2>;
using AH3 = vtkm::cont::ArrayHandle<T, ST3>;
using StorageTag = vtkm::cont::StorageTagCartesianProduct<ST1, ST2, ST3>;
using StorageType = vtkm::cont::internal::Storage<ValueType, StorageTag>;
public:
using PortalControl = typename StorageType::PortalType;
using PortalConstControl = typename StorageType::PortalConstType;
using PortalExecution = vtkm::exec::internal::ArrayPortalCartesianProduct<
ValueType,
typename AH1::template ExecutionTypes<Device>::Portal,
typename AH2::template ExecutionTypes<Device>::Portal,
typename AH3::template ExecutionTypes<Device>::Portal>;
using PortalConstExecution = vtkm::exec::internal::ArrayPortalCartesianProduct<
ValueType,
typename AH1::template ExecutionTypes<Device>::PortalConst,
typename AH2::template ExecutionTypes<Device>::PortalConst,
typename AH3::template ExecutionTypes<Device>::PortalConst>;
VTKM_CONT
ArrayTransfer(StorageType* storage)
: FirstArray(storage->GetFirstArray())
, SecondArray(storage->GetSecondArray())
, ThirdArray(storage->GetThirdArray())
{
}
VTKM_CONT
vtkm::Id GetNumberOfValues() const
{
return this->FirstArray.GetNumberOfValues() * this->SecondArray.GetNumberOfValues() *
this->ThirdArray.GetNumberOfValues();
}
VTKM_CONT
PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData), vtkm::cont::Token& token)
{
return PortalConstExecution(this->FirstArray.PrepareForInput(Device(), token),
this->SecondArray.PrepareForInput(Device(), token),
this->ThirdArray.PrepareForInput(Device(), token));
}
VTKM_CONT
PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData), vtkm::cont::Token&)
{
throw vtkm::cont::ErrorBadAllocation(
"Cannot write to an ArrayHandleCartesianProduct. It does not make "
"sense because there is overlap in the data.");
}
VTKM_CONT
PortalExecution PrepareForOutput(vtkm::Id vtkmNotUsed(numberOfValues), vtkm::cont::Token&)
{
throw vtkm::cont::ErrorBadAllocation(
"Cannot write to an ArrayHandleCartesianProduct. It does not make "
"sense because there is overlap in the data.");
}
VTKM_CONT
void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
{
// Implementation of this method should be unnecessary. The internal
// first and second array handles should automatically retrieve the
// output data as necessary.
}
VTKM_CONT
void Shrink(vtkm::Id /*numberOfValues*/)
{
throw vtkm::cont::ErrorBadAllocation("Does not make sense.");
}
VTKM_CONT
void ReleaseResources()
{
this->FirstArray.ReleaseResourcesExecution();
this->SecondArray.ReleaseResourcesExecution();
this->ThirdArray.ReleaseResourcesExecution();
}
private:
AH1 FirstArray;
AH2 SecondArray;
AH3 ThirdArray;
};
} // namespace internal
@ -421,7 +316,7 @@ public:
ArrayHandleCartesianProduct(const FirstHandleType& firstArray,
const SecondHandleType& secondArray,
const ThirdHandleType& thirdArray)
: Superclass(StorageType(firstArray, secondArray, thirdArray))
: Superclass(vtkm::cont::internal::CreateBuffers(firstArray, secondArray, thirdArray))
{
}
@ -431,6 +326,19 @@ public:
/// created for all devices, and it would not be valid for all devices.
///
~ArrayHandleCartesianProduct() {}
VTKM_CONT FirstHandleType GetFirstArray() const
{
return StorageType::GetArrayHandle1(this->GetBuffers());
}
VTKM_CONT SecondHandleType GetSecondArray() const
{
return StorageType::GetArrayHandle2(this->GetBuffers());
}
VTKM_CONT ThirdHandleType GetThirdArray() const
{
return StorageType::GetArrayHandle3(this->GetBuffers());
}
};
/// A convenience function for creating an ArrayHandleCartesianProduct. It takes the two
@ -446,6 +354,103 @@ VTKM_CONT
return ArrayHandleCartesianProduct<FirstHandleType, SecondHandleType, ThirdHandleType>(
first, second, third);
}
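// An illustrative sketch (assumes xCoords, yCoords, and zCoords are basic
// arrays already filled with per-axis values):
//
//   vtkm::cont::ArrayHandle<vtkm::FloatDefault> xCoords, yCoords, zCoords;
//   // ... fill the three axis arrays ...
//   auto points =
//     vtkm::cont::make_ArrayHandleCartesianProduct(xCoords, yCoords, zCoords);
//   // points.GetNumberOfValues() is the product of the three axis lengths,
//   // and each value is a vtkm::Vec<vtkm::FloatDefault, 3>.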
//--------------------------------------------------------------------------------
// Specialization of ArrayExtractComponent
namespace internal
{
template <typename... STs>
struct ArrayExtractComponentImpl<vtkm::cont::StorageTagCartesianProduct<STs...>>
{
template <typename T>
vtkm::cont::ArrayHandleStride<T> AdjustStrideForComponent(
const vtkm::cont::ArrayHandleStride<T>& componentArray,
const vtkm::Id3& dims,
vtkm::IdComponent component,
vtkm::Id totalNumValues) const
{
VTKM_ASSERT(componentArray.GetModulo() == 0);
VTKM_ASSERT(componentArray.GetDivisor() == 1);
vtkm::Id modulo = 0;
if (component < 2)
{
modulo = dims[component];
}
vtkm::Id divisor = 1;
for (vtkm::IdComponent c = 0; c < component; ++c)
{
divisor *= dims[c];
}
return vtkm::cont::ArrayHandleStride<T>(componentArray.GetBasicArray(),
totalNumValues,
componentArray.GetStride(),
componentArray.GetOffset(),
modulo,
divisor);
}
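// Worked example (illustrative): for dims = (nx, ny, nz), the cartesian
// product indexes its inputs as
//   value(i) = (A1[i % nx], A2[(i / nx) % ny], A3[i / (nx * ny)]).
// So for component 1 the code above sets modulo = ny and divisor = nx, and for
// component 2 it sets modulo = 0 (no wrap needed) and divisor = nx * ny.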
template <typename T, typename ST, typename CartesianArrayType>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType>
GetStrideForComponentArray(const vtkm::cont::ArrayHandle<T, ST>& componentArray,
const CartesianArrayType& cartesianArray,
vtkm::IdComponent subIndex,
vtkm::IdComponent productIndex,
vtkm::CopyFlag allowCopy) const
{
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> strideArray =
ArrayExtractComponentImpl<ST>{}(componentArray, subIndex, allowCopy);
if ((strideArray.GetModulo() != 0) || (strideArray.GetDivisor() != 1))
{
// If the sub array has its own modulo and/or divisor, that will likely interfere
// with this math. Give up and fall back to simple copy.
constexpr vtkm::IdComponent NUM_SUB_COMPONENTS = vtkm::VecFlat<T>::NUM_COMPONENTS;
return vtkm::cont::internal::ArrayExtractComponentFallback(
cartesianArray, (productIndex * NUM_SUB_COMPONENTS) + subIndex, allowCopy);
}
vtkm::Id3 dims = { cartesianArray.GetFirstArray().GetNumberOfValues(),
cartesianArray.GetSecondArray().GetNumberOfValues(),
cartesianArray.GetThirdArray().GetNumberOfValues() };
return this->AdjustStrideForComponent(
strideArray, dims, productIndex, cartesianArray.GetNumberOfValues());
}
template <typename T>
vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType> operator()(
const vtkm::cont::ArrayHandle<vtkm::Vec<T, 3>, vtkm::cont::StorageTagCartesianProduct<STs...>>&
src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
{
vtkm::cont::ArrayHandleCartesianProduct<vtkm::cont::ArrayHandle<T, STs>...> array(src);
constexpr vtkm::IdComponent NUM_SUB_COMPONENTS = vtkm::VecFlat<T>::NUM_COMPONENTS;
vtkm::IdComponent subIndex = componentIndex % NUM_SUB_COMPONENTS;
vtkm::IdComponent productIndex = componentIndex / NUM_SUB_COMPONENTS;
switch (productIndex)
{
case 0:
return this->GetStrideForComponentArray(
array.GetFirstArray(), array, subIndex, productIndex, allowCopy);
case 1:
return this->GetStrideForComponentArray(
array.GetSecondArray(), array, subIndex, productIndex, allowCopy);
case 2:
return this->GetStrideForComponentArray(
array.GetThirdArray(), array, subIndex, productIndex, allowCopy);
default:
throw vtkm::cont::ErrorBadValue("Invalid component index to ArrayExtractComponent.");
}
}
};
} // namespace internal
}
} // namespace vtkm::cont
@ -492,10 +497,10 @@ private:
public:
static VTKM_CONT void save(BinaryBuffer& bb, const BaseType& obj)
{
auto storage = obj.GetStorage();
vtkmdiy::save(bb, storage.GetFirstArray());
vtkmdiy::save(bb, storage.GetSecondArray());
vtkmdiy::save(bb, storage.GetThirdArray());
Type array = obj;
vtkmdiy::save(bb, array.GetFirstArray());
vtkmdiy::save(bb, array.GetSecondArray());
vtkmdiy::save(bb, array.GetThirdArray());
}
static VTKM_CONT void load(BinaryBuffer& bb, BaseType& obj)

@ -105,8 +105,6 @@ struct ArrayHandleCastTraits<TargetT, SourceT, SourceStorage, true, false>
using StorageTagSuperclass = StorageTagTransform<vtkm::cont::ArrayHandle<SourceT, SourceStorage>,
vtkm::cont::internal::Cast<SourceT, TargetT>>;
using StorageSuperclass = vtkm::cont::internal::Storage<TargetT, StorageTagSuperclass>;
template <typename Device>
using ArrayTransferSuperclass = ArrayTransfer<TargetT, StorageTagSuperclass, Device>;
};
// Case where both forward and backward casts are valid.
@ -117,29 +115,16 @@ struct ArrayHandleCastTraits<TargetT, SourceT, SourceStorage, true, true>
vtkm::cont::internal::Cast<SourceT, TargetT>,
vtkm::cont::internal::Cast<TargetT, SourceT>>;
using StorageSuperclass = vtkm::cont::internal::Storage<TargetT, StorageTagSuperclass>;
template <typename Device>
using ArrayTransferSuperclass = ArrayTransfer<TargetT, StorageTagSuperclass, Device>;
};
} // namespace detail
template <typename TargetT, typename SourceT, typename SourceStorage>
struct Storage<TargetT, vtkm::cont::StorageTagCast<SourceT, SourceStorage>>
: detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage>::StorageSuperclass
template <typename TargetT, typename SourceT, typename SourceStorage_>
struct Storage<TargetT, vtkm::cont::StorageTagCast<SourceT, SourceStorage_>>
: detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage_>::StorageSuperclass
{
using Superclass =
typename detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage>::StorageSuperclass;
using Superclass::Superclass;
};
template <typename TargetT, typename SourceT, typename SourceStorage, typename Device>
struct ArrayTransfer<TargetT, vtkm::cont::StorageTagCast<SourceT, SourceStorage>, Device>
: detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage>::
template ArrayTransferSuperclass<Device>
{
using Superclass = typename detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage>::
template ArrayTransferSuperclass<Device>;
typename detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage_>::StorageSuperclass;
using Superclass::Superclass;
};
@ -168,7 +153,7 @@ public:
ArrayHandleCast(const vtkm::cont::ArrayHandle<typename ArrayHandleType::ValueType,
typename ArrayHandleType::StorageTag>& handle)
: Superclass(typename Superclass::StorageType(handle))
: Superclass(Superclass::StorageType::CreateBuffers(handle))
{
this->ValidateTypeCast<typename ArrayHandleType::ValueType>();
}
@ -180,6 +165,12 @@ public:
///
~ArrayHandleCast() {}
/// \brief Returns the `ArrayHandle` that is being transformed.
ArrayHandleType GetSourceArray() const
{
return Superclass::StorageType::GetArray(this->GetBuffers());
}
private:
// Log warnings if type cast is valid but lossy:
template <typename SrcValueType>
@ -284,9 +275,8 @@ struct SerializableTypeString<vtkm::cont::ArrayHandleCast<T, AH>>
{
static VTKM_CONT const std::string& Get()
{
static std::string name = "AH_Cast<" + SerializableTypeString<T>::Get() + "," +
SerializableTypeString<typename AH::ValueType>::Get() + "," +
SerializableTypeString<typename AH::StorageTag>::Get() + ">";
static std::string name =
"AH_Cast<" + SerializableTypeString<T>::Get() + "," + SerializableTypeString<AH>::Get() + ">";
return name;
}
};
@ -313,14 +303,16 @@ private:
public:
static VTKM_CONT void save(BinaryBuffer& bb, const BaseType& obj)
{
vtkmdiy::save(bb, obj.GetStorage().GetArray());
vtkm::cont::ArrayHandleCast<TargetT, vtkm::cont::ArrayHandle<SourceT, SourceStorage>>
castArray = obj;
vtkmdiy::save(bb, castArray.GetSourceArray());
}
static VTKM_CONT void load(BinaryBuffer& bb, BaseType& obj)
{
vtkm::cont::ArrayHandle<SourceT, SourceStorage> array;
vtkmdiy::load(bb, array);
obj = BaseType(array);
obj = vtkm::cont::make_ArrayHandleCast<TargetT>(array);
}
};

@ -10,6 +10,7 @@
#ifndef vtk_m_ArrayHandleCompositeVector_h
#define vtk_m_ArrayHandleCompositeVector_h
#include <vtkm/cont/ArrayExtractComponent.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/Deprecated.h>
@ -25,41 +26,16 @@
namespace vtkm
{
namespace cont
{
namespace internal
{
namespace compvec
{
// AllAreArrayHandles: ---------------------------------------------------------
// Ensures that all types in ArrayHandlesT... are subclasses of ArrayHandleBase
template <typename... ArrayHandlesT>
struct AllAreArrayHandlesImpl;
template <typename Head, typename... Tail>
struct AllAreArrayHandlesImpl<Head, Tail...>
{
private:
using Next = AllAreArrayHandlesImpl<Tail...>;
constexpr static bool HeadValid = std::is_base_of<ArrayHandleBase, Head>::value;
public:
constexpr static bool Value = HeadValid && Next::Value;
};
template <typename Head>
struct AllAreArrayHandlesImpl<Head>
{
constexpr static bool Value = std::is_base_of<ArrayHandleBase, Head>::value;
};
template <typename... ArrayHandleTs>
struct AllAreArrayHandles
{
constexpr static bool Value = AllAreArrayHandlesImpl<ArrayHandleTs...>::Value;
};
template <typename... PortalList>
using AllPortalsAreWritable =
typename brigand::all<brigand::list<PortalList...>,
brigand::bind<vtkm::internal::PortalSupportsSets, brigand::_1>>::type;
// GetValueType: ---------------------------------------------------------------
// Determines the output `ValueType` of the set of `ArrayHandle` objects. For example, if the input
@ -93,169 +69,6 @@ struct GetValueType<ArrayType>
using ValueType = typename ArrayType::ValueType;
};
// -----------------------------------------------------------------------------
// Functors to access Storage methods. This is used with vtkm::Tuple's
// ForEach and Transform methods.
struct WritePortal
{
template <typename ArrayHandle>
typename ArrayHandle::WritePortalType operator()(const ArrayHandle& array) const
{
return array.WritePortal();
}
};
struct ReadPortal
{
template <typename ArrayHandle>
typename ArrayHandle::ReadPortalType operator()(const ArrayHandle& array) const
{
return array.ReadPortal();
}
};
struct Allocate
{
vtkm::Id NumValues;
VTKM_CONT Allocate(vtkm::Id numValues)
: NumValues(numValues)
{
}
template <typename Array>
VTKM_CONT void operator()(Array& array)
{
array.Allocate(this->NumValues);
}
};
struct Shrink
{
vtkm::Id NumValues;
VTKM_CONT Shrink(vtkm::Id numValues)
: NumValues(numValues)
{
}
template <typename Array>
VTKM_CONT void operator()(Array& array)
{
array.Shrink(this->NumValues);
}
};
struct ReleaseResources
{
template <typename Array>
VTKM_CONT void operator()(Array& array)
{
array.ReleaseResources();
}
};
// -----------------------------------------------------------------------------
// Functors to access ArrayTransfer methods. This is used with vtkm::Tuple's
// ForEach and Transform methods.
template <typename Device>
struct PrepareForInput
{
vtkm::cont::Token& Token;
VTKM_CONT PrepareForInput(vtkm::cont::Token& token)
: Token(token)
{
}
template <typename Array>
VTKM_CONT typename Array::template ExecutionTypes<Device>::PortalConst operator()(
const Array& array)
{
return array.PrepareForInput(Device{}, this->Token);
}
};
template <typename Device>
struct PrepareForInPlace
{
vtkm::cont::Token& Token;
VTKM_CONT PrepareForInPlace(vtkm::cont::Token& token)
: Token(token)
{
}
template <typename Array>
VTKM_CONT typename Array::template ExecutionTypes<Device>::Portal operator()(Array& array)
{
return array.PrepareForInPlace(Device{}, this->Token);
}
};
template <typename Device>
struct PrepareForOutput
{
vtkm::Id NumValues;
vtkm::cont::Token& Token;
VTKM_CONT PrepareForOutput(vtkm::Id numValues, vtkm::cont::Token& token)
: NumValues(numValues)
, Token(token)
{
}
template <typename Array>
VTKM_CONT typename Array::template ExecutionTypes<Device>::Portal operator()(Array& array)
{
return array.PrepareForOutput(this->NumValues, Device{}, this->Token);
}
};
struct ReleaseResourcesExecution
{
template <typename Array>
VTKM_CONT void operator()(Array& array)
{
array.ReleaseResourcesExecution();
}
};
// ArraySizeValidator: ---------------------------------------------------------
// Call Exec(ArrayTuple, NumValues) to ensure that all arrays in the tuple have
// the specified number of values.
template <std::size_t Index, std::size_t Count, typename TupleType>
struct ArraySizeValidatorImpl
{
using Next = ArraySizeValidatorImpl<Index + 1, Count, TupleType>;
VTKM_CONT
static bool Exec(const TupleType& tuple, vtkm::Id numVals)
{
return vtkm::Get<Index>(tuple).GetNumberOfValues() == numVals && Next::Exec(tuple, numVals);
}
};
template <std::size_t Index, typename TupleType>
struct ArraySizeValidatorImpl<Index, Index, TupleType>
{
VTKM_CONT
static bool Exec(const TupleType&, vtkm::Id) { return true; }
};
template <typename TupleType>
struct ArraySizeValidator
{
VTKM_CONT
static bool Exec(const TupleType& tuple, vtkm::Id numVals)
{
return ArraySizeValidatorImpl<0, vtkm::TupleSize<TupleType>::value, TupleType>::Exec(tuple,
numVals);
}
};
template <typename... PortalList>
using AllPortalsAreWritable =
typename brigand::all<brigand::list<PortalList...>,
brigand::bind<vtkm::internal::PortalSupportsSets, brigand::_1>>::type;
// GetFromPortals: -------------------------------------------------------------
// Given a set of array portals as arguments, returns a Vec comprising the values
// at the provided index.
@ -294,7 +107,7 @@ VTKM_EXEC_CONT void SetToPortals(vtkm::Id index, const ValueType& value, const P
portals...);
}
} // end namespace compvec
} // namespace compvec
template <typename... PortalTypes>
class VTKM_ALWAYS_EXPORT ArrayPortalCompositeVector
@ -338,6 +151,54 @@ public:
}
};
}
} // vtkm::internal
namespace vtkm
{
namespace cont
{
namespace internal
{
namespace compvec
{
template <typename ArrayType>
struct VerifyArrayHandle
{
VTKM_STATIC_ASSERT_MSG(vtkm::cont::internal::ArrayHandleCheck<ArrayType>::type::value,
"Template parameters for ArrayHandleCompositeVector "
"must be a list of ArrayHandle types.");
};
template <std::size_t I>
struct BufferIndexImpl
{
template <typename... Ts>
static constexpr vtkm::IdComponent Value(vtkm::IdComponent n, Ts... remaining)
{
return n + BufferIndexImpl<I - 1>::Value(remaining...);
}
};
template <>
struct BufferIndexImpl<0>
{
template <typename... Ts>
static constexpr vtkm::IdComponent Value(Ts...)
{
return 0;
}
};
template <std::size_t I, typename... StorageTypes>
constexpr vtkm::IdComponent BufferIndex()
{
return BufferIndexImpl<I>::Value(StorageTypes::GetNumberOfBuffers()...);
}
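// Illustrative example: if the storages report 1, 2, and 3 buffers in order,
// then BufferIndex<0>() == 0, BufferIndex<1>() == 1, and BufferIndex<2>() == 3;
// each result is the sum of the buffer counts of all preceding storages.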
} // end namespace compvec
} // namespace internal
template <typename... StorageTags>
@ -354,11 +215,9 @@ struct CompositeVectorTraits
// Need to check this here, since this traits struct is used in the
// ArrayHandleCompositeVector superclass definition before any other
// static_asserts could be used.
VTKM_STATIC_ASSERT_MSG(compvec::AllAreArrayHandles<ArrayTs...>::Value,
"Template parameters for ArrayHandleCompositeVector "
"must be a list of ArrayHandle types.");
using CheckArrayHandles = vtkm::List<compvec::VerifyArrayHandle<ArrayTs>...>;
using ValueType = typename compvec::GetValueType<ArrayTs...>::ValueType;
using ValueType = typename vtkm::internal::compvec::GetValueType<ArrayTs...>::ValueType;
using StorageTag = vtkm::cont::StorageTagCompositeVec<typename ArrayTs::StorageTag...>;
using StorageType = Storage<ValueType, StorageTag>;
using Superclass = ArrayHandle<ValueType, StorageTag>;
@ -368,187 +227,177 @@ template <typename T, typename... StorageTags>
class Storage<vtkm::Vec<T, static_cast<vtkm::IdComponent>(sizeof...(StorageTags))>,
vtkm::cont::StorageTagCompositeVec<StorageTags...>>
{
using ArrayTuple = vtkm::Tuple<vtkm::cont::ArrayHandle<T, StorageTags>...>;
template <typename S>
using StorageFor = vtkm::cont::internal::Storage<T, S>;
ArrayTuple Arrays;
bool Valid;
using StorageTuple = vtkm::Tuple<StorageFor<StorageTags>...>;
template <std::size_t I>
VTKM_CONT static constexpr vtkm::IdComponent BufferIndex()
{
return compvec::BufferIndex<I, StorageFor<StorageTags>...>();
}
template <std::size_t I, typename Buff>
VTKM_CONT static Buff* Buffers(Buff* buffers)
{
return buffers + BufferIndex<I>();
}
using IndexList = vtkmstd::make_index_sequence<sizeof...(StorageTags)>;
public:
using ValueType = vtkm::Vec<T, static_cast<vtkm::IdComponent>(sizeof...(StorageTags))>;
using PortalType = ArrayPortalCompositeVector<
typename vtkm::cont::ArrayHandle<T, StorageTags>::WritePortalType...>;
using PortalConstType =
ArrayPortalCompositeVector<typename vtkm::cont::ArrayHandle<T, StorageTags>::ReadPortalType...>;
VTKM_CONT
Storage()
: Valid(false)
{
}
template <typename... ArrayTypes>
VTKM_CONT Storage(const ArrayTypes&... arrays)
: Arrays(arrays...)
, Valid(true)
{
using SizeValidator = compvec::ArraySizeValidator<ArrayTuple>;
if (!SizeValidator::Exec(this->Arrays, this->GetNumberOfValues()))
{
throw ErrorBadValue("All arrays must have the same number of values.");
}
}
VTKM_CONT
PortalType GetPortal()
{
VTKM_ASSERT(this->Valid);
return this->Arrays.Transform(compvec::WritePortal{});
}
VTKM_CONT
PortalConstType GetPortalConst() const
{
VTKM_ASSERT(this->Valid);
return this->Arrays.Transform(compvec::ReadPortal{});
}
VTKM_CONT
vtkm::Id GetNumberOfValues() const
{
VTKM_ASSERT(this->Valid);
return vtkm::Get<0>(this->Arrays).GetNumberOfValues();
}
VTKM_CONT
void Allocate(vtkm::Id numValues)
{
VTKM_ASSERT(this->Valid);
this->Arrays.ForEach(compvec::Allocate{ numValues });
}
VTKM_CONT
void Shrink(vtkm::Id numValues)
{
VTKM_ASSERT(this->Valid);
this->Arrays.ForEach(compvec::Shrink{ numValues });
}
VTKM_CONT
void ReleaseResources()
{
VTKM_ASSERT(this->Valid);
this->Arrays.ForEach(compvec::ReleaseResources{});
}
VTKM_CONT
const ArrayTuple& GetArrayTuple() const
{
VTKM_ASSERT(this->Valid);
return this->Arrays;
}
VTKM_CONT
ArrayTuple& GetArrayTuple()
{
VTKM_ASSERT(this->Valid);
return this->Arrays;
}
};
// Special case for single component. Just defer to the original storage.
template <typename T, typename StorageTag>
class Storage<T, vtkm::cont::StorageTagCompositeVec<StorageTag>> : public Storage<T, StorageTag>
{
using ArrayType = vtkm::cont::ArrayHandle<T, StorageTag>;
using TupleType = vtkm::Tuple<ArrayType>;
public:
Storage() = default;
Storage(const ArrayType& array)
: Storage<T, StorageTag>(array.GetStorage())
{
}
VTKM_CONT
const TupleType GetArrayTuple() const { return TupleType(ArrayType(this->GetStorage())); }
};
template <typename T, typename... StorageTags, typename DeviceTag>
class ArrayTransfer<vtkm::Vec<T, static_cast<vtkm::IdComponent>(sizeof...(StorageTags))>,
vtkm::cont::StorageTagCompositeVec<StorageTags...>,
DeviceTag>
{
VTKM_IS_DEVICE_ADAPTER_TAG(DeviceTag);
using ArrayTuple = vtkm::Tuple<vtkm::cont::ArrayHandle<T, StorageTags>...>;
public:
using ValueType = vtkm::Vec<T, static_cast<vtkm::IdComponent>(sizeof...(StorageTags))>;
using ReadPortalType =
vtkm::internal::ArrayPortalCompositeVector<typename StorageFor<StorageTags>::ReadPortalType...>;
using WritePortalType = vtkm::internal::ArrayPortalCompositeVector<
typename StorageFor<StorageTags>::WritePortalType...>;
private:
using StorageTag = vtkm::cont::StorageTagCompositeVec<StorageTags...>;
using StorageType = internal::Storage<ValueType, StorageTag>;
// Hoop to jump through to use Storage::ResizeBuffers in an initializer list.
template <typename StorageType>
static bool ResizeBuffersCallthrough(StorageType,
vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
StorageType::ResizeBuffers(numValues, buffers, preserve, token);
return false; // Return value does not matter. Hopefully just thrown away by compiler.
}
StorageType* Storage;
template <std::size_t... Is>
static void ResizeBuffersImpl(vtkmstd::index_sequence<Is...>,
vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
auto init_list = { ResizeBuffersCallthrough(vtkm::tuple_element_t<Is, StorageTuple>{},
numValues,
Buffers<Is>(buffers),
preserve,
token)... };
(void)init_list;
}
template <std::size_t... Is>
static ReadPortalType CreateReadPortalImpl(vtkmstd::index_sequence<Is...>,
const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return ReadPortalType(vtkm::tuple_element_t<Is, StorageTuple>::CreateReadPortal(
Buffers<Is>(buffers), device, token)...);
}
template <std::size_t... Is>
static WritePortalType CreateWritePortalImpl(vtkmstd::index_sequence<Is...>,
vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return WritePortalType(vtkm::tuple_element_t<Is, StorageTuple>::CreateWritePortal(
Buffers<Is>(buffers), device, token)...);
}
public:
using PortalControl = typename StorageType::PortalType;
using PortalConstControl = typename StorageType::PortalConstType;
using PortalExecution =
ArrayPortalCompositeVector<typename vtkm::cont::ArrayHandle<T, StorageTags>::
template ExecutionTypes<DeviceTag>::Portal...>;
using PortalConstExecution =
ArrayPortalCompositeVector<typename vtkm::cont::ArrayHandle<T, StorageTags>::
template ExecutionTypes<DeviceTag>::PortalConst...>;
VTKM_CONT
ArrayTransfer(StorageType* storage)
: Storage(storage)
VTKM_CONT constexpr static vtkm::IdComponent GetNumberOfBuffers()
{
return BufferIndex<sizeof...(StorageTags)>();
}
VTKM_CONT
vtkm::Id GetNumberOfValues() const { return this->Storage->GetNumberOfValues(); }
VTKM_CONT
PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData), vtkm::cont::Token& token) const
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
return this->GetArrayTuple().Transform(compvec::PrepareForInput<DeviceTag>{ token });
return vtkm::TupleElement<0, StorageTuple>::GetNumberOfValues(buffers);
}
VTKM_CONT
PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData), vtkm::cont::Token& token)
VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
vtkm::cont::internal::Buffer* buffers,
vtkm::CopyFlag preserve,
vtkm::cont::Token& token)
{
return this->GetArrayTuple().Transform(compvec::PrepareForInPlace<DeviceTag>{ token });
ResizeBuffersImpl(IndexList{}, numValues, buffers, preserve, token);
}
VTKM_CONT
PortalExecution PrepareForOutput(vtkm::Id numValues, vtkm::cont::Token& token)
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
return this->GetArrayTuple().Transform(
compvec::PrepareForOutput<DeviceTag>{ numValues, token });
return CreateReadPortalImpl(IndexList{}, buffers, device, token);
}
VTKM_CONT
void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
// Implementation of this method should be unnecessary. The internal
// array handle should automatically retrieve the output data as
// necessary.
return CreateWritePortalImpl(IndexList{}, buffers, device, token);
}
VTKM_CONT
void Shrink(vtkm::Id numValues) { this->GetArrayTuple().ForEach(compvec::Shrink{ numValues }); }
private:
template <typename ArrayType>
VTKM_CONT static bool CopyBuffers(const ArrayType& array,
vtkm::cont::internal::Buffer* destBuffers)
{
vtkm::IdComponent numBuffers = array.GetNumberOfBuffers();
const vtkm::cont::internal::Buffer* srcBuffers = array.GetBuffers();
for (vtkm::IdComponent buffIndex = 0; buffIndex < numBuffers; ++buffIndex)
{
destBuffers[buffIndex] = srcBuffers[buffIndex];
}
return false; // Return value does not matter. Hopefully just thrown away by compiler.
}
VTKM_CONT
void ReleaseResources() { this->GetArrayTuple().ForEach(compvec::ReleaseResourcesExecution{}); }
template <std::size_t... Is, typename... ArrayTs>
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffersImpl(
vtkmstd::index_sequence<Is...>,
const ArrayTs... arrays)
{
std::vector<vtkm::cont::internal::Buffer> buffers(
static_cast<std::size_t>(GetNumberOfBuffers()));
auto init_list = { CopyBuffers(arrays, Buffers<Is>(&buffers.front()))... };
(void)init_list;
return buffers;
}
VTKM_CONT
const ArrayTuple& GetArrayTuple() const { return this->Storage->GetArrayTuple(); }
ArrayTuple& GetArrayTuple() { return this->Storage->GetArrayTuple(); }
public:
template <typename... ArrayTs>
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers(const ArrayTs... arrays)
{
return CreateBuffersImpl(IndexList{}, arrays...);
}
private:
using ArrayTupleType = vtkm::Tuple<vtkm::cont::ArrayHandle<T, StorageTags>...>;
template <std::size_t... Is>
VTKM_CONT static ArrayTupleType GetArrayTupleImpl(vtkmstd::index_sequence<Is...>,
const vtkm::cont::internal::Buffer* buffers)
{
return ArrayTupleType(vtkm::cont::ArrayHandle<T, StorageTags>(Buffers<Is>(buffers))...);
}
public:
VTKM_CONT static ArrayTupleType GetArrayTuple(const vtkm::cont::internal::Buffer* buffers)
{
return GetArrayTupleImpl(IndexList{}, buffers);
}
};
// Special degenerate case when there is only one array being composited
template <typename T, typename StorageTag>
struct Storage<T, vtkm::cont::StorageTagCompositeVec<StorageTag>> : Storage<T, StorageTag>
{
VTKM_CONT static std::vector<vtkm::cont::internal::Buffer> CreateBuffers(
const vtkm::cont::ArrayHandle<T, StorageTag>& array)
{
return vtkm::cont::internal::CreateBuffers(array);
}
VTKM_CONT static vtkm::Tuple<vtkm::cont::ArrayHandle<T, StorageTag>> GetArrayTuple(
const vtkm::cont::internal::Buffer* buffers)
{
return vtkm::cont::ArrayHandle<T, StorageTag>(buffers);
}
};
} // namespace internal
@ -573,7 +422,6 @@ class ArrayHandleCompositeVector
{
private:
using Traits = internal::CompositeVectorTraits<ArrayTs...>;
using TupleType = vtkm::Tuple<ArrayTs...>;
using StorageType = typename Traits::StorageType;
public:
@ -583,9 +431,14 @@ public:
VTKM_CONT
ArrayHandleCompositeVector(const ArrayTs&... arrays)
: Superclass(StorageType(arrays...))
: Superclass(StorageType::CreateBuffers(arrays...))
{
}
VTKM_CONT vtkm::Tuple<ArrayTs...> GetArrayTuple() const
{
return StorageType::GetArrayTuple(this->GetBuffers());
}
};
/// Create a composite vector array from other arrays.
@ -594,11 +447,73 @@ template <typename... ArrayTs>
VTKM_CONT ArrayHandleCompositeVector<ArrayTs...> make_ArrayHandleCompositeVector(
const ArrayTs&... arrays)
{
VTKM_STATIC_ASSERT_MSG(internal::compvec::AllAreArrayHandles<ArrayTs...>::Value,
"Arguments to make_ArrayHandleCompositeVector must be "
"of ArrayHandle types.");
// Will issue compiler error if any of ArrayTs is not a valid ArrayHandle.
vtkm::List<internal::compvec::VerifyArrayHandle<ArrayTs>...> checkArrayHandles;
(void)checkArrayHandles;
return ArrayHandleCompositeVector<ArrayTs...>(arrays...);
}
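// Illustrative usage sketch (array names and types are examples only, not
// part of this header):
//
//   vtkm::cont::ArrayHandle<vtkm::Float32> xs;
//   vtkm::cont::ArrayHandle<vtkm::Float32> ys;
//   // ... fill xs and ys with the same number of values ...
//   auto xy = vtkm::cont::make_ArrayHandleCompositeVector(xs, ys);
//   // xy acts as an ArrayHandle of vtkm::Vec<vtkm::Float32, 2>: component 0
//   // of entry i reads from xs and component 1 reads from ys, without
//   // copying either input array.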
//--------------------------------------------------------------------------------
// Specialization of ArrayExtractComponent
namespace internal
{
namespace detail
{
template <typename T>
struct ExtractComponentCompositeVecFunctor
{
using ResultArray = vtkm::cont::ArrayHandleStride<typename vtkm::VecTraits<T>::BaseComponentType>;
ResultArray operator()(vtkm::IdComponent, vtkm::IdComponent, vtkm::CopyFlag) const
{
throw vtkm::cont::ErrorBadValue("Invalid component index given to ArrayExtractComponent.");
}
template <typename A0, typename... As>
ResultArray operator()(vtkm::IdComponent compositeIndex,
vtkm::IdComponent subIndex,
vtkm::CopyFlag allowCopy,
const A0& array0,
const As&... arrays) const
{
if (compositeIndex == 0)
{
return vtkm::cont::internal::ArrayExtractComponentImpl<typename A0::StorageTag>{}(
array0, subIndex, allowCopy);
}
else
{
return (*this)(--compositeIndex, subIndex, allowCopy, arrays...);
}
}
};
} // namespace detail
template <typename... StorageTags>
struct ArrayExtractComponentImpl<StorageTagCompositeVec<StorageTags...>>
{
template <typename T, vtkm::IdComponent NUM_COMPONENTS>
typename detail::ExtractComponentCompositeVecFunctor<T>::ResultArray operator()(
const vtkm::cont::ArrayHandle<vtkm::Vec<T, NUM_COMPONENTS>,
vtkm::cont::StorageTagCompositeVec<StorageTags...>>& src,
vtkm::IdComponent componentIndex,
vtkm::CopyFlag allowCopy) const
{
vtkm::cont::ArrayHandleCompositeVector<vtkm::cont::ArrayHandle<T, StorageTags>...> array(src);
constexpr vtkm::IdComponent NUM_SUB_COMPONENTS = vtkm::VecFlat<T>::NUM_COMPONENTS;
return array.GetArrayTuple().Apply(detail::ExtractComponentCompositeVecFunctor<T>{},
componentIndex / NUM_SUB_COMPONENTS,
componentIndex % NUM_SUB_COMPONENTS,
allowCopy);
}
};
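// Index arithmetic sketch (illustrative): for a composite of three
// vtkm::Vec3f arrays, T is vtkm::Vec3f and NUM_SUB_COMPONENTS is 3, so a
// flat componentIndex of 7 selects member array 7 / 3 == 2 and
// sub-component 7 % 3 == 1 within it; the functor above then recurses
// through the array tuple until compositeIndex reaches 0 and forwards the
// request to that member array's own ArrayExtractComponentImpl.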
} // namespace internal
}
} // namespace vtkm::cont
@ -677,7 +592,7 @@ private:
public:
static VTKM_CONT void save(BinaryBuffer& bb, const BaseType& obj)
{
obj.GetStorage().GetArrayTuple().ForEach(SaveFunctor{ bb });
Type(obj).GetArrayTuple().ForEach(SaveFunctor{ bb });
}
static VTKM_CONT void load(BinaryBuffer& bb, BaseType& obj)

@ -17,8 +17,6 @@
namespace vtkm
{
namespace cont
{
namespace internal
{
@ -103,7 +101,13 @@ private:
PortalType2 portal2;
}; // class ArrayPortalConcatenate
} // namespace internal
}
} // namespace vtkm::internal
namespace vtkm
{
namespace cont
{
template <typename StorageTag1, typename StorageTag2>
class VTKM_ALWAYS_EXPORT StorageTagConcatenate
@ -119,10 +123,11 @@ namespace detail
template <typename T, typename ArrayOrStorage, bool IsArrayType>
struct ConcatinateTypeArgImpl;
template <typename T, typename Storage>
struct ConcatinateTypeArgImpl<T, Storage, false>
template <typename T, typename StorageTag_>
struct ConcatinateTypeArgImpl<T, StorageTag_, false>
{
using StorageTag = Storage;
using StorageTag = StorageTag_;
using Storage = vtkm::cont::internal::Storage<T, StorageTag>;
using ArrayHandle = vtkm::cont::ArrayHandle<T, StorageTag>;
};
@ -135,6 +140,10 @@ struct ConcatinateTypeArgImpl<T, Array, true>
1.6,
"Use storage tags instead of array handles in StorageTagConcatenate.") =
typename Array::StorageTag;
using Storage VTKM_DEPRECATED(
1.6,
"Use storage tags instead of array handles in StorageTagConcatenate.") =
vtkm::cont::internal::Storage<T, typename Array::StorageTag>;
using ArrayHandle VTKM_DEPRECATED(
1.6,
"Use storage tags instead of array handles in StorageTagConcatenate.") =
@ -154,186 +163,79 @@ struct ConcatinateTypeArg
template <typename T, typename ST1, typename ST2>
class Storage<T, StorageTagConcatenate<ST1, ST2>>
{
using SourceStorage1 = typename detail::ConcatinateTypeArg<T, ST1>::Storage;
using SourceStorage2 = typename detail::ConcatinateTypeArg<T, ST2>::Storage;
using ArrayHandleType1 = typename detail::ConcatinateTypeArg<T, ST1>::ArrayHandle;
using ArrayHandleType2 = typename detail::ConcatinateTypeArg<T, ST2>::ArrayHandle;
template <typename Buff>
VTKM_CONT static Buff* Buffers1(Buff* buffers)
{
return buffers;
}
template <typename Buff>
VTKM_CONT static Buff* Buffers2(Buff* buffers)
{
return buffers + SourceStorage1::GetNumberOfBuffers();
}
public:
using ValueType = T;
using PortalType = ArrayPortalConcatenate<typename ArrayHandleType1::WritePortalType,
typename ArrayHandleType2::WritePortalType>;
using PortalConstType = ArrayPortalConcatenate<typename ArrayHandleType1::ReadPortalType,
typename ArrayHandleType2::ReadPortalType>;
VTKM_STORAGE_NO_RESIZE;
VTKM_CONT
Storage()
: valid(false)
using ReadPortalType =
vtkm::internal::ArrayPortalConcatenate<typename SourceStorage1::ReadPortalType,
typename SourceStorage2::ReadPortalType>;
using WritePortalType =
vtkm::internal::ArrayPortalConcatenate<typename SourceStorage1::WritePortalType,
typename SourceStorage2::WritePortalType>;
VTKM_CONT static constexpr vtkm::IdComponent GetNumberOfBuffers()
{
return (SourceStorage1::GetNumberOfBuffers() + SourceStorage2::GetNumberOfBuffers());
}
VTKM_CONT
Storage(const ArrayHandleType1& a1, const ArrayHandleType2& a2)
: array1(a1)
, array2(a2)
, valid(true)
VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
{
return (SourceStorage1::GetNumberOfValues(Buffers1(buffers)) +
SourceStorage2::GetNumberOfValues(Buffers2(buffers)));
}
VTKM_CONT
PortalConstType GetPortalConst() const
VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
VTKM_ASSERT(this->valid);
return PortalConstType(this->array1.ReadPortal(), this->array2.ReadPortal());
return ReadPortalType(SourceStorage1::CreateReadPortal(Buffers1(buffers), device, token),
SourceStorage2::CreateReadPortal(Buffers2(buffers), device, token));
}
VTKM_CONT
PortalType GetPortal()
VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
vtkm::cont::DeviceAdapterId device,
vtkm::cont::Token& token)
{
VTKM_ASSERT(this->valid);
return PortalType(this->array1.WritePortal(), this->array2.WritePortal());
return WritePortalType(SourceStorage1::CreateWritePortal(Buffers1(buffers), device, token),
SourceStorage2::CreateWritePortal(Buffers2(buffers), device, token));
}
VTKM_CONT
vtkm::Id GetNumberOfValues() const
VTKM_CONT static auto CreateBuffers(const ArrayHandleType1& array1,
const ArrayHandleType2& array2)
-> decltype(vtkm::cont::internal::CreateBuffers())
{
VTKM_ASSERT(this->valid);
return this->array1.GetNumberOfValues() + this->array2.GetNumberOfValues();
return vtkm::cont::internal::CreateBuffers(array1, array2);
}
VTKM_CONT
void Allocate(vtkm::Id vtkmNotUsed(numberOfValues))
VTKM_CONT static const ArrayHandleType1& GetArray1(const vtkm::cont::internal::Buffer* buffers)
{
throw vtkm::cont::ErrorInternal("ArrayHandleConcatenate should not be allocated explicitly. ");
return ArrayHandleType1(Buffers1(buffers));
}
VTKM_CONT
void Shrink(vtkm::Id numberOfValues)
VTKM_CONT static const ArrayHandleType2& GetArray2(const vtkm::cont::internal::Buffer* buffers)
{
VTKM_ASSERT(this->valid);
if (numberOfValues < this->array1.GetNumberOfValues())
{
this->array1.Shrink(numberOfValues);
this->array2.Shrink(0);
}
else
this->array2.Shrink(numberOfValues - this->array1.GetNumberOfValues());
return ArrayHandleType2(Buffers2(buffers));
}
VTKM_CONT
void ReleaseResources()
{
VTKM_ASSERT(this->valid);
this->array1.ReleaseResources();
this->array2.ReleaseResources();
}
VTKM_CONT
const ArrayHandleType1& GetArray1() const
{
VTKM_ASSERT(this->valid);
return this->array1;
}
VTKM_CONT
const ArrayHandleType2& GetArray2() const
{
VTKM_ASSERT(this->valid);
return this->array2;
}
private:
ArrayHandleType1 array1;
ArrayHandleType2 array2;
bool valid;
}; // class Storage
template <typename T, typename ST1, typename ST2, typename Device>
class ArrayTransfer<T, StorageTagConcatenate<ST1, ST2>, Device>
{
using ArrayHandleType1 = typename detail::ConcatinateTypeArg<T, ST1>::ArrayHandle;
using ArrayHandleType2 = typename detail::ConcatinateTypeArg<T, ST2>::ArrayHandle;
using StorageTag1 = typename detail::ConcatinateTypeArg<T, ST1>::StorageTag;
using StorageTag2 = typename detail::ConcatinateTypeArg<T, ST2>::StorageTag;
public:
using ValueType = T;
private:
using StorageTag = StorageTagConcatenate<StorageTag1, StorageTag2>;
using StorageType = vtkm::cont::internal::Storage<ValueType, StorageTag>;
public:
using PortalControl = typename StorageType::PortalType;
using PortalConstControl = typename StorageType::PortalConstType;
using PortalExecution =
ArrayPortalConcatenate<typename ArrayHandleType1::template ExecutionTypes<Device>::Portal,
typename ArrayHandleType2::template ExecutionTypes<Device>::Portal>;
using PortalConstExecution =
ArrayPortalConcatenate<typename ArrayHandleType1::template ExecutionTypes<Device>::PortalConst,
typename ArrayHandleType2::template ExecutionTypes<Device>::PortalConst>;
VTKM_CONT
ArrayTransfer(StorageType* storage)
: array1(storage->GetArray1())
, array2(storage->GetArray2())
{
}
VTKM_CONT
vtkm::Id GetNumberOfValues() const
{
return this->array1.GetNumberOfValues() + this->array2.GetNumberOfValues();
}
VTKM_CONT
PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData), vtkm::cont::Token& token)
{
return PortalConstExecution(this->array1.PrepareForInput(Device(), token),
this->array2.PrepareForInput(Device(), token));
}
VTKM_CONT
PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData), vtkm::cont::Token& token)
{
return PortalExecution(this->array1.PrepareForInPlace(Device(), token),
this->array2.PrepareForInPlace(Device(), token));
}
VTKM_CONT
PortalExecution PrepareForOutput(vtkm::Id vtkmNotUsed(numberOfValues), vtkm::cont::Token&)
{
throw vtkm::cont::ErrorInternal("ArrayHandleConcatenate is derived and read-only. ");
}
VTKM_CONT
void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
{
// no need to implement
}
VTKM_CONT
void Shrink(vtkm::Id numberOfValues)
{
if (numberOfValues < this->array1.GetNumberOfValues())
{
this->array1.Shrink(numberOfValues);
this->array2.Shrink(0);
}
else
this->array2.Shrink(numberOfValues - this->array1.GetNumberOfValues());
}
VTKM_CONT
void ReleaseResources()
{
this->array1.ReleaseResourcesExecution();
this->array2.ReleaseResourcesExecution();
}
private:
ArrayHandleType1 array1;
ArrayHandleType2 array2;
};
}
}
} // namespace vtkm::cont::internal
@ -363,7 +265,7 @@ protected:
public:
VTKM_CONT
ArrayHandleConcatenate(const ArrayHandleType1& array1, const ArrayHandleType2& array2)
: Superclass(StorageType(array1, array2))
: Superclass(StorageType::CreateBuffers(array1, array2))
{
}
};
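// Illustrative usage sketch (names and sizes are examples only):
//
//   using IdArray = vtkm::cont::ArrayHandle<vtkm::Id>;
//   IdArray head;  // e.g. 10 values
//   IdArray tail;  // e.g. 5 values
//   vtkm::cont::ArrayHandleConcatenate<IdArray, IdArray> joined(head, tail);
//   // joined.GetNumberOfValues() == 15; indices [0, 10) read from head and
//   // indices [10, 15) read from tail, with neither input being copied.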

@ -47,16 +47,6 @@ using StorageTagConstantSuperclass =
template <typename T>
struct Storage<T, vtkm::cont::StorageTagConstant> : Storage<T, StorageTagConstantSuperclass<T>>
{
using Superclass = Storage<T, StorageTagConstantSuperclass<T>>;
using Superclass::Superclass;
};
template <typename T, typename Device>
struct ArrayTransfer<T, vtkm::cont::StorageTagConstant, Device>
: ArrayTransfer<T, StorageTagConstantSuperclass<T>, Device>
{
using Superclass = ArrayTransfer<T, StorageTagConstantSuperclass<T>, Device>;
using Superclass::Superclass;
};
} // namespace internal
@ -79,11 +69,16 @@ public:
VTKM_CONT
ArrayHandleConstant(T value, vtkm::Id numberOfValues = 0)
: Superclass(typename internal::Storage<T, StorageTag>::PortalConstType(
internal::ConstantFunctor<T>(value),
numberOfValues))
: Superclass(internal::FunctorToArrayHandleImplicitBuffers(internal::ConstantFunctor<T>(value),
numberOfValues))
{
}
/// \brief Returns the constant value stored in this array.
///
/// The value set in the constructor of this array is returned even if the number of values is 0.
///
VTKM_CONT T GetValue() const { return this->ReadPortal().GetFunctor()(0); }
};
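// Illustrative usage sketch (values are examples only):
//
//   vtkm::cont::ArrayHandleConstant<vtkm::Float32> ones(1.0f, 100);
//   // ones.GetNumberOfValues() == 100 and every entry reads as 1.0f, but no
//   // per-value memory is allocated; ones.GetValue() returns 1.0f even if
//   // the array had been constructed with zero values.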
/// make_ArrayHandleConstant is a convenience function to generate an

@ -10,9 +10,9 @@
#ifndef vtk_m_cont_ArrayHandleCounting_h
#define vtk_m_cont_ArrayHandleCounting_h
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/StorageImplicit.h>
#include <vtkm/cont/ArrayHandleImplicit.h>
#include <vtkm/TypeTraits.h>
#include <vtkm/VecTraits.h>
namespace vtkm
@ -52,24 +52,6 @@ public:
{
}
template <typename OtherValueType>
VTKM_EXEC_CONT ArrayPortalCounting(const ArrayPortalCounting<OtherValueType>& src)
: Start(src.Start)
, Step(src.Step)
, NumberOfValues(src.NumberOfValues)
{
}
template <typename OtherValueType>
VTKM_EXEC_CONT ArrayPortalCounting<ValueType>& operator=(
const ArrayPortalCounting<OtherValueType>& src)
{
this->Start = src.Start;
this->Step = src.Step;
this->NumberOfValues = src.NumberOfValues;
return *this;
}
VTKM_EXEC_CONT
ValueType GetStart() const { return this->Start; }
@ -91,23 +73,49 @@ private:
vtkm::Id NumberOfValues;
};
namespace detail
{
template <typename T, typename UseVecTraits = vtkm::HasVecTraits<T>>
struct CanCountImpl;
template <typename T>
struct CanCountImpl<T, std::false_type>
{
using TTraits = vtkm::TypeTraits<T>;
static constexpr bool IsNumeric =
!std::is_same<typename TTraits::NumericTag, vtkm::TypeTraitsUnknownTag>::value;
static constexpr bool value = IsNumeric;
};
template <typename T>
struct CanCountImpl<T, std::true_type>
{
using VTraits = vtkm::VecTraits<T>;
using BaseType = typename VTraits::BaseComponentType;
static constexpr bool IsBool = std::is_same<BaseType, bool>::value;
static constexpr bool value = CanCountImpl<BaseType, std::false_type>::value && !IsBool;
};
} // namespace detail
// Not all types can be counted.
template <typename T>
struct CanCount
{
static constexpr bool value = detail::CanCountImpl<T>::value;
};
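// For example (illustrative): CanCount<vtkm::Float64>::value and
// CanCount<vtkm::Vec3f>::value are true because their base component types
// carry a known numeric TypeTraits tag, whereas CanCount<bool>::value and
// CanCount<vtkm::Vec<bool, 3>>::value are false. The enable_if on the
// Storage specialization below therefore only admits value types that can
// be meaningfully advanced by a step.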
template <typename T>
using StorageTagCountingSuperclass =
vtkm::cont::StorageTagImplicit<internal::ArrayPortalCounting<T>>;
template <typename T>
struct Storage<T, vtkm::cont::StorageTagCounting> : Storage<T, StorageTagCountingSuperclass<T>>
struct Storage<T, typename std::enable_if<CanCount<T>::value, vtkm::cont::StorageTagCounting>::type>
: Storage<T, StorageTagCountingSuperclass<T>>
{
using Superclass = Storage<T, StorageTagCountingSuperclass<T>>;
using Superclass::Superclass;
};
template <typename T, typename Device>
struct ArrayTransfer<T, vtkm::cont::StorageTagCounting, Device>
: ArrayTransfer<T, StorageTagCountingSuperclass<T>, Device>
{
using Superclass = ArrayTransfer<T, StorageTagCountingSuperclass<T>, Device>;
using Superclass::Superclass;
};
} // namespace internal
@ -126,7 +134,8 @@ public:
VTKM_CONT
ArrayHandleCounting(CountingValueType start, CountingValueType step, vtkm::Id length)
: Superclass(internal::ArrayPortalCounting<CountingValueType>(start, step, length))
: Superclass(internal::PortalToArrayHandleImplicitBuffers(
internal::ArrayPortalCounting<CountingValueType>(start, step, length)))
{
}
};
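// Illustrative usage sketch (values are examples only):
//
//   vtkm::cont::ArrayHandleCounting<vtkm::Id> counting(10, 2, 5);
//   // Implicitly represents the sequence 10, 12, 14, 16, 18: entry i is
//   // start + i * step, generated on demand with no backing allocation.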
