Merge branch 'master' into concurrent_union_find

2024-09-08 13:23:51 +00:00 · 2020-08-31 14:34:29 -06:00 · 2020-08-31 14:34:29 -06:00 · e17345a81f
commit e17345a81f
parent 945e9cf328 ede0c179b7
471 changed files with 10758 additions and 5711 deletions
--- a/.clang-format
+++ b/.clang-format
@ -1,17 +1,19 @@
 ---
-# This configuration requires clang-format 3.8 or higher.
+# This configuration requires clang-format 9 or higher.
 BasedOnStyle: Mozilla
 AlignAfterOpenBracket: Align
+AlignEscapedNewlines: true
 AlignOperands: false
-AlwaysBreakAfterReturnType: None
+AllowAllParametersOfDeclarationOnNextLine: false
 AlwaysBreakAfterDefinitionReturnType: None
-BreakBeforeBraces: Allman
+AlwaysBreakAfterReturnType: None
 BinPackArguments: false
 BinPackParameters: false
+BreakBeforeBraces: Allman
 ColumnLimit: 100
+# FixNamespaceComments: true
 MaxEmptyLinesToKeep: 4
-Standard: Cpp11
-# This requires clang-format 4.0 (at least).
-#FixNamespaceComments: true
 ReflowComments: false
+SpaceAfterTemplateKeyword: true
+Standard: Cpp11
 ...
--- a/.gitattributes
+++ b/.gitattributes
@ -1,5 +1,5 @@
 # Attributes used for formatting.
-[attr]our-c-style   whitespace=tab-in-indent  format.clang-format
+[attr]our-c-style   whitespace=tab-in-indent  format.clang-format=9

 *.cxx   our-c-style
 *.h     our-c-style
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@ -49,45 +49,54 @@
    GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci

 .centos7: &centos7
-  image: "kitware/vtkm:ci-centos7_cuda10.2-20200601"
+  image: "kitware/vtkm:ci-centos7_cuda10.2-20200820"
  extends:
    - .docker_image

 .centos8: &centos8
-  image: "kitware/vtkm:ci-centos8-20200601"
+  image: "kitware/vtkm:ci-centos8-20200820"
  extends:
    - .docker_image

 .rhel8: &rhel8
-  image: "kitware/vtkm:ci-rhel8_cuda10.2-20200601"
+  image: "kitware/vtkm:ci-rhel8_cuda10.2-20200820"
  extends:
    - .docker_image

 .ubuntu1604: &ubuntu1604
-  image: "kitware/vtkm:ci-ubuntu1604-20200601"
+  image: "kitware/vtkm:ci-ubuntu1604-20200820"
  extends:
    - .docker_image

 .ubuntu1604_cuda: &ubuntu1604_cuda
-  image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200601"
+  image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200820"
  extends:
    - .docker_image

 .ubuntu1804: &ubuntu1804
-  image: "kitware/vtkm:ci-ubuntu1804-20200601"
+  image: "kitware/vtkm:ci-ubuntu1804-20200820"
  extends:
    - .docker_image

 .ubuntu1804_cuda: &ubuntu1804_cuda
-  image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200601"
+  image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200820"
+  extends:
+    - .docker_image
+
+.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
+  image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20200820"
  extends:
    - .docker_image

 .ubuntu2004_doxygen: &ubuntu2004_doxygen
-  image: "kitware/vtkm:ci-doxygen-20200601"
+  image: "kitware/vtkm:ci-doxygen-20200820"
  extends:
    - .docker_image

+.ubuntu2004_kokkos: &ubuntu2004_kokkos
+  image: "kitware/vtkm:ci-ubuntu2004_kokkos-20200820"
+  extends:
+    - .docker_image

 .only-default: &only-default
  only:
@ -178,4 +187,5 @@ include:
  - local: '/.gitlab/ci/rhel8.yml'
  - local: '/.gitlab/ci/ubuntu1604.yml'
  - local: '/.gitlab/ci/ubuntu1804.yml'
+  - local: '/.gitlab/ci/ubuntu2004.yml'
  - local: '/.gitlab/ci/windows10.yml'
--- a/.gitlab/ci/centos7.yml
+++ b/.gitlab/ci/centos7.yml
@ -7,6 +7,7 @@ build:centos7_gcc48:
    - vtkm
    - docker
    - linux
+    - cuda-rt
    - large-memory
  extends:
    - .centos7
@ -20,15 +21,17 @@ build:centos7_gcc48:
 test:centos7_gcc48:
  tags:
    - test
-    - cuda-rt
-    - turing
    - vtkm
    - docker
    - linux
+    - cuda-rt
+    - turing
  extends:
    - .centos7
    - .cmake_test_linux
    - .only-default
+  variables:
+      CTEST_EXCLUSIONS: "UnitTestContourTreeUniformAugmentedFilterCUDA|UnitTestContourTreeUniformAugmentedCUDA"
  dependencies:
    - build:centos7_gcc48
  needs:
@ -37,17 +40,17 @@ test:centos7_gcc48:
 test:rhel8_test_centos7:
  tags:
    - test
-    - cuda-rt
-    - turing
    - vtkm
    - docker
    - linux
+    - cuda-rt
+    - turing
  extends:
    - .rhel8
    - .cmake_test_linux
    - .only-default
  variables:
-      CTEST_EXCLUSIONS: "built_against_test_install"
+      CTEST_EXCLUSIONS: "built_against_test_install|UnitTestContourTreeUniformAugmentedFilterCUDA|UnitTestContourTreeUniformAugmentedCUDA"
  dependencies:
    - build:centos7_gcc48
  needs:
--- a/.gitlab/ci/config/initial_config.cmake
+++ b/.gitlab/ci/config/initial_config.cmake
@ -20,7 +20,6 @@ endif ()
 string(REPLACE "+" ";" options "$ENV{VTKM_SETTINGS}")

 foreach(option IN LISTS options)
-
  if(static STREQUAL option)
    set(BUILD_SHARED_LIBS "OFF" CACHE STRING "")

@ -71,6 +70,9 @@ foreach(option IN LISTS options)
  elseif(cuda STREQUAL option)
    set(VTKm_ENABLE_CUDA "ON" CACHE STRING "")

+  elseif(kokkos STREQUAL option)
+    set(VTKm_ENABLE_KOKKOS "ON" CACHE STRING "")
+
  elseif(maxwell STREQUAL option)
    set(VTKm_CUDA_Architecture "maxwell" CACHE STRING "")

@ -95,7 +97,10 @@ find_program(SCCACHE_COMMAND NAMES sccache)
 if(SCCACHE_COMMAND)
  set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
  set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
-  if(VTKm_ENABLE_CUDA)
+
+  # Use VTKm_CUDA_Architecture to determine if we need CUDA sccache setup
+  # since this will also capture when kokkos is being used with CUDA backing
+  if(DEFINED VTKm_CUDA_Architecture)
    set(CMAKE_CUDA_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
  endif()
 endif()
--- a/.gitlab/ci/docker/centos7/cuda10.2/Dockerfile
+++ b/.gitlab/ci/docker/centos7/cuda10.2/Dockerfile
@ -1,7 +1,7 @@
 FROM nvidia/cuda:10.2-devel-centos7
 LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"

-RUN yum install cmake make gcc gcc-c++ -y
+RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
 RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
 RUN yum install git git-lfs -y

--- a/.gitlab/ci/docker/rhel8/cuda10.2/Dockerfile
+++ b/.gitlab/ci/docker/rhel8/cuda10.2/Dockerfile
@ -1,7 +1,7 @@
 FROM nvidia/cuda:10.2-devel-ubi8
 LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"

-RUN yum install make gcc gcc-c++ curl -y
+RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
 RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
 RUN yum install git git-lfs -y

--- a/.gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/cuda10.1/Dockerfile
@ -5,6 +5,7 @@ LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
 RUN apt-get update && apt-get install -y --no-install-recommends \
      curl \
      g++ \
+      clang-8 \
      git \
      git-lfs \
      libmpich-dev \
--- a/.gitlab/ci/docker/ubuntu1804/kokkos-cuda/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu1804/kokkos-cuda/Dockerfile
@ -0,0 +1,47 @@
+FROM nvidia/cuda:11.0-devel-ubuntu18.04
+LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      curl \
+      g++ \
+      git \
+      git-lfs \
+      ninja-build \
+      && \
+    rm -rf /var/lib/apt/lists/*
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# kokkos backend requires cmake 3.18
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.18.1/cmake-3.18.1-Linux-x86_64.sh > cmake-3.18.1-Linux-x86_64.sh && \
+    sh cmake-3.18.1-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.18.1-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+ENV PATH "/opt/cmake/bin:${PATH}"
+
+# Build and install Kokkos
+RUN mkdir -p /opt/kokkos/build && \
+    cd /opt/kokkos/build && \
+    curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
+    tar -xf kokkos-3.1.01.tar.gz && \
+    mkdir bld && cd bld && \
+    CXX=/opt/kokkos/build/kokkos-3.1.01/bin/nvcc_wrapper \
+    cmake -B . -S ../kokkos-3.1.01 \
+          -DCMAKE_BUILD_TYPE=Release \
+          -DCMAKE_INSTALL_PREFIX=/opt/kokkos \
+          -DCMAKE_CXX_FLAGS=-fPIC \
+          -DCMAKE_CXX_STANDARD=14 \
+          -DKokkos_ENABLE_CUDA=ON \
+          -DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
+          -DKokkos_ENABLE_CUDA_LAMBDA=ON \
+          -DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
+          -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
+          -DKokkos_ENABLE_CUDA_UVM=ON \
+          -DKokkos_ARCH_TURING75=ON && \
+    cmake --build . -j 8 && \
+    cmake --install .
--- a/.gitlab/ci/docker/ubuntu2004/kokkos/Dockerfile
+++ b/.gitlab/ci/docker/ubuntu2004/kokkos/Dockerfile
@ -0,0 +1,41 @@
+FROM ubuntu:20.04
+LABEL maintainer "Sujin Philip<sujin.philip@kitware.com>"
+
+# Base dependencies for building VTK-m projects
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+      cmake \
+      curl \
+      g++ \
+      git \
+      git-lfs \
+      libmpich-dev \
+      libomp-dev \
+      mpich \
+      ninja-build \
+      rsync \
+      ssh \
+      software-properties-common
+
+# Need to run git-lfs install manually on ubuntu based images when using the
+# system packaged version
+RUN git-lfs install
+
+# Provide CMake 3.17 so we can re-run tests easily
+# This will be used when we run just the tests
+RUN mkdir /opt/cmake/ && \
+    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
+    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
+    rm cmake-3.17.3-Linux-x86_64.sh && \
+    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
+
+ENV PATH "${PATH}:/opt/cmake/bin"
+
+# Build and install Kokkos
+RUN mkdir -p /opt/kokkos/build && \
+    cd /opt/kokkos/build && \
+    curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
+    tar -xf kokkos-3.1.01.tar.gz && \
+    mkdir bld && cd bld && \
+    cmake -GNinja -DCMAKE_INSTALL_PREFIX=/opt/kokkos -DCMAKE_CXX_FLAGS=-fPIC -DKokkos_ENABLE_SERIAL=ON ../kokkos-3.1.01 &&\
+    ninja all && \
+    ninja install
--- a/.gitlab/ci/docker/update_all.sh
+++ b/.gitlab/ci/docker/update_all.sh
@ -34,10 +34,18 @@ cd ubuntu1804/cuda10.1
 sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date .
 cd ../..

+cd ubuntu1804/kokkos-cuda
+sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-$date .
+cd ../..
+
 cd ubuntu2004/doxygen/
 sudo docker build -t kitware/vtkm:ci-doxygen-$date .
 cd ../..

+cd ubuntu2004/kokkos
+sudo docker build -t kitware/vtkm:ci-ubuntu2004_kokkos-$date .
+cd ../..
+
 # sudo docker login --username=<docker_hub_name>
 sudo docker push kitware/vtkm
 sudo docker system prune
--- a/.gitlab/ci/ubuntu1604.yml
+++ b/.gitlab/ci/ubuntu1604.yml
@ -7,6 +7,7 @@ build:ubuntu1604_gcc5:
    - vtkm
    - docker
    - linux
+    - cuda-rt
    - large-memory
  extends:
    - .ubuntu1604_cuda
@ -18,39 +19,22 @@ build:ubuntu1604_gcc5:
    CMAKE_BUILD_TYPE: RelWithDebInfo
    VTKM_SETTINGS: "cuda+pascal"

-# Temporarily disabled as we don't have a pascal hw gitlab-runner
-# test:ubuntu1604_gcc5:
-#   tags:
-#     - test
-#     - cuda-rt
-#     - pascal
-#     - vtkm
-#     - docker
-#     - linux
-#   extends:
-#     - .ubuntu1604_cuda
-#     - .cmake_test_linux
-#     - .only-default
-#   dependencies:
-#     - build:ubuntu1604_gcc5
-#   needs:
-#     - build:ubuntu1604_gcc5
-# test:ubuntu1804_test_ubuntu1604_gcc5:
-#   tags:
-#     - test
-#     - cuda-rt
-#     - pascal
-#     - vtkm
-#     - docker
-#     - linux
-#   extends:
-#     - .ubuntu1804_cuda
-#     - .cmake_test_linux
-#     - .only-default
-#   dependencies:
-#     - build:ubuntu1604_gcc5
-#   needs:
-#     - build:ubuntu1604_gcc5
+test:ubuntu1604_gcc5:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - pascal
+  extends:
+    - .ubuntu1604_cuda
+    - .cmake_test_linux
+    - .only-default
+  dependencies:
+    - build:ubuntu1604_gcc5
+  needs:
+    - build:ubuntu1604_gcc5

 # Build on ubuntu1704 with OpenMP + CUDA
 # Runs only on nightlies
@ -60,6 +44,7 @@ build:ubuntu1604_gcc5_2:
    - vtkm
    - docker
    - linux
+    - cuda-rt
    - large-memory
  extends:
    - .ubuntu1604_cuda
@ -71,6 +56,25 @@ build:ubuntu1604_gcc5_2:
    CMAKE_BUILD_TYPE: Release
    VTKM_SETTINGS: "openmp+cuda+pascal+examples"

+test:ubuntu1804_test_ubuntu1604_gcc5_2:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - pascal
+  extends:
+    - .ubuntu1804_cuda
+    - .cmake_test_linux
+    - .only-master
+  variables:
+      CTEST_EXCLUSIONS: "built_against_test_install"
+  dependencies:
+    - build:ubuntu1604_gcc5_2
+  needs:
+    - build:ubuntu1604_gcc5_2
+
 # Build on ubuntu1604 with mpi + tbb and test on ubuntu1604
 # Uses gcc 4.8
 # Uses OpenMPI
--- a/.gitlab/ci/ubuntu1804.yml
+++ b/.gitlab/ci/ubuntu1804.yml
@ -46,6 +46,7 @@ build:ubuntu1804_gcc7:
    - vtkm
    - docker
    - linux
+    - cuda-rt
    - large-memory
  extends:
    - .ubuntu1804_cuda
@ -59,11 +60,11 @@ build:ubuntu1804_gcc7:
 test:ubuntu1804_gcc7:
  tags:
    - test
-    - cuda-rt
-    - turing
    - vtkm
    - docker
    - linux
+    - cuda-rt
+    - turing
  extends:
    - .ubuntu1804_cuda
    - .cmake_test_linux
@ -74,42 +75,45 @@ test:ubuntu1804_gcc7:
    - build:ubuntu1804_gcc7


-# Build on ubuntu1804 with OpenMP and test on ubuntu1804
-# Uses gcc 7.4
+# Build on ubuntu1804 with CUDA+TBB and test on ubuntu1804
+# Uses clang as CUDA host compiler
 # Runs only on nightlies
-build:ubuntu1804_gcc7_2:
+build:ubuntu1804_clang_cuda:
  tags:
    - build
    - vtkm
    - docker
    - linux
+    - cuda-rt
+    - large-memory
  extends:
-    - .ubuntu1804
+    - .ubuntu1804_cuda
    - .cmake_build_linux
-    - .only-master
+    - .only-default
+    # - .only-master
  variables:
-    CC: "gcc-7"
-    CXX: "g++-7"
-    VTKM_SETTINGS: "openmp+shared+examples"
+    CC: "clang-8"
+    CXX: "clang++-8"
+    CUDAHOSTCXX: "clang++-8"
+    VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"

-test:ubuntu1804_gcc7_2:
+test:ubuntu1804_clang_cuda:
  tags:
    - test
    - vtkm
    - docker
    - linux
+    - cuda-rt
+    - pascal
  extends:
-    - .ubuntu1804
+    - .ubuntu1804_cuda
    - .cmake_test_linux
-    - .only-master
-  variables:
-    #Restrict OpenMP number of threads since multiple test stages
-    #execute on the same hardware concurrently
-    OMP_NUM_THREADS: 4
+    - .only-default
+    # - .only-master
  dependencies:
-    - build:ubuntu1804_gcc7_2
+    - build:ubuntu1804_clang_cuda
  needs:
-    - build:ubuntu1804_gcc7_2
+    - build:ubuntu1804_clang_cuda

 # Build on ubuntu1804 with OpenMP and test on ubuntu1804
 # Uses gcc 6.5
@ -179,3 +183,41 @@ test:ubuntu1804_clang8:
    - build:ubuntu1804_clang8
  needs:
    - build:ubuntu1804_clang8
+
+# Build on ubuntu1804 with kokkos and test on ubuntu1804
+# Uses CUDA 11
+build:ubuntu1804_kokkos:
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - large-memory
+  extends:
+    - .ubuntu1804_cuda_kokkos
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CMAKE_GENERATOR: "Ninja"
+    CMAKE_BUILD_TYPE: Release
+    VTKM_SETTINGS: "kokkos+turing+static+64bit_floats"
+
+test:ubuntu1804_kokkos:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+    - cuda-rt
+    - turing
+  extends:
+    - .ubuntu1804_cuda_kokkos
+    - .cmake_test_linux
+    - .only-default
+  dependencies:
+    - build:ubuntu1804_kokkos
+  needs:
+    - build:ubuntu1804_kokkos
+  variables:
+    CUDA_LAUNCH_BLOCKING: "1"
--- a/.gitlab/ci/ubuntu2004.yml
+++ b/.gitlab/ci/ubuntu2004.yml
@ -0,0 +1,28 @@
+build:ubuntu2004_kokkos:
+  tags:
+    - build
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu2004_kokkos
+    - .cmake_build_linux
+    - .only-default
+  variables:
+    CMAKE_BUILD_TYPE: RelWithDebInfo
+    VTKM_SETTINGS: "kokkos+shared+64bit_floats"
+
+test:ubuntu2004_kokkos:
+  tags:
+    - test
+    - vtkm
+    - docker
+    - linux
+  extends:
+    - .ubuntu2004_kokkos
+    - .cmake_test_linux
+    - .only-default
+  dependencies:
+    - build:ubuntu2004_kokkos
+  needs:
+    - build:ubuntu2004_kokkos
--- a/CMake/VTKmCompilerFlags.cmake
+++ b/CMake/VTKmCompilerFlags.cmake
@ -22,6 +22,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  set(VTKM_COMPILER_IS_CLANG 1)
 elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  set(VTKM_COMPILER_IS_GNU 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XLClang")
+  set(VTKM_COMPILER_IS_XL 1)
 endif()

 #-----------------------------------------------------------------------------
@ -51,7 +53,7 @@ if(VTKM_COMPILER_IS_MSVC)
  if(TARGET vtkm::cuda)
    target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler="/Gy">)
  endif()
-elseif(NOT VTKM_COMPILER_IS_PGI) #can't find an equivalant PGI flag
+elseif(NOT (VTKM_COMPILER_IS_PGI OR VTKM_COMPILER_IS_XL)) #can't find an equivalant PGI/XL flag
  target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-ffunction-sections>)
  if(TARGET vtkm::cuda)
    target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-ffunction-sections>)
@ -122,8 +124,8 @@ elseif(VTKM_COMPILER_IS_ICC)
  target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-wd1478 -wd13379>)

 elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
-  set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common)
-  set(cuda_flags -Xcompiler=-Wall,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-local-typedefs,-Wno-unused-function,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-Wunused,-fno-common)
+  set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common -Wno-unused-function)
+  set(cuda_flags -Xcompiler=-Wall,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-fno-common,-Wunused,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-function)

  #Only add float-conversion warnings for gcc as the integer warnigns in GCC
  #include the implicit casting of all types smaller than int to ints.
@ -161,17 +163,21 @@ elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
  endif()
 endif()

-#common warnings for all platforms when building cuda
-if(TARGET vtkm::cuda)
+function(setup_cuda_flags)
  if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
    #nvcc 9 introduced specific controls to disable the stack size warning
    #otherwise we let the warning occur. We have to set this in CMAKE_CUDA_FLAGS
    #as it is passed to the device link step, unlike compile_options
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning" PARENT_SCOPE)
  endif()

  set(display_error_nums -Xcudafe=--display_error_number)
  target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${display_error_nums}>)
+endfunction()
+
+#common warnings for all platforms when building cuda
+if ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
+  setup_cuda_flags()
 endif()

 if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
--- a/CMake/VTKmConfig.cmake.in
+++ b/CMake/VTKmConfig.cmake.in
@ -39,6 +39,7 @@
 #  VTKm_ENABLE_CUDA           Will be enabled if VTK-m was built with CUDA support
 #  VTKm_ENABLE_TBB            Will be enabled if VTK-m was built with TBB support
 #  VTKm_ENABLE_OPENMP         Will be enabled if VTK-m was built with OpenMP support
+#  VTKm_ENABLE_KOKKOS         Will be enabled if VTK-m was built with Kokkos support
 #  VTKm_ENABLE_LOGGING        Will be enabled if VTK-m was built with logging support
 #  VTKm_ENABLE_MPI            Will be enabled if VTK-m was built with MPI support
 #  VTKm_ENABLE_RENDERING      Will be enabled if VTK-m was built with rendering support
@ -69,6 +70,7 @@ set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
 set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
 set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
 set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
+set(VTKm_ENABLE_KOKKOS "@VTKm_ENABLE_KOKKOS@")
 set(VTKm_ENABLE_LOGGING "@VTKm_ENABLE_LOGGING@")
 set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
 set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
@ -101,6 +103,12 @@ endif()
 if(VTKm_ENABLE_CUDA AND VTKM_FROM_INSTALL_DIR)
  set_target_properties(vtkm::cuda PROPERTIES cuda_architecture_flags "@VTKm_CUDA_Architecture_Flags@")
  set_target_properties(vtkm::cuda PROPERTIES requires_static_builds TRUE)
+
+  # If VTK-m was built with CMake 3.18+ but the consumer uses CMake < 3.18 we
+  # need to drop the exported link options as they break the VTK-m cuda flag logic
+  if(CMAKE_VERSION VERSION_LESS 3.18)
+    set_target_properties(vtkm::cuda PROPERTIES INTERFACE_LINK_OPTIONS "")
+  endif()
 endif()

 # VTKm requires some CMake Find modules not included with CMake, so
--- a/CMake/VTKmDeviceAdapters.cmake
+++ b/CMake/VTKmDeviceAdapters.cmake
@ -127,10 +127,13 @@ if(VTKm_ENABLE_CUDA)
      requires_static_builds TRUE
    )

+    target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)

-    set_target_properties(vtkm_cuda PROPERTIES
-      INTERFACE_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
-    )
+    if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
+      CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
+      # CUDA 11+ deprecated C++11 support
+      target_compile_features(vtkm_cuda INTERFACE cxx_std_14)
+    endif()

    # add the -gencode flags so that all cuda code
    # way compiled properly
@ -241,13 +244,103 @@ if(VTKm_ENABLE_CUDA)
    endif()

    string(REPLACE ";" " " arch_flags "${arch_flags}")
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
+    if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
+      #We propagate cuda flags via target* options so that they
+      #export cleanly
+      set(CMAKE_CUDA_ARCHITECTURES OFF)
+      target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${arch_flags}>)
+      target_link_options(vtkm_cuda INTERFACE $<DEVICE_LINK:${arch_flags}>)
+    else()
+      # Before 3.18 we had to use CMAKE_CUDA_FLAGS as we had no way
+      # to propagate flags to the device link step
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
+    endif()

    # This needs to be lower-case for the property to be properly exported
    # CMake 3.15 we can add `cuda_architecture_flags` to the EXPORT_PROPERTIES
    # target property to have this automatically exported for us
-    set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
    set(VTKm_CUDA_Architecture_Flags "${arch_flags}")
+    set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
+    unset(arch_flags)
+  endif()
+endif()
+
+#-----------------------------------------------------------------------------
+# Kokkos with its Cuda backend enabled, expects everything to be compiled using its
+# `nvcc-wrapper` as the CXX compiler. As the name suggests, nvcc-wrapper is a wrapper around
+# Cuda's nvcc compiler. Kokkos targets have all of the flags meant for the nvcc compiler set as the
+# CXX compiler flags. This function changes all such flags to be CUDA flags so that we can use
+# CMake and vtk-m's existing infrastructure to compile for Cuda and Host separately. Without this
+# all of the files will be compiled using nvcc which can be very time consuming. It can also have
+# issues with calling host functions from device functions when compiling code for other backends.
+function(kokkos_fix_compile_options)
+  # Takes no arguments. Walks (breadth-first) every target reachable from
+  # Kokkos::kokkos through INTERFACE_LINK_LIBRARIES and, for each one:
+  #   * retags its CXX-only compile options as CUDA-only options,
+  #   * re-adds the host (-Xcompiler) options for CXX without the -Xcompiler prefix,
+  #   * clears its INTERFACE_LINK_OPTIONS.
+  # Finally the link options needed by consumers are attached to vtkm::kokkos.
+  set(targets Kokkos::kokkos)
+  set(seen_targets)
+  set(cuda_arch)
+
+  while(targets)
+    # Pop the next target off the front of the work queue
+    list(GET targets 0 target_name)
+    list(REMOVE_AT targets 0)
+
+    get_target_property(link_libraries ${target_name} INTERFACE_LINK_LIBRARIES)
+    foreach(lib_target IN LISTS link_libraries)
+      if (TARGET ${lib_target})
+        if (lib_target IN_LIST seen_targets)
+          continue()
+        endif()
+
+        list(APPEND seen_targets ${lib_target})
+        list(APPEND targets ${lib_target})
+        get_target_property(compile_options ${lib_target} INTERFACE_COMPILE_OPTIONS)
+        if (compile_options)
+          string(REGEX MATCH "[$]<[$]<COMPILE_LANGUAGE:CXX>:-Xcompiler;.*>" cxx_compile_options "${compile_options}")
+          # string(REGEX MATCH) empties its output variable when nothing matches,
+          # so match into a scratch variable and only keep real hits. Otherwise a
+          # later target without an -arch flag would erase the detected value.
+          string(REGEX MATCH "-arch=sm_[0-9][0-9]" arch_match "${compile_options}")
+          if (arch_match)
+            set(cuda_arch "${arch_match}")
+          endif()
+          string(REPLACE "-Xcompiler;" "" cxx_compile_options "${cxx_compile_options}")
+          list(TRANSFORM compile_options REPLACE "--relocatable-device-code=true" "") #We use CMake for this flag
+          list(TRANSFORM compile_options REPLACE "COMPILE_LANGUAGE:CXX" "COMPILE_LANGUAGE:CUDA")
+          list(APPEND compile_options "${cxx_compile_options}")
+          set_property(TARGET ${lib_target} PROPERTY INTERFACE_COMPILE_OPTIONS ${compile_options})
+        endif()
+
+        set_property(TARGET ${lib_target} PROPERTY INTERFACE_LINK_OPTIONS "")
+      endif()
+    endforeach()
+  endwhile()
+
+  # Pass the detected architecture flag to the device link step
+  set_property(TARGET vtkm::kokkos PROPERTY INTERFACE_LINK_OPTIONS "$<DEVICE_LINK:${cuda_arch}>")
+  if (OPENMP IN_LIST Kokkos_DEVICES)
+    # APPEND so the host OpenMP flag is added next to the device-link entry
+    # instead of replacing it
+    set_property(TARGET vtkm::kokkos APPEND PROPERTY INTERFACE_LINK_OPTIONS "$<HOST_LINK:-fopenmp>")
+  endif()
+endfunction()
+
+if(VTKm_ENABLE_KOKKOS AND NOT TARGET vtkm::kokkos)
+  cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
+
+  find_package(Kokkos REQUIRED)
+  if (CUDA IN_LIST Kokkos_DEVICES)
+    cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
+    enable_language(CUDA)
+
+    # Work around a known problem with CUDA 10.x and -O3 (see the warning text
+    # below) by downgrading Linux Release builds to -O2.
+    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND
+       CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
+       CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0" AND
+       CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0" AND
+       CMAKE_BUILD_TYPE STREQUAL "Release")
+      message(WARNING "There is a known issue with Cuda 10 and -O3 optimization. Switching to -O2. Please refer to issue #555.")
+      # Each language's flags are rewritten from its own variable so that the
+      # CUDA release flags are not replaced by the C++ ones. The expansions are
+      # quoted so an empty flag string is still passed as the input argument.
+      string(REPLACE "-O3" "-O2" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
+      string(REPLACE "-O3" "-O2" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}")
+    endif()
+
+    string(REGEX MATCH "[0-9][0-9]$" cuda_arch ${Kokkos_ARCH})
+    set(CMAKE_CUDA_ARCHITECTURES ${cuda_arch})
+    message(STATUS "Detected Cuda arch from Kokkos: ${cuda_arch}")
+
+    add_library(vtkm::kokkos_cuda INTERFACE IMPORTED GLOBAL)
+  endif()
+
+  add_library(vtkm::kokkos INTERFACE IMPORTED GLOBAL)
+  set_target_properties(vtkm::kokkos PROPERTIES INTERFACE_LINK_LIBRARIES "Kokkos::kokkos")
+
+  if (TARGET vtkm::kokkos_cuda)
+    kokkos_fix_compile_options()
  endif()
 endif()

--- a/CMake/VTKmWrappers.cmake
+++ b/CMake/VTKmWrappers.cmake
@ -66,7 +66,7 @@ function(vtkm_generate_export_header lib_name)

  # Now generate a header that holds the macros needed to easily export
  # template classes. This
-  string(TOUPPER ${kit_name} BASE_NAME_UPPER)
+  string(TOUPPER ${lib_name} BASE_NAME_UPPER)
  set(EXPORT_MACRO_NAME "${BASE_NAME_UPPER}")

  set(EXPORT_IS_BUILT_STATIC 0)
@ -81,17 +81,17 @@ function(vtkm_generate_export_header lib_name)
  if(NOT EXPORT_IMPORT_CONDITION)
    #set EXPORT_IMPORT_CONDITION to what the DEFINE_SYMBOL would be when
    #building shared
-    set(EXPORT_IMPORT_CONDITION ${kit_name}_EXPORTS)
+    set(EXPORT_IMPORT_CONDITION ${lib_name}_EXPORTS)
  endif()


  configure_file(
      ${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
-      ${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
+      ${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
    @ONLY)

  if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
-    install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
+    install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
      DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${dir_prefix}
      )
  endif()
@ -150,9 +150,14 @@ endfunction()
 # Pass to consumers extra compile flags they need to add to CMAKE_CUDA_FLAGS
 # to have CUDA compatibility.
 #
-# This is required as currently the -sm/-gencode flags when specified inside
-# COMPILE_OPTIONS / target_compile_options are not propagated to the device
-# linker. Instead they must be specified in CMAKE_CUDA_FLAGS
+# If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
+# a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, this will
+# return an empty string as the `vtkm::cuda` target will correctly propagate
+# all the necessary flags.
+#
+# This is required for CMake < 3.18 as they don't support the `$<DEVICE_LINK>`
+# generator expression for `target_link_options`. Instead they need to be
+# specified in CMAKE_CUDA_FLAGS
 #
 #
 # add_library(lib_that_uses_vtkm ...)
@ -160,7 +165,18 @@ endfunction()
 # target_link_libraries(lib_that_uses_vtkm PRIVATE vtkm_filter)
 #
 function(vtkm_get_cuda_flags settings_var)
+
  if(TARGET vtkm::cuda)
+    # When policy CMP0105 is NEW, INTERFACE_LINK_OPTIONS are used for the device
+    # link step. If vtkm::cuda carries link options in that case there is
+    # nothing to add to the caller's flags, so return without touching
+    # `settings_var`.
+    if(POLICY CMP0105)
+      cmake_policy(GET CMP0105 does_device_link)
+      get_property(arch_flags
+        TARGET vtkm::cuda
+        PROPERTY INTERFACE_LINK_OPTIONS)
+      # Test the queried policy setting; the bare name CMP0105 is not a
+      # variable and would never compare equal to "NEW".
+      if(arch_flags AND "${does_device_link}" STREQUAL "NEW")
+        return()
+      endif()
+    endif()
+
    get_property(arch_flags
      TARGET    vtkm::cuda
      PROPERTY  cuda_architecture_flags)
@ -236,8 +252,14 @@ endfunction()
 #
 #
 #  MODIFY_CUDA_FLAGS: If enabled will add the required -arch=<ver> flags
-#  that VTK-m was compiled with. If you have multiple libraries that use
-#  VTK-m calling `vtkm_add_target_information` multiple times with
+#  that VTK-m was compiled with.
+#
+#  If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
+#  a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, this will
+#  return an empty string as the `vtkm::cuda` target will correctly propagate
+#  all the necessary flags.
+#
+#  Note: calling `vtkm_add_target_information` multiple times with
 #  `MODIFY_CUDA_FLAGS` will cause duplicate compiler flags. To resolve this issue
 #  you can; pass all targets and sources to a single `vtkm_add_target_information`
 #  call, have the first one use `MODIFY_CUDA_FLAGS`, or use the provided
@ -279,10 +301,11 @@ function(vtkm_add_target_information uses_vtkm_target)
    ${ARGN}
    )

-
  if(VTKm_TI_MODIFY_CUDA_FLAGS)
-    vtkm_get_cuda_flags(CMAKE_CUDA_FLAGS)
-    set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} PARENT_SCOPE)
+    vtkm_get_cuda_flags(cuda_flags)
+    if(cuda_flags)
+      set(CMAKE_CUDA_FLAGS ${cuda_flags} PARENT_SCOPE)
+    endif()
  endif()

  set(targets ${uses_vtkm_target})
@ -295,6 +318,8 @@ function(vtkm_add_target_information uses_vtkm_target)
  # set the required target properties
  set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
  set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+  # CUDA_ARCHITECTURES added in CMake 3.18
+  set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)

  if(VTKm_TI_DROP_UNUSED_SYMBOLS)
    foreach(target IN LISTS targets)
@ -309,11 +334,16 @@ function(vtkm_add_target_information uses_vtkm_target)
  #
  # This is required as CUDA currently doesn't support device side calls across
  # dynamic library boundaries.
-  if(TARGET vtkm::cuda)
+  if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
    set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
    foreach(target IN LISTS targets)
      get_target_property(lib_type ${target} TYPE)
-      get_target_property(requires_static vtkm::cuda requires_static_builds)
+      # Determine whether any enabled device target demands static builds.
+      # Start from a known value so nothing leaks in from an earlier iteration.
+      set(requires_static FALSE)
+      if (TARGET vtkm::cuda)
+        get_target_property(requires_static vtkm::cuda requires_static_builds)
+      endif()
+      # Only consult vtkm::kokkos when CUDA has not already required static
+      # builds; get_target_property yields a false NOTFOUND value for an unset
+      # property and would otherwise overwrite the CUDA answer.
+      if ((NOT requires_static) AND (TARGET vtkm::kokkos))
+        get_target_property(requires_static vtkm::kokkos requires_static_builds)
+      endif()

      if(requires_static AND ${lib_type} STREQUAL "SHARED_LIBRARY" AND VTKm_TI_EXTENDS_VTKM)
        #We provide different error messages based on if we are building VTK-m
--- a/CMake/testing/VTKmCheckPyexpander.cmake
+++ b/CMake/testing/VTKmCheckPyexpander.cmake
@ -37,11 +37,19 @@ if(NOT GENERATED_FILE)
  return()
 endif()

-execute_process(
-  COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
-  RESULT_VARIABLE pyexpander_result
-  OUTPUT_VARIABLE pyexpander_output
+if(MSVC)
+  execute_process(
+    COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
+    RESULT_VARIABLE pyexpander_result
+    OUTPUT_VARIABLE pyexpander_output
  )
+else()
+  execute_process(
+    COMMAND ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
+    RESULT_VARIABLE pyexpander_result
+    OUTPUT_VARIABLE pyexpander_output
+  )
+endif()

 if(pyexpander_result)
  # If pyexpander returned non-zero, it failed.
--- a/CMake/testing/VTKmTestInstall.cmake
+++ b/CMake/testing/VTKmTestInstall.cmake
@ -110,6 +110,10 @@ function(vtkm_test_against_install dir)
    )
  endif()

+  if(TARGET vtkm::kokkos)
+    list(APPEND args "-DKokkos_DIR=${Kokkos_DIR}")
+  endif()
+
  #determine if the test is expected to compile or fail to build. We use
  #this information to built the test name to make it clear to the user
  #what a 'passing' test means
--- a/CMake/testing/VTKmTestWrappers.cmake
+++ b/CMake/testing/VTKmTestWrappers.cmake
@ -27,7 +27,6 @@ function(vtkm_create_test_executable
  # for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
  if (is_mpi_test)
    set(extraArgs EXTRA_INCLUDE "vtkm/thirdparty/diy/environment.h")
-    set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "vtkmdiy::mpi::environment env(ac, av);")

    if (use_mpi)
      vtkm_diy_use_mpi(ON)
@ -50,7 +49,7 @@ function(vtkm_create_test_executable

  #if all backends are enabled, we can use cuda compiler to handle all possible backends.
  set(device_sources)
-  if(TARGET vtkm::cuda AND enable_all_backends)
+  if(((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda)) AND enable_all_backends)
    set(device_sources ${sources})
  endif()
  vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
@ -153,6 +152,13 @@ function(vtkm_unit_tests)
      #serially
      list(APPEND per_device_serial TRUE)
    endif()
+    if (VTKm_ENABLE_KOKKOS)
+      list(APPEND per_device_command_line_arguments --device=kokkos)
+      list(APPEND per_device_suffix "KOKKOS")
+      #may require more time because of kernel generation.
+      list(APPEND per_device_timeout 1500)
+      list(APPEND per_device_serial FALSE)
+    endif()
  endif()

  set(test_prog)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -8,16 +8,10 @@
 ##  PURPOSE.  See the above copyright notice for more information.
 ##============================================================================

-# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX.
-# We require CMake 3.11 with the MSVC generator as the $<COMPILE_LANGUAGE:>
-# generator expression is not supported on older versions.
+# If you want CUDA support, you will need to have CMake 3.13 on Linux/OSX.
 cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
 project (VTKm)

-if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
-  cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
-endif()
-
 # Update module path
 set(VTKm_CMAKE_MODULE_PATH ${VTKm_SOURCE_DIR}/CMake)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${VTKm_CMAKE_MODULE_PATH})
@ -81,6 +75,7 @@ endmacro ()
 vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
 vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
 vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
+vtkm_option(VTKm_ENABLE_KOKKOS "Enable Kokkos support" OFF)
 vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
 vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
 vtkm_option(VTKm_ENABLE_MPI "Enable MPI support" OFF)
--- a/benchmarking/BenchmarkFilters.cxx
+++ b/benchmarking/BenchmarkFilters.cxx
@ -165,8 +165,8 @@ void BenchGradient(::benchmark::State& state, int options)
  }
 }

-#define VTKM_PRIVATE_GRADIENT_BENCHMARK(Name, Opts)                                                \
-  void BenchGradient##Name(::benchmark::State& state) { BenchGradient(state, Opts); }              \
+#define VTKM_PRIVATE_GRADIENT_BENCHMARK(Name, Opts)                                   \
+  void BenchGradient##Name(::benchmark::State& state) { BenchGradient(state, Opts); } \
  VTKM_BENCHMARK(BenchGradient##Name)

 VTKM_PRIVATE_GRADIENT_BENCHMARK(Scalar, Gradient | ScalarInput);
--- a/benchmarking/BenchmarkODEIntegrators.cxx
+++ b/benchmarking/BenchmarkODEIntegrators.cxx
@ -0,0 +1,97 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+
+#include "Benchmarker.h"
+
+#include <vtkm/cont/DataSet.h>
+#include <vtkm/cont/DataSetBuilderUniform.h>
+#include <vtkm/cont/ErrorInternal.h>
+#include <vtkm/cont/Logging.h>
+#include <vtkm/cont/RuntimeDeviceTracker.h>
+#include <vtkm/cont/Timer.h>
+#include <vtkm/cont/internal/OptionParser.h>
+#include <vtkm/filter/ParticleAdvection.h>
+#include <vtkm/worklet/particleadvection/EulerIntegrator.h>
+#include <vtkm/worklet/particleadvection/RK4Integrator.h>
+#ifdef VTKM_ENABLE_TBB
+#include <tbb/task_scheduler_init.h>
+#endif
+#ifdef VTKM_ENABLE_OPENMP
+#include <omp.h>
+#endif
+
+
+namespace
+{
+// Hold configuration state (e.g. active device):
+vtkm::cont::InitializeResult Config;
+
+// Benchmark for vtkm::filter::ParticleAdvection (noted by the author as a wrapper around RK4); state.range(0) is the step count.
+void BenchParticleAdvection(::benchmark::State& state)
+{
+  const vtkm::cont::DeviceAdapterId device = Config.Device; // device selected in main()
+  const vtkm::Id3 dims(5, 5, 5);
+  const vtkm::Vec3f vecX(1, 0, 0); // constant vector along +x, used for every point
+
+  vtkm::Id numPoints = dims[0] * dims[1] * dims[2]; // one field value per grid point
+
+  std::vector<vtkm::Vec3f> vectorField(static_cast<std::size_t>(numPoints));
+  for (std::size_t i = 0; i < static_cast<std::size_t>(numPoints); i++)
+    vectorField[i] = vecX;
+
+  vtkm::cont::DataSetBuilderUniform dataSetBuilder;
+
+  vtkm::cont::DataSet ds = dataSetBuilder.Create(dims);
+  ds.AddPointField("vector", vectorField); // field name must match SetActiveField() below
+
+  vtkm::cont::ArrayHandle<vtkm::Particle> seedArray =
+    vtkm::cont::make_ArrayHandle({ vtkm::Particle(vtkm::Vec3f(.2f, 1.0f, .2f), 0),
+                                   vtkm::Particle(vtkm::Vec3f(.2f, 2.0f, .2f), 1),
+                                   vtkm::Particle(vtkm::Vec3f(.2f, 3.0f, .2f), 2) });
+
+  vtkm::filter::ParticleAdvection particleAdvection;
+
+  particleAdvection.SetStepSize(vtkm::FloatDefault(1) / state.range(0)); // step size = 1 / steps, so size * steps == 1 for every argument
+  particleAdvection.SetNumberOfSteps(static_cast<vtkm::Id>(state.range(0)));
+  particleAdvection.SetSeeds(seedArray);
+  particleAdvection.SetActiveField("vector");
+  vtkm::cont::Timer timer{ device }; // timer is constructed for the configured device
+  for (auto _ : state)
+  {
+    (void)_; // mark the loop variable as used
+    timer.Start();
+    auto output = particleAdvection.Execute(ds);
+    ::benchmark::DoNotOptimize(output);
+    timer.Stop();
+
+    state.SetIterationTime(timer.GetElapsedTime()); // manual timing: VTKM_BENCHMARK_OPTS registers with UseManualTime()
+  }
+  state.SetComplexityN(state.range(0)); // complexity is reported against the step count
+}
+VTKM_BENCHMARK_OPTS(BenchParticleAdvection,
+                      ->RangeMultiplier(2)
+                      ->Range(32, 4096)
+                      ->ArgName("Steps")
+                      ->Complexity()); // step counts run from 32 to 4096, doubling each time
+
+} // end anon namespace
+
+int main(int argc, char* argv[]) // entry point: initialize VTK-m, pin the device, run the benchmarks
+{
+  auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
+  std::vector<char*> args(argv, argv + argc); // mutable copy of argv handed to the helpers below
+  vtkm::bench::detail::InitializeArgs(&argc, args, opts); // NOTE(review): helper is defined outside this file; presumably it adjusts argc/args (and possibly opts) -- confirm
+  Config = vtkm::cont::Initialize(argc, args.data(), opts); // result is stored in the file-level Config read by the benchmark
+  if (opts != vtkm::cont::InitializeOptions::None)
+  {
+    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); // presumably restricts execution to the selected device -- confirm
+  }
+  VTKM_EXECUTE_BENCHMARKS(argc, args.data());
+}
--- a/benchmarking/Benchmarker.h
+++ b/benchmarking/Benchmarker.h
@ -170,7 +170,7 @@
 /// and modified using the passed arguments; see the Google Benchmark documentation
 /// for more details. The `preamble` string may be used to supply additional
 /// information that will be appended to the output's preamble.
-#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble)                                     \
+#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
  vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)

 /// \def VTKM_BENCHMARK(BenchFunc)
@ -181,7 +181,7 @@
 /// ```
 /// void BenchFunc(::benchmark::State& state)
 /// ```
-#define VTKM_BENCHMARK(BenchFunc)                                                                  \
+#define VTKM_BENCHMARK(BenchFunc) \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)

 /// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
@ -196,7 +196,7 @@
 /// Note the similarity to the raw Google Benchmark usage of
 /// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
 /// the Google Benchmark documentation for more details on the available options.
-#define VTKM_BENCHMARK_OPTS(BenchFunc, options)                                                    \
+#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
  BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options

 /// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
@ -211,7 +211,7 @@
 /// ```
 ///
 /// See the Google Benchmark documentation for more details on the available options.
-#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor)                                              \
+#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
  BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)

 /// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
@ -224,7 +224,7 @@
 /// template <typename T>
 /// void BenchFunc(::benchmark::State& state)
 /// ```
-#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)                                              \
+#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
  VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)

 /// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
@ -237,10 +237,10 @@
 ///                                ->ArgName("MyParam")->Range(32, 1024*1024),
 ///                              vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
 /// ```
-#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList)                                \
-  VTKM_BENCHMARK_TEMPLATES_APPLY(                                                                  \
-    BenchFunc,                                                                                     \
-    [](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); },       \
+#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList)                          \
+  VTKM_BENCHMARK_TEMPLATES_APPLY(                                                            \
+    BenchFunc,                                                                               \
+    [](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
    TypeList)

 /// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
@ -255,22 +255,22 @@
 /// ```
 ///
 /// See the Google Benchmark documentation for more details on the available options.
-#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList)                                                                                                             \
-  namespace                                                                                                                                                                           \
+#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList)                            \
+  namespace                                                                                          \
  { /* A template function cannot be used as a template parameter, so wrap the function with       \
     * a template struct to get it into the GenerateTemplateBenchmarks class. */ \
-  template <typename... Ts>                                                                                                                                                           \
-  struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)                                                                                                                                       \
-  {                                                                                                                                                                                   \
-    static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; }                                                                                                \
-  };                                                                                                                                                                                  \
-  } /* end anon namespace */                                                                                                                                                          \
-  int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks<                                                                                            \
-    brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>,                                                                                                                            \
+  template <typename... Ts>                                                                          \
+  struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)                                                      \
+  {                                                                                                  \
+    static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; }               \
+  };                                                                                                 \
+  } /* end anon namespace */                                                                         \
+  int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks<           \
+    brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>,                                           \
    TypeList>::Register(#BenchFunc, ApplyFunctor)

 // Internal use only:
-#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)                                                     \
+#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
  BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)

 namespace vtkm
@ -280,9 +280,7 @@ namespace bench
 namespace detail
 {

-static inline void NullApply(::benchmark::internal::Benchmark*)
-{
-}
+static inline void NullApply(::benchmark::internal::Benchmark*) {}

 /// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
 /// instead.
--- a/benchmarking/CMakeLists.txt
+++ b/benchmarking/CMakeLists.txt
@ -44,6 +44,7 @@ set(benchmarks
  BenchmarkDeviceAdapter
  BenchmarkFieldAlgorithms
  BenchmarkFilters
+  BenchmarkODEIntegrators
  BenchmarkTopologyAlgorithms
  )

--- a/data/data/curvilinear/simple_structured_ascii.vtk
+++ b/data/data/curvilinear/simple_structured_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f85560cc05688d09c21b22e91c14cec22deecb3c51dc364d82cc9fd460c6ab6
+size 328
--- a/data/data/curvilinear/simple_structured_bin.vtk
+++ b/data/data/curvilinear/simple_structured_bin.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a47045b1ae5539ef0125273ee9c50a9a6e809f78411f6a850ac34e6fa43189bb
+size 535
--- a/data/data/rectilinear/simple_rectilinear1_ascii.vtk
+++ b/data/data/rectilinear/simple_rectilinear1_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:752021630d25aff8dfd00064badd452896be70bc8b2f94b008900b4fc70d4dd5
+size 1811
--- a/data/data/rectilinear/simple_rectilinear2_ascii.vtk
+++ b/data/data/rectilinear/simple_rectilinear2_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d1dbb4c28f1c829769ad3e03fc58f667935d8a461d3515036d5d98f5e3841cb
+size 395
--- a/data/data/uniform/simple_structured_points_ascii.vtk
+++ b/data/data/uniform/simple_structured_points_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1860e747d7f460afc63e32de184e445ffb966a42fb07f9d44ba39020584864f
+size 496
--- a/data/data/uniform/simple_structured_points_bin.vtk
+++ b/data/data/uniform/simple_structured_points_bin.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d9bea2064cd3402f3f5b7862e6b775e37f33210ba099f59358857d4bdae1020
+size 255
--- a/data/data/uniform/simple_structured_points_visit_ascii.vtk
+++ b/data/data/uniform/simple_structured_points_visit_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e154ba13346e6998b864316868da3f155e99efe4f330c8e080b0d7ece22b505a
+size 488
--- a/data/data/unstructured/simple_poly_ascii.vtk
+++ b/data/data/unstructured/simple_poly_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff3108d009d2eef410593811857e38388001f7df624ddeaed3edceafbc838aea
+size 849
--- a/data/data/unstructured/simple_poly_bin.vtk
+++ b/data/data/unstructured/simple_poly_bin.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5aca6667b06deb4ec6236d5caa3d9518345bc1eb9021bc721289b81acc980af9
+size 789
--- a/data/data/unstructured/simple_unstructured_ascii.vtk
+++ b/data/data/unstructured/simple_unstructured_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:861fc904b7d4db43288fce85c8c1398726b54ac82d7bcbcebd8f12808cb5599b
+size 1002
--- a/data/data/unstructured/simple_unstructured_bin.vtk
+++ b/data/data/unstructured/simple_unstructured_bin.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29e43c695763535251ab22af815651caa53d103b5fd168c72dfb9188e72e4ff4
+size 1244
--- a/data/data/unstructured/simple_unstructured_visit_ascii.vtk
+++ b/data/data/unstructured/simple_unstructured_visit_ascii.vtk
@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3731448fe4d87b204e185829237a6a6b0140aed2fb27eea0533883a4cf4ed79d
+size 1065
--- a/docs/CI-README.md
+++ b/docs/CI-README.md
@ -60,14 +60,14 @@ Current gitlab runner tags for VTK-m are:
        Used to state that we require a linux based gitlab-runner
    - large-memory
        Used to state that this step will require a machine that has lots of memory.
-        This is currently used for cuda `build` requests
+        This is currently used for CUDA `build` requests
    - cuda-rt
-        Used to state that the runner is required to have the cuda runtime enviornment.
-        This isn't required to `build` VTK-m, only `test`
+        Used to state that the runner is required to have the CUDA runtime environment.
+        This is required to `build` and `test` VTK-m when using CUDA 
    - maxwell
    - pascal
    - turing
-        Only used on a `test` stage to signifiy which GPU hardware is required to
+        Only used on a `test` stage to signify which GPU hardware is required to
        run the VTK-m tests

 # How to use docker builders locally
--- a/docs/changelog/add-kokkos-backend.md
+++ b/docs/changelog/add-kokkos-backend.md
@ -0,0 +1,5 @@
+# Add Kokkos backend
+
+Adds a new device backend, `Kokkos`, which uses the Kokkos library for parallelism.
+Users must provide their own Kokkos build, and VTK-m will use the default configured
+execution space.
--- a/docs/changelog/free-atomic-functions.md
+++ b/docs/changelog/free-atomic-functions.md
@ -0,0 +1,14 @@
+# Add atomic free functions
+
+Previously, all atomic functions were stored in classes named
+`AtomicInterfaceControl` and `AtomicInterfaceExecution`, which required
+you to know at compile time which device was using the methods. That in
+turn means that anything using an atomic needed to be templated on the
+device it is running on.
+
+That can be a big hassle (and is problematic for some code structures).
+Instead, these methods are moved to free functions in the `vtkm`
+namespace. These functions operate like those in `Math.h`. Using
+compiler directives, an appropriate version of the function is compiled
+for the current device the compiler is using.
+
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -28,6 +28,7 @@ if(VTKm_ENABLE_EXAMPLES)
  add_subdirectory(multi_backend)
  add_subdirectory(oscillator)
  add_subdirectory(particle_advection)
+  add_subdirectory(streamline_mpi)
  add_subdirectory(polyline_archimedean_helix)
  add_subdirectory(redistribute_points)
  add_subdirectory(temporal_advection)
--- a/examples/contour_tree_augmented/ContourTreeApp.cxx
+++ b/examples/contour_tree_augmented/ContourTreeApp.cxx
@ -370,28 +370,14 @@ int main(int argc, char* argv[])
    VTKM_LOG_IF_S(vtkm::cont::LogLevel::Info,
                  numLevels > 0,
                  std::endl
-                    << "    ------------ Settings Isolevel Selection -----------"
-                    << std::endl
-                    << "    levels="
-                    << numLevels
-                    << std::endl
-                    << "    eps="
-                    << eps
-                    << std::endl
-                    << "    comp"
-                    << numComp
-                    << std::endl
-                    << "    type="
-                    << contourType
-                    << std::endl
-                    << "    method="
-                    << contourSelectMethod
-                    << std::endl
-                    << "    mc="
-                    << useMarchingCubes
-                    << std::endl
-                    << "    use"
-                    << (usePersistenceSorter ? "PersistenceSorter" : "VolumeSorter"));
+                    << "    ------------ Settings Isolevel Selection -----------" << std::endl
+                    << "    levels=" << numLevels << std::endl
+                    << "    eps=" << eps << std::endl
+                    << "    comp" << numComp << std::endl
+                    << "    type=" << contourType << std::endl
+                    << "    method=" << contourSelectMethod << std::endl
+                    << "    mc=" << useMarchingCubes << std::endl
+                    << "    use" << (usePersistenceSorter ? "PersistenceSorter" : "VolumeSorter"));
  }
  currTime = totalTime.GetElapsedTime();
  vtkm::Float64 startUpTime = currTime - prevTime;
@ -558,19 +544,17 @@ int main(int argc, char* argv[])
  {
    VTKM_LOG_S(vtkm::cont::LogLevel::Info,
               std::endl
-                 << "    ---------------- Input Mesh Properties --------------"
-                 << std::endl
-                 << "    Number of dimensions: "
-                 << nDims);
+                 << "    ---------------- Input Mesh Properties --------------" << std::endl
+                 << "    Number of dimensions: " << nDims);
  }

  // Check if marching cubes is enabled for non 3D data
  bool invalidMCOption = (useMarchingCubes && nDims != 3);
-  VTKM_LOG_IF_S(
-    vtkm::cont::LogLevel::Error,
-    invalidMCOption && (rank == 0),
-    "The input mesh is " << nDims << "D. "
-                         << "Contour tree using marching cubes is only supported for 3D data.");
+  VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
+                invalidMCOption && (rank == 0),
+                "The input mesh is "
+                  << nDims << "D. "
+                  << "Contour tree using marching cubes is only supported for 3D data.");

  // If we found any errors in the setttings than finalize MPI and exit the execution
  if (invalidMCOption)
@ -583,7 +567,7 @@ int main(int argc, char* argv[])

 #ifndef WITH_MPI                              // construct regular, single-block VTK-M input dataset
  vtkm::cont::DataSet useDataSet = inDataSet; // Single block dataset
-#else                                         // Create a multi-block dataset for multi-block DIY-paralle processing
+#else  // Create a multi-block dataset for multi-block DIY-parallel processing
  vtkm::cont::PartitionedDataSet useDataSet; // Partitioned variant of the input dataset
  vtkm::Id3 blocksPerDim =
    nDims == 3 ? vtkm::Id3(1, 1, numBlocks) : vtkm::Id3(1, numBlocks, 1); // Decompose the data into
@ -683,7 +667,7 @@ int main(int argc, char* argv[])
      useDataSet.AppendPartition(ds);
    }
  }
-#endif                                        // WITH_MPI construct input dataset
+#endif // WITH_MPI construct input dataset

  currTime = totalTime.GetElapsedTime();
  buildDatasetTime = currTime - prevTime;
@ -866,116 +850,47 @@ int main(int argc, char* argv[])
  currTime = totalTime.GetElapsedTime();
  VTKM_LOG_S(vtkm::cont::LogLevel::Info,
             std::endl
-               << "    -------------------------- Totals "
-               << rank
-               << " -----------------------------"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Start-up"
-               << ": "
-               << startUpTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Data Read"
-               << ": "
-               << dataReadTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Build VTKM Dataset"
-               << ": "
-               << buildDatasetTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Compute Contour Tree"
-               << ": "
-               << computeContourTreeTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Compute Branch Decomposition"
-               << ": "
-               << computeBranchDecompTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Total Time"
-               << ": "
-               << currTime
-               << " seconds");
+               << "    -------------------------- Totals " << rank
+               << " -----------------------------" << std::endl
+               << std::setw(42) << std::left << "    Start-up"
+               << ": " << startUpTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Data Read"
+               << ": " << dataReadTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Build VTKM Dataset"
+               << ": " << buildDatasetTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Compute Contour Tree"
+               << ": " << computeContourTreeTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Compute Branch Decomposition"
+               << ": " << computeBranchDecompTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Total Time"
+               << ": " << currTime << " seconds");

  const ctaug_ns::ContourTree& ct = filter.GetContourTree();
  VTKM_LOG_S(vtkm::cont::LogLevel::Info,
             std::endl
-               << "    ---------------- Contour Tree Array Sizes ---------------------"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Nodes"
-               << ": "
-               << ct.Nodes.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Arcs"
-               << ": "
-               << ct.Arcs.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Superparents"
-               << ": "
-               << ct.Superparents.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Superarcs"
-               << ": "
-               << ct.Superarcs.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Supernodes"
-               << ": "
-               << ct.Supernodes.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Hyperparents"
-               << ": "
-               << ct.Hyperparents.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #WhenTransferred"
-               << ": "
-               << ct.WhenTransferred.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Hypernodes"
-               << ": "
-               << ct.Hypernodes.GetNumberOfValues()
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    #Hyperarcs"
-               << ": "
-               << ct.Hyperarcs.GetNumberOfValues()
-               << std::endl);
+               << "    ---------------- Contour Tree Array Sizes ---------------------" << std::endl
+               << std::setw(42) << std::left << "    #Nodes"
+               << ": " << ct.Nodes.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Arcs"
+               << ": " << ct.Arcs.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Superparents"
+               << ": " << ct.Superparents.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Superarcs"
+               << ": " << ct.Superarcs.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Supernodes"
+               << ": " << ct.Supernodes.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Hyperparents"
+               << ": " << ct.Hyperparents.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #WhenTransferred"
+               << ": " << ct.WhenTransferred.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Hypernodes"
+               << ": " << ct.Hypernodes.GetNumberOfValues() << std::endl
+               << std::setw(42) << std::left << "    #Hyperarcs"
+               << ": " << ct.Hyperarcs.GetNumberOfValues() << std::endl);
  // Print hyperstructure statistics
  VTKM_LOG_S(vtkm::cont::LogLevel::Info,
             std::endl
-               << ct.PrintHyperStructureStatistics(false)
-               << std::endl);
+               << ct.PrintHyperStructureStatistics(false) << std::endl);

  // Flush ouput streams just to make sure everything has been logged (in particular when using MPI)
  std::cout << std::flush;
--- a/examples/contour_tree_distributed/ContourTreeApp.cxx
+++ b/examples/contour_tree_distributed/ContourTreeApp.cxx
@ -252,25 +252,14 @@ int main(int argc, char* argv[])
  {
    VTKM_LOG_S(vtkm::cont::LogLevel::Info,
               std::endl
-                 << "    ------------ Settings -----------"
-                 << std::endl
-                 << "    filename="
-                 << filename
-                 << std::endl
-                 << "    device="
-                 << device.GetName()
-                 << std::endl
-                 << "    mc="
-                 << useMarchingCubes
-                 << std::endl
+                 << "    ------------ Settings -----------" << std::endl
+                 << "    filename=" << filename << std::endl
+                 << "    device=" << device.GetName() << std::endl
+                 << "    mc=" << useMarchingCubes << std::endl
 #ifdef ENABLE_SET_NUM_THREADS
-                 << "    numThreads="
-                 << numThreads
-                 << std::endl
+                 << "    numThreads=" << numThreads << std::endl
 #endif
-                 << "    nblocks="
-                 << numBlocks
-                 << std::endl);
+                 << "    nblocks=" << numBlocks << std::endl);
  }
  currTime = totalTime.GetElapsedTime();
  vtkm::Float64 startUpTime = currTime - prevTime;
@ -341,14 +330,9 @@ int main(int argc, char* argv[])
  {
    VTKM_LOG_S(vtkm::cont::LogLevel::Info,
               std::endl
-                 << "    ---------------- Input Mesh Properties --------------"
-                 << std::endl
-                 << "    Number of dimensions: "
-                 << nDims
-                 << std::endl
-                 << "    Number of mesh vertices: "
-                 << numVertices
-                 << std::endl);
+                 << "    ---------------- Input Mesh Properties --------------" << std::endl
+                 << "    Number of dimensions: " << nDims << std::endl
+                 << "    Number of mesh vertices: " << numVertices << std::endl);
  }

  // Check for fatal input errors
@ -359,13 +343,14 @@ int main(int argc, char* argv[])
  // Log any errors if found on rank 0
  VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
                invalidNumDimensions && (rank == 0),
-                "The input mesh is " << nDims << "D. "
-                                                 "The input data must be either 2D or 3D.");
-  VTKM_LOG_IF_S(
-    vtkm::cont::LogLevel::Error,
-    invalidMCOption && (rank == 0),
-    "The input mesh is " << nDims << "D. "
-                         << "Contour tree using marching cubes is only supported for 3D data.");
+                "The input mesh is " << nDims
+                                     << "D. "
+                                        "The input data must be either 2D or 3D.");
+  VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
+                invalidMCOption && (rank == 0),
+                "The input mesh is "
+                  << nDims << "D. "
+                  << "Contour tree using marching cubes is only supported for 3D data.");
  // If we found any errors in the settings then finalize MPI and exit the execution
  if (invalidNumDimensions || invalidMCOption)
  {
@ -519,44 +504,18 @@ int main(int argc, char* argv[])
  currTime = totalTime.GetElapsedTime();
  VTKM_LOG_S(vtkm::cont::LogLevel::Info,
             std::endl
-               << "    -------------------------- Totals "
-               << rank
-               << " -----------------------------"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Start-up"
-               << ": "
-               << startUpTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Data Read"
-               << ": "
-               << dataReadTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Build VTKM Dataset"
-               << ": "
-               << buildDatasetTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Compute Contour Tree"
-               << ": "
-               << computeContourTreeTime
-               << " seconds"
-               << std::endl
-               << std::setw(42)
-               << std::left
-               << "    Total Time"
-               << ": "
-               << currTime
-               << " seconds");
+               << "    -------------------------- Totals " << rank
+               << " -----------------------------" << std::endl
+               << std::setw(42) << std::left << "    Start-up"
+               << ": " << startUpTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Data Read"
+               << ": " << dataReadTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Build VTKM Dataset"
+               << ": " << buildDatasetTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Compute Contour Tree"
+               << ": " << computeContourTreeTime << " seconds" << std::endl
+               << std::setw(42) << std::left << "    Total Time"
+               << ": " << currTime << " seconds");

  // Flush output streams just to make sure everything has been logged (in particular when using MPI)
  std::cout << std::flush;
--- a/examples/demo/Demo.cxx
+++ b/examples/demo/Demo.cxx
@ -25,10 +25,10 @@
 // write that image to a file. It then computes an isosurface on the input data set and renders
 // this output data set in a separate image file

-using vtkm::rendering::MapperVolume;
-using vtkm::rendering::MapperRayTracer;
-using vtkm::rendering::MapperWireframer;
 using vtkm::rendering::CanvasRayTracer;
+using vtkm::rendering::MapperRayTracer;
+using vtkm::rendering::MapperVolume;
+using vtkm::rendering::MapperWireframer;

 int main(int argc, char* argv[])
 {
--- a/examples/ising/Ising.cxx
+++ b/examples/ising/Ising.cxx
@ -66,10 +66,10 @@ struct UpdateSpins : public vtkm::worklet::WorkletCellNeighborhood
    const auto mySpin = prevspin.Get(0, 0, 0);

    // 1. Calculate the energy of flipping, E_flip
-    vtkm::Float32 E_flip =
-      J * mySpin * (prevspin.Get(-1, -1, 0) + prevspin.Get(-1, 0, 0) + prevspin.Get(-1, 1, 0) +
-                    prevspin.Get(0, -1, 0) + prevspin.Get(0, 1, 0) + prevspin.Get(1, -1, 0) +
-                    prevspin.Get(1, 0, 0) + prevspin.Get(1, 1, 0));
+    vtkm::Float32 E_flip = J * mySpin *
+      (prevspin.Get(-1, -1, 0) + prevspin.Get(-1, 0, 0) + prevspin.Get(-1, 1, 0) +
+       prevspin.Get(0, -1, 0) + prevspin.Get(0, 1, 0) + prevspin.Get(1, -1, 0) +
+       prevspin.Get(1, 0, 0) + prevspin.Get(1, 1, 0));

    if (E_flip <= 0)
    {
--- a/examples/particle_advection/ParticleAdvection.cxx
+++ b/examples/particle_advection/ParticleAdvection.cxx
@ -57,11 +57,11 @@ int main(int argc, char** argv)

  //create seeds randomly placed within the bounding box of the data.
  vtkm::Bounds bounds = ds.GetCoordinateSystem().GetBounds();
-  std::vector<vtkm::Massless> seeds;
+  std::vector<vtkm::Particle> seeds;

  for (vtkm::Id i = 0; i < numSeeds; i++)
  {
-    vtkm::Massless p;
+    vtkm::Particle p;
    vtkm::FloatDefault rx = (vtkm::FloatDefault)rand() / (vtkm::FloatDefault)RAND_MAX;
    vtkm::FloatDefault ry = (vtkm::FloatDefault)rand() / (vtkm::FloatDefault)RAND_MAX;
    vtkm::FloatDefault rz = (vtkm::FloatDefault)rand() / (vtkm::FloatDefault)RAND_MAX;
--- a/examples/redistribute_points/RedistributePoints.h
+++ b/examples/redistribute_points/RedistributePoints.h
@ -223,11 +223,12 @@ inline VTKM_CONT vtkm::cont::PartitionedDataSet RedistributePoints::PrepareForEx
  vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds> decomposer(
    /*dim*/ 3, internal::convert(gbounds), assigner.nblocks());

-  vtkmdiy::Master master(comm,
-                         /*threads*/ 1,
-                         /*limit*/ -1,
-                         []() -> void* { return new vtkm::cont::DataSet(); },
-                         [](void* ptr) { delete static_cast<vtkm::cont::DataSet*>(ptr); });
+  vtkmdiy::Master master(
+    comm,
+    /*threads*/ 1,
+    /*limit*/ -1,
+    []() -> void* { return new vtkm::cont::DataSet(); },
+    [](void* ptr) { delete static_cast<vtkm::cont::DataSet*>(ptr); });
  decomposer.decompose(comm.rank(), assigner, master);

  assert(static_cast<vtkm::Id>(master.size()) == input.GetNumberOfPartitions());
--- a/examples/streamline_mpi/CMakeLists.txt
+++ b/examples/streamline_mpi/CMakeLists.txt
@ -0,0 +1,27 @@
+##============================================================================
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##============================================================================
+cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
+project(StreamlineMPI CXX)
+
+# Find the VTK-m package; configuration fails if it cannot be located.
+find_package(VTKm REQUIRED QUIET)
+
+# The example is only built when the located VTK-m reports MPI support.
+if(VTKm_ENABLE_MPI)
+  add_executable(StreamlineMPI StreamlineMPI.cxx)
+  target_compile_definitions(StreamlineMPI PRIVATE "MPI_ENABLED")
+  target_link_libraries(StreamlineMPI PRIVATE vtkm_filter vtkm_io MPI::MPI_CXX)
+  vtkm_add_target_information(StreamlineMPI
+                              DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
+                              DEVICE_SOURCES StreamlineMPI.cxx)
+endif()
--- a/examples/streamline_mpi/StreamlineMPI.cxx
+++ b/examples/streamline_mpi/StreamlineMPI.cxx
@ -0,0 +1,120 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+
+#include <vtkm/cont/AssignerPartitionedDataSet.h>
+#include <vtkm/cont/DataSet.h>
+#include <vtkm/cont/EnvironmentTracker.h>
+#include <vtkm/cont/Field.h>
+#include <vtkm/cont/Initialize.h>
+#include <vtkm/cont/PartitionedDataSet.h>
+#include <vtkm/filter/Streamline.h>
+#include <vtkm/io/VTKDataSetReader.h>
+#include <vtkm/io/VTKDataSetWriter.h>
+#include <vtkm/io/reader/VTKDataSetReader.h>
+
+#include <mpi.h>
+#include <vtkm/thirdparty/diy/diy.h>
+#include <vtkm/thirdparty/diy/mpi-cast.h>
+
+
+#include <vtkm/filter/ParticleAdvection.h>
+#include <vtkm/filter/particleadvection/BoundsMap.h>
+#include <vtkm/filter/particleadvection/ParticleMessenger.h>
+
+#include <fstream>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+
+// Loads the blocks of a ".visit" index file that are assigned to this MPI rank.
+//
+// fname    - path to the index file. Its first line must contain "!NBLOCKS <count>" and
+//            each following line names one block file, relative to the index's directory.
+// dataSets - receives one vtkm::cont::DataSet per block loaded by this rank.
+// rank     - index of the calling rank.
+// nRanks   - total number of ranks; each rank gets numBlocks / nRanks consecutive blocks
+//            and the last rank additionally takes any remainder.
+//
+// Every value of the array read from the "grad" field of each loaded block is overwritten
+// with (1, 0, 0) before the block is stored.
+//
+// Throws std::runtime_error when the index cannot be opened, is not a .visit file, lacks
+// the "!NBLOCKS" header, or ends before a block needed by this rank has been listed.
+void LoadData(std::string& fname, std::vector<vtkm::cont::DataSet>& dataSets, int rank, int nRanks)
+{
+  std::ifstream is(fname);
+  std::cout << "Opening: " << fname << std::endl;
+  if (!is)
+  {
+    std::cout << "File not found! : " << fname << std::endl;
+    throw std::runtime_error("unknown file: " + fname);
+  }
+
+  const auto extPos = fname.rfind(".visit");
+  if (extPos == std::string::npos)
+    throw std::runtime_error("Only .visit files are supported.");
+
+  // Block files are resolved relative to the index file. When fname carries no directory
+  // component, fall back to the current directory instead of misusing the file stem.
+  const auto slashPos = fname.rfind('/', extPos);
+  const std::string dir =
+    (slashPos == std::string::npos) ? std::string(".") : fname.substr(0, slashPos);
+
+  std::string buff;
+  std::getline(is, buff);
+  const std::string headerKey = "!NBLOCKS ";
+  const auto keyPos = buff.find(headerKey);
+  if (keyPos == std::string::npos)
+    throw std::runtime_error("Missing \"!NBLOCKS\" header in: " + fname);
+  const int numBlocks = std::stoi(buff.substr(keyPos + headerKey.size()));
+  if (rank == 0)
+    std::cout << "numBlocks= " << numBlocks << std::endl;
+
+  // Contiguous partition of the block list; the last rank absorbs the remainder.
+  const int nPer = numBlocks / nRanks;
+  const int b0 = rank * nPer;
+  const int b1 = (rank == (nRanks - 1)) ? numBlocks : (rank + 1) * nPer;
+
+  for (int i = 0; i < numBlocks; i++)
+  {
+    const bool haveLine = static_cast<bool>(std::getline(is, buff));
+    if (i >= b0 && i < b1)
+    {
+      // A failed read would leave the previous line in buff; refuse to load it twice.
+      if (!haveLine)
+        throw std::runtime_error("Index lists fewer blocks than !NBLOCKS: " + fname);
+
+      const std::string vtkFile = dir + "/" + buff;
+      vtkm::io::reader::VTKDataSetReader reader(vtkFile);
+      vtkm::cont::DataSet ds = reader.ReadDataSet();
+
+      // Replace the contents of the "grad" array with a constant vector.
+      auto f = ds.GetField("grad").GetData();
+      vtkm::cont::ArrayHandle<vtkm::Vec<double, 3>> fieldArray;
+      fieldArray = f.Cast<vtkm::cont::ArrayHandle<vtkm::Vec<double, 3>>>();
+      const vtkm::Id n = fieldArray.GetNumberOfValues();
+      auto portal = fieldArray.WritePortal();
+      for (vtkm::Id ii = 0; ii < n; ii++)
+        portal.Set(ii, vtkm::Vec<double, 3>(1, 0, 0));
+
+      dataSets.push_back(ds);
+    }
+  }
+}
+
+// Example running particle advection on a partitioned data set spread over MPI ranks.
+//
+// Usage:
+//   StreamlineMPI <path-to-index>.visit
+//
+// The .visit index lists the block files that make up the data set (see LoadData).
+// Three hard-coded seeds are advected through the "grad" field using a step size of
+// 0.001 and 10000 steps, and a summary of the output is printed by every rank.
+//
+
+int main(int argc, char** argv)
+{
+  MPI_Init(&argc, &argv);
+
+  // The index file is the only (mandatory) argument.
+  if (argc < 2)
+  {
+    std::cerr << "Usage: " << (argc > 0 ? argv[0] : "StreamlineMPI") << " <file.visit>"
+              << std::endl;
+    MPI_Finalize();
+    return 1;
+  }
+
+  {
+    // Everything that may hold MPI or VTK-m resources lives in this scope so that it is
+    // destroyed before MPI_Finalize is called.
+    auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
+    const int rank = comm.rank();
+    const int size = comm.size();
+
+    std::string dataFile = argv[1];
+    std::vector<vtkm::cont::DataSet> dataSets;
+    LoadData(dataFile, dataSets, rank, size);
+
+    vtkm::filter::ParticleAdvection pa;
+
+    std::vector<vtkm::Particle> seeds;
+    seeds.push_back(vtkm::Particle(vtkm::Vec3f(.1f, .1f, .9f), 0));
+    seeds.push_back(vtkm::Particle(vtkm::Vec3f(.1f, .6f, .6f), 1));
+    seeds.push_back(vtkm::Particle(vtkm::Vec3f(.1f, .9f, .1f), 2));
+    vtkm::cont::ArrayHandle<vtkm::Particle> seedArray = vtkm::cont::make_ArrayHandle(seeds);
+    pa.SetStepSize(0.001f);
+    pa.SetNumberOfSteps(10000);
+    pa.SetSeeds(seedArray);
+    pa.SetActiveField("grad");
+
+    vtkm::cont::PartitionedDataSet pds(dataSets);
+    auto output = pa.Execute(pds);
+    output.PrintSummary(std::cout);
+  }
+
+  MPI_Finalize();
+  return 0;
+}
--- a/examples/temporal_advection/TemporalAdvection.cxx
+++ b/examples/temporal_advection/TemporalAdvection.cxx
@ -75,7 +75,7 @@ int main(int argc, char** argv)
  // Use the coordinate system as seeds for performing advection
  vtkm::cont::ArrayHandle<vtkm::Vec3f> pts;
  vtkm::cont::ArrayCopy(ds1.GetCoordinateSystem().GetData(), pts);
-  vtkm::cont::ArrayHandle<vtkm::Massless> seeds;
+  vtkm::cont::ArrayHandle<vtkm::Particle> seeds;

  vtkm::Id numPts = pts.GetNumberOfValues();
  seeds.Allocate(numPts);
@ -83,7 +83,7 @@ int main(int argc, char** argv)
  auto seedPortal = seeds.WritePortal();
  for (vtkm::Id i = 0; i < numPts; i++)
  {
-    vtkm::Massless p;
+    vtkm::Particle p;
    p.Pos = ptsPortal.Get(i);
    p.ID = i;
    seedPortal.Set(i, p);
--- a/vtkm/Atomic.h
+++ b/vtkm/Atomic.h
@ -0,0 +1,821 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+#ifndef vtk_m_Atomic_h
+#define vtk_m_Atomic_h
+
+#include <vtkm/List.h>
+
+#include <vtkm/internal/Windows.h>
+
+#include <atomic>
+
+namespace vtkm
+{
+
+/// \brief Specifies memory order semantics for atomic operations.
+///
+/// The memory order parameter controls how all other memory operations are
+/// ordered around a specific atomic instruction.
+///
+/// Memory access is complicated. Compilers can reorder instructions to optimize
+/// scheduling, processors can speculatively read memory, and caches make
+/// assumptions about coherency that we may not normally be aware of. Because of
+/// this complexity, the order in which multiple updates to shared memory become
+/// visible to other threads is not guaranteed, nor is it guaranteed that each
+/// thread will see memory updates occur in the same order as any other thread.
+/// This can lead to surprising behavior and cause problems when using atomics
+/// to communicate between threads.
+///
+/// These problems are solved by using a standard set of memory orderings which
+/// describe common access patterns used for shared memory programming. Their
+/// goal is to provide guarantees that changes made in one thread will be visible
+/// to another thread at a specific and predictable point in execution, regardless
+/// of any hardware or compiler optimizations.
+///
+/// If unsure, use `SequentiallyConsistent` memory orderings. It will "do the right
+/// thing", but at the cost of increased and possibly unnecessary memory ordering
+/// restrictions. The other orderings are optimizations that are only applicable
+/// in very specific situations.
+///
+/// See https://en.cppreference.com/w/cpp/atomic/memory_order for a detailed
+/// description of the different orderings and their usage.
+///
+/// The memory order semantics follow those of other common atomic operations such as
+/// the `std::memory_order` identifiers used for `std::atomic`.
+///
+/// Note that when a memory order is specified, the enforced memory order is guaranteed
+/// to be as good or better than that requested.
+///
+enum class MemoryOrder
+{
+  /// An atomic operation with `Relaxed` memory order enforces no synchronization or ordering
+  /// constraints on local reads and writes. That is, a read or write to a local, non-atomic
+  /// variable may be moved to before or after an atomic operation with `Relaxed` memory order.
+  ///
+  Relaxed,
+
+  /// A load operation with `Acquire` memory order will enforce that any local read or write
+  /// operations listed in the program after the atomic will happen after the atomic.
+  ///
+  Acquire,
+
+  /// A store operation with `Release` memory order will enforce that any local read or write
+  /// operations listed in the program before the atomic will happen before the atomic.
+  ///
+  Release,
+
+  /// A read-modify-write operation with `AcquireAndRelease` memory order will enforce that any
+  /// local read or write operations listed in the program before the atomic will happen before the
+  /// atomic and likewise any read or write operations listed in the program after the atomic will
+  /// happen after the atomic.
+  ///
+  AcquireAndRelease,
+
+  /// An atomic with `SequentiallyConsistent` memory order will enforce any appropriate semantics
+  /// as `Acquire`, `Release`, and `AcquireAndRelease`. Additionally, `SequentiallyConsistent` will
+  /// enforce a consistent ordering of atomic operations across all threads. That is, all threads
+  /// observe the modifications in the same order.
+  ///
+  SequentiallyConsistent
+};
+
+namespace internal
+{
+
+/// Translates a `vtkm::MemoryOrder` into the corresponding `std::memory_order` constant.
+VTKM_EXEC_CONT inline std::memory_order StdAtomicMemOrder(vtkm::MemoryOrder order)
+{
+  // Start from sequential consistency so that a value outside the enumeration
+  // (which should never occur) still yields the strictest ordering.
+  std::memory_order stdOrder = std::memory_order_seq_cst;
+  switch (order)
+  {
+    case vtkm::MemoryOrder::Relaxed:
+      stdOrder = std::memory_order_relaxed;
+      break;
+    case vtkm::MemoryOrder::Acquire:
+      stdOrder = std::memory_order_acquire;
+      break;
+    case vtkm::MemoryOrder::Release:
+      stdOrder = std::memory_order_release;
+      break;
+    case vtkm::MemoryOrder::AcquireAndRelease:
+      stdOrder = std::memory_order_acq_rel;
+      break;
+    case vtkm::MemoryOrder::SequentiallyConsistent:
+      break;
+  }
+  return stdOrder;
+}
+
+} // namespace internal
+
+} // namespace vtkm
+
+
+#if defined(VTKM_CUDA_DEVICE_PASS)
+
+namespace vtkm
+{
+namespace detail
+{
+
+// Issues __threadfence() when `order` has release semantics (Release, AcquireAndRelease
+// or SequentiallyConsistent). Called ahead of an atomic write so that previous
+// non-atomic stores are visible to other threads.
+VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
+{
+  switch (order)
+  {
+    case vtkm::MemoryOrder::Release:
+    case vtkm::MemoryOrder::AcquireAndRelease:
+    case vtkm::MemoryOrder::SequentiallyConsistent:
+      __threadfence();
+      break;
+    default:
+      break;
+  }
+}
+
+// Issues __threadfence() when `order` has acquire semantics (Acquire, AcquireAndRelease
+// or SequentiallyConsistent). Called after an atomic read so that dependent reads that
+// follow it are correctly ordered.
+VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
+{
+  switch (order)
+  {
+    case vtkm::MemoryOrder::Acquire:
+    case vtkm::MemoryOrder::AcquireAndRelease:
+    case vtkm::MemoryOrder::SequentiallyConsistent:
+      __threadfence();
+      break;
+    default:
+      break;
+  }
+}
+
+// Reads *addr through a volatile pointer and returns the value read.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
+{
+  const volatile T* volatileAddr = addr; /* volatile to bypass cache */
+  // Sequentially consistent loads are additionally fenced before the read.
+  if (order == vtkm::MemoryOrder::SequentiallyConsistent)
+  {
+    __threadfence();
+  }
+  const T loaded = *volatileAddr;
+  /* fence to ensure that dependent reads are correctly ordered */
+  AtomicLoadFence(order);
+  return loaded;
+}
+
+// Writes `value` to *addr through a volatile pointer.
+template <typename T>
+VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
+{
+  volatile T* volatileAddr = addr; /* volatile to bypass cache */
+  /* fence to ensure that previous non-atomic stores are visible to other threads */
+  AtomicStoreFence(order);
+  *volatileAddr = value;
+}
+
+// The read-modify-write operations below share one shape: store fence, the CUDA
+// atomic intrinsic, load fence. Each returns the result of the intrinsic.
+
+// Atomically adds `arg` to *addr using atomicAdd.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const auto previous = atomicAdd(addr, arg);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically ANDs `mask` into *addr using atomicAnd.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const auto previous = atomicAnd(addr, mask);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically ORs `mask` into *addr using atomicOr.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const auto previous = atomicOr(addr, mask);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically XORs `mask` into *addr using atomicXor.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const auto previous = atomicXor(addr, mask);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically inverts every bit of *addr by XOR-ing it with an all-ones mask.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
+{
+  const T allOnes = static_cast<T>(~T{ 0u });
+  return AtomicXorImpl(addr, allOnes, order);
+}
+
+// Compare-and-swap via atomicCAS. Note the argument order: this function takes
+// (desired, expected) whereas atomicCAS is called with (expected, desired).
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
+                                                 T desired,
+                                                 T expected,
+                                                 vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const auto previous = atomicCAS(addr, expected, desired);
+  AtomicLoadFence(order);
+  return previous;
+}
+}
+} // namespace vtkm::detail
+
+#elif defined(VTKM_ENABLE_KOKKOS)
+
+VTKM_THIRDPARTY_PRE_INCLUDE
+// Superhack! Kokkos_Macros.hpp defines macros to include modifiers like __device__.
+// However, we don't want to actually use those if compiling this with a standard
+// C++ compiler (because this particular code does not run on a device). Thus,
+// we want to disable that behavior when not using the device compiler. To do that,
+// we are going to have to load the KokkosCore_config.h file (which you are not
+// supposed to do), then undefine the device enables if necessary, then load
+// Kokkos_Macros.hpp to finish the state.
+#ifndef KOKKOS_MACROS_HPP
+#define KOKKOS_MACROS_HPP
+#include <KokkosCore_config.h>
+#undef KOKKOS_MACROS_HPP
+#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
+
+#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
+#undef KOKKOS_ENABLE_CUDA
+#endif
+#endif //KOKKOS_MACROS_HPP not loaded
+
+#include <Kokkos_Core.hpp>
+VTKM_THIRDPARTY_POST_INCLUDE
+
+namespace vtkm
+{
+namespace detail
+{
+
+// Issues Kokkos::memory_fence() when `order` has release semantics (Release,
+// AcquireAndRelease or SequentiallyConsistent). Called ahead of an atomic write so
+// that previous non-atomic stores are visible to other threads.
+VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
+{
+  switch (order)
+  {
+    case vtkm::MemoryOrder::Release:
+    case vtkm::MemoryOrder::AcquireAndRelease:
+    case vtkm::MemoryOrder::SequentiallyConsistent:
+      Kokkos::memory_fence();
+      break;
+    default:
+      break;
+  }
+}
+
+// Issues Kokkos::memory_fence() when `order` has acquire semantics (Acquire,
+// AcquireAndRelease or SequentiallyConsistent). Called after an atomic read.
+VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
+{
+  switch (order)
+  {
+    case vtkm::MemoryOrder::Acquire:
+    case vtkm::MemoryOrder::AcquireAndRelease:
+    case vtkm::MemoryOrder::SequentiallyConsistent:
+      Kokkos::memory_fence();
+      break;
+    default:
+      break;
+  }
+}
+
+// Atomic load through Kokkos::Impl::atomic_load with the memory-order tag that
+// corresponds to `order`.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
+{
+  if (order == vtkm::MemoryOrder::Relaxed)
+  {
+    return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_relaxed);
+  }
+
+  // Release does not make sense for a load, so Release and AcquireAndRelease are
+  // handled as Acquire.
+  if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::Release) ||
+      (order == vtkm::MemoryOrder::AcquireAndRelease))
+  {
+    return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_acquire);
+  }
+
+  // SequentiallyConsistent, and the fallback for values that should never occur.
+  return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst);
+}
+
+// Atomic store through Kokkos::Impl::atomic_store with the memory-order tag that
+// corresponds to `order`.
+template <typename T>
+VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
+{
+  if (order == vtkm::MemoryOrder::Relaxed)
+  {
+    Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_relaxed);
+  }
+  else if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::Release) ||
+           (order == vtkm::MemoryOrder::AcquireAndRelease))
+  {
+    // Acquire does not make sense for a store, so Acquire and AcquireAndRelease are
+    // handled as Release.
+    Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_release);
+  }
+  else if (order == vtkm::MemoryOrder::SequentiallyConsistent)
+  {
+    Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_seq_cst);
+  }
+}
+
+// The read-modify-write operations below share one shape: store fence, the Kokkos
+// atomic call, load fence. Each returns the result of the Kokkos call.
+
+// Atomically adds `arg` to *addr using Kokkos::atomic_fetch_add.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const T previous = Kokkos::atomic_fetch_add(addr, arg);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically ANDs `mask` into *addr using Kokkos::atomic_fetch_and.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const T previous = Kokkos::atomic_fetch_and(addr, mask);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically ORs `mask` into *addr using Kokkos::atomic_fetch_or.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const T previous = Kokkos::atomic_fetch_or(addr, mask);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically XORs `mask` into *addr using Kokkos::atomic_fetch_xor.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const T previous = Kokkos::atomic_fetch_xor(addr, mask);
+  AtomicLoadFence(order);
+  return previous;
+}
+
+// Atomically inverts every bit of *addr by XOR-ing it with an all-ones mask.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
+{
+  const T allOnes = static_cast<T>(~T{ 0u });
+  return AtomicXorImpl(addr, allOnes, order);
+}
+
+// Compare-and-swap via Kokkos::atomic_compare_exchange. Note the argument order:
+// this function takes (desired, expected) whereas the Kokkos call is given
+// (expected, desired).
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
+                                                 T desired,
+                                                 T expected,
+                                                 vtkm::MemoryOrder order)
+{
+  AtomicStoreFence(order);
+  const T previous = Kokkos::atomic_compare_exchange(addr, expected, desired);
+  AtomicLoadFence(order);
+  return previous;
+}
+}
+} // namespace vtkm::detail
+
+#elif defined(VTKM_MSVC)
+
+// Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64
+
+#include <cstdint>
+#include <cstring>
+#include <intrin.h> // For MSVC atomics
+
+namespace vtkm
+{
+namespace detail
+{
+
+template <typename To, typename From>
+VTKM_EXEC_CONT inline To BitCast(const From& src)
+{
+  // The memcpy should be removed by the compiler when possible, but this
+  // works around a host of issues with bitcasting using reinterpret_cast.
+  VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
+  To dst;
+  std::memcpy(&dst, &src, sizeof(From));
+  return dst;
+}
+
+template <typename T>
+VTKM_EXEC_CONT inline T BitCast(T&& src)
+{
+  return std::forward<T>(src);
+}
+
+// Note about Load and Store implementations:
+//
+// "Simple reads and writes to properly-aligned 32-bit variables are atomic
+//  operations"
+//
+// "Simple reads and writes to properly aligned 64-bit variables are atomic on
+// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
+// atomic on 32-bit Windows."
+//
+// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
+// guaranteed to be atomic on any platform."
+//
+// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
+
+VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(const vtkm::UInt8* addr, vtkm::MemoryOrder order)
+{
+  // This assumes that the memory interface is smart enough to load a 32-bit
+  // word atomically and a properly aligned 8-bit word from it.
+  // We could build address masks and do shifts to perform this manually if
+  // this assumption is incorrect.
+  auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
+  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
+  return result;
+}
+VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(const vtkm::UInt16* addr, vtkm::MemoryOrder order)
+{
+  // This assumes that the memory interface is smart enough to load a 32-bit
+  // word atomically and a properly aligned 16-bit word from it.
+  // We could build address masks and do shifts to perform this manually if
+  // this assumption is incorrect.
+  auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
+  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
+  return result;
+}
+VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(const vtkm::UInt32* addr, vtkm::MemoryOrder order)
+{
+  auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
+  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
+  return result;
+}
+VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(const vtkm::UInt64* addr, vtkm::MemoryOrder order)
+{
+  auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
+  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
+  return result;
+}
+
+VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr,
+                                           vtkm::UInt8 val,
+                                           vtkm::MemoryOrder order)
+{
+  // There doesn't seem to be an atomic store instruction in the windows
+  // API, so just exchange and discard the result.
+  _InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
+}
+VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr,
+                                           vtkm::UInt16 val,
+                                           vtkm::MemoryOrder order)
+{
+  // There doesn't seem to be an atomic store instruction in the windows
+  // API, so just exchange and discard the result.
+  _InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
+}
+VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt32* addr,
+                                           vtkm::UInt32 val,
+                                           vtkm::MemoryOrder order)
+{
+  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
+  *addr = val;
+}
+VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
+                                           vtkm::UInt64 val,
+                                           vtkm::MemoryOrder order)
+{
+  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
+  *addr = val;
+}
+
+#define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix)                             \
+  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \
+  {                                                                                              \
+    return BitCast<vtkmType>(                                                                    \
+      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg)));        \
+  }
+
+// Defines the full set of read-modify-write overloads for one integer width:
+//  - AtomicAddImpl / AtomicAndImpl / AtomicOrImpl / AtomicXorImpl via VTKM_ATOMIC_OP,
+//  - AtomicNotImpl, expressed as an XOR with an all-ones mask,
+//  - AtomicCompareAndSwapImpl via `_InterlockedCompareExchange##suffix`, called as
+//    (destination, desired, expected).
+// `winType` is the Windows integer type the intrinsics operate on and `suffix` selects
+// the width-specific intrinsic (it may be empty). As in VTKM_ATOMIC_OP, `order` is not
+// forwarded to the compare-exchange intrinsic.
+#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix)                             \
+  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix)     \
+  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix)             \
+  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix)               \
+  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix)             \
+  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
+  {                                                                                     \
+    return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order);          \
+  }                                                                                     \
+  VTKM_EXEC_CONT inline vtkmType AtomicCompareAndSwapImpl(                              \
+    vtkmType* addr, vtkmType desired, vtkmType expected, vtkm::MemoryOrder order)       \
+  {                                                                                     \
+    return BitCast<vtkmType>(                                                           \
+      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr),    \
+                                          BitCast<winType>(desired),                    \
+                                          BitCast<winType>(expected)));                 \
+  }
+
+// Instantiate the atomic overloads for every supported unsigned width. The 32-bit
+// intrinsics carry no width suffix, hence the empty last argument.
+VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
+VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
+VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
+VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
+
+// Both generator macros are implementation details of this header; remove them so
+// they do not leak into code that includes it.
+#undef VTKM_ATOMIC_OPS_FOR_TYPE
+#undef VTKM_ATOMIC_OP
+}
+} // namespace vtkm::detail
+
+#else // gcc/clang for CPU
+
+// Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64
+
+#include <cstdint>
+#include <cstring>
+
+namespace vtkm
+{
+namespace detail
+{
+
+// Translates a vtkm::MemoryOrder value into the corresponding `__ATOMIC_*` constant
+// understood by the GCC/Clang atomic builtins.
+VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
+{
+  // Default to the strongest ordering. Every enumerator is handled below, so this
+  // value is only used if `order` holds something outside the enumeration; it also
+  // keeps compilers from warning about a missing return value.
+  int gccOrder = __ATOMIC_SEQ_CST;
+
+  switch (order)
+  {
+    case vtkm::MemoryOrder::Relaxed:
+      gccOrder = __ATOMIC_RELAXED;
+      break;
+    case vtkm::MemoryOrder::Acquire:
+      gccOrder = __ATOMIC_ACQUIRE;
+      break;
+    case vtkm::MemoryOrder::Release:
+      gccOrder = __ATOMIC_RELEASE;
+      break;
+    case vtkm::MemoryOrder::AcquireAndRelease:
+      gccOrder = __ATOMIC_ACQ_REL;
+      break;
+    case vtkm::MemoryOrder::SequentiallyConsistent:
+      gccOrder = __ATOMIC_SEQ_CST;
+      break;
+  }
+
+  return gccOrder;
+}
+
+// Reads `*addr` atomically through the `__atomic_load_n` builtin.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
+{
+  const int gccOrder = GccAtomicMemOrder(order);
+  return __atomic_load_n(addr, gccOrder);
+}
+
+// Writes `value` to `*addr` atomically through the `__atomic_store_n` builtin.
+template <typename T>
+VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
+{
+  const int gccOrder = GccAtomicMemOrder(order);
+  __atomic_store_n(addr, value, gccOrder);
+}
+
+// Atomically adds `arg` to `*addr` and returns the result of `__atomic_fetch_add`
+// (the value held before the addition).
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
+{
+  const int gccOrder = GccAtomicMemOrder(order);
+  return __atomic_fetch_add(addr, arg, gccOrder);
+}
+
+// Atomically ANDs `mask` into `*addr` and returns the result of `__atomic_fetch_and`
+// (the value held before the operation).
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  const int gccOrder = GccAtomicMemOrder(order);
+  return __atomic_fetch_and(addr, mask, gccOrder);
+}
+
+// Atomically ORs `mask` into `*addr` and returns the result of `__atomic_fetch_or`
+// (the value held before the operation).
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  const int gccOrder = GccAtomicMemOrder(order);
+  return __atomic_fetch_or(addr, mask, gccOrder);
+}
+
+// Atomically XORs `mask` into `*addr` and returns the result of `__atomic_fetch_xor`
+// (the value held before the operation).
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
+{
+  const int gccOrder = GccAtomicMemOrder(order);
+  return __atomic_fetch_xor(addr, mask, gccOrder);
+}
+
+// Atomically inverts every bit of `*addr`. Implemented as an XOR against a mask
+// with all bits set; returns whatever AtomicXorImpl returns.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
+{
+  // The cast undoes the integer promotion that `~` applies to narrow types.
+  const T allBitsSet = static_cast<T>(~T{ 0u });
+  return AtomicXorImpl(addr, allBitsSet, order);
+}
+
+// Atomically replaces `*addr` with `desired` if it currently equals `expected`.
+// Returns the value found at `*addr`: on success that is `expected`, on failure the
+// builtin overwrites the local `expected` with the value actually observed.
+//
+// `order` is used as the success ordering. The failure ordering of
+// `__atomic_compare_exchange_n` must not be `__ATOMIC_RELEASE` or `__ATOMIC_ACQ_REL`
+// (a failed exchange performs no store), so those are reduced to their load-only
+// counterparts before being passed as the failure ordering.
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
+                                                 T desired,
+                                                 T expected,
+                                                 vtkm::MemoryOrder order)
+{
+  const int successOrder = GccAtomicMemOrder(order);
+
+  int failureOrder = successOrder;
+  if (successOrder == __ATOMIC_ACQ_REL)
+  {
+    failureOrder = __ATOMIC_ACQUIRE;
+  }
+  else if (successOrder == __ATOMIC_RELEASE)
+  {
+    failureOrder = __ATOMIC_RELAXED;
+  }
+
+  // `false` selects the strong variant, which does not fail spuriously.
+  __atomic_compare_exchange_n(addr, &expected, desired, false, successOrder, failureOrder);
+  return expected;
+}
+}
+} // namespace vtkm::detail
+
+#endif // gcc/clang
+
+namespace vtkm
+{
+
+namespace detail
+{
+
+// Maps an integer type to its counterpart of opposite signedness: a signed `T`
+// yields `std::make_unsigned<T>::type`, anything else yields
+// `std::make_signed<T>::type`. It is used by the public atomic functions to accept
+// an operand whose signedness differs from the pointed-to type.
+// Both branches of the std::conditional are named here, so `T` presumably has to be
+// a type that std::make_signed and std::make_unsigned both accept.
+template <typename T>
+using OppositeSign = typename std::conditional<std::is_signed<T>::value,
+                                               typename std::make_unsigned<T>::type,
+                                               typename std::make_signed<T>::type>::type;
+
+} // namespace detail
+
+/// \brief The preferred type to use for atomic operations.
+///
+/// Currently an alias for `vtkm::UInt32`.
+///
+using AtomicTypePreferred = vtkm::UInt32;
+
+/// \brief A list of types that can be used with atomic operations.
+///
+/// Currently contains `vtkm::UInt32` and `vtkm::UInt64`.
+///
+/// TODO: Adjust based on devices being compiled.
+///
+/// BUG: vtkm::UInt64 is provided in this list even though it is not supported on CUDA
+/// before compute capability 3.5.
+///
+using AtomicTypesSupported = vtkm::List<vtkm::UInt32, vtkm::UInt64>;
+
+/// \brief Atomically reads a value from a shared memory location.
+///
+/// Returns the value stored at `pointer`. If another thread writes to the same
+/// location concurrently, the value returned is either the one present before that
+/// write or the one present after it, never a partially written value.
+///
+/// The memory ordering defaults to `vtkm::MemoryOrder::Acquire`.
+///
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicLoad(const T* pointer,
+                                   vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
+{
+  const T loaded = detail::AtomicLoadImpl(pointer, order);
+  return loaded;
+}
+
+///@{
+/// \brief Atomically writes a value to a shared memory location.
+///
+/// Stores `value` at the location given by `pointer`. When two threads call
+/// `AtomicStore` on the same location at the same time, the location ends up holding
+/// one of the two values in its entirety (never a mixture of bits from both).
+///
+/// The second overload accepts a value whose signedness is opposite to `T` and
+/// converts it to `T` before storing. The memory ordering defaults to
+/// `vtkm::MemoryOrder::Release`.
+///
+template <typename T>
+VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
+                                       T value,
+                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
+{
+  detail::AtomicStoreImpl(pointer, value, order);
+}
+template <typename T>
+VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
+                                       detail::OppositeSign<T> value,
+                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
+{
+  const T converted = static_cast<T>(value);
+  detail::AtomicStoreImpl(pointer, converted, order);
+}
+///@}
+
+///@{
+/// \brief Atomically adds a value to a shared memory location.
+///
+/// Adds `operand` to the value stored at `pointer`, writes the sum back to that
+/// location, and returns the _old_ value that was there before the addition. For
+/// example, calling `AtomicAdd` with an operand of 3 on a location holding 5 leaves 8
+/// in memory and returns 5.
+///
+/// Simultaneous calls from multiple threads do not interfere with each other: the
+/// outcome is the same as if the calls had been made one after another (in an
+/// indeterminate order).
+///
+/// The second overload accepts an operand whose signedness is opposite to `T` and
+/// converts it to `T` first.
+///
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAdd(
+  T* pointer,
+  T operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T previous = detail::AtomicAddImpl(pointer, operand, order);
+  return previous;
+}
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAdd(
+  T* pointer,
+  detail::OppositeSign<T> operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T converted = static_cast<T>(operand);
+  return detail::AtomicAddImpl(pointer, converted, order);
+}
+///@}
+
+///@{
+/// \brief Atomically ANDs bits into a shared memory location.
+///
+/// Computes the bitwise AND of `operand` and the value stored at `pointer`, writes
+/// the result back to that location, and returns the _old_ value that was there
+/// before the operation. For example, calling `AtomicAnd` with an operand of 0x3 on a
+/// location holding 0x6 leaves 0x2 in memory and returns 0x6.
+///
+/// Simultaneous calls from multiple threads do not interfere with each other: the
+/// outcome is the same as if the calls had been made one after another (in an
+/// indeterminate order).
+///
+/// The second overload accepts an operand whose signedness is opposite to `T` and
+/// converts it to `T` first.
+///
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAnd(
+  T* pointer,
+  T operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T previous = detail::AtomicAndImpl(pointer, operand, order);
+  return previous;
+}
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicAnd(
+  T* pointer,
+  detail::OppositeSign<T> operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T converted = static_cast<T>(operand);
+  return detail::AtomicAndImpl(pointer, converted, order);
+}
+///@}
+
+///@{
+/// \brief Atomically ORs bits into a shared memory location.
+///
+/// Computes the bitwise OR of `operand` and the value stored at `pointer`, writes the
+/// result back to that location, and returns the _old_ value that was there before
+/// the operation. For example, calling `AtomicOr` with an operand of 0x3 on a
+/// location holding 0x6 leaves 0x7 in memory and returns 0x6.
+///
+/// Simultaneous calls from multiple threads do not interfere with each other: the
+/// outcome is the same as if the calls had been made one after another (in an
+/// indeterminate order).
+///
+/// The second overload accepts an operand whose signedness is opposite to `T` and
+/// converts it to `T` first.
+///
+template <typename T>
+VTKM_EXEC_CONT inline T
+AtomicOr(T* pointer, T operand, vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T previous = detail::AtomicOrImpl(pointer, operand, order);
+  return previous;
+}
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicOr(
+  T* pointer,
+  detail::OppositeSign<T> operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T converted = static_cast<T>(operand);
+  return detail::AtomicOrImpl(pointer, converted, order);
+}
+///@}
+
+///@{
+/// \brief Atomically XORs bits into a shared memory location.
+///
+/// Computes the bitwise exclusive-OR of `operand` and the value stored at `pointer`,
+/// writes the result back to that location, and returns the _old_ value that was
+/// there before the operation. For example, calling `AtomicXor` with an operand of
+/// 0x3 on a location holding 0x6 leaves 0x5 in memory and returns 0x6.
+///
+/// Simultaneous calls from multiple threads do not interfere with each other: the
+/// outcome is the same as if the calls had been made one after another.
+///
+/// The second overload accepts an operand whose signedness is opposite to `T` and
+/// converts it to `T` first.
+///
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicXor(
+  T* pointer,
+  T operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T previous = detail::AtomicXorImpl(pointer, operand, order);
+  return previous;
+}
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicXor(
+  T* pointer,
+  detail::OppositeSign<T> operand,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T converted = static_cast<T>(operand);
+  return detail::AtomicXorImpl(pointer, converted, order);
+}
+///@}
+
+/// \brief Atomically inverts the bits of a shared memory location.
+///
+/// Computes the bitwise NOT of the value stored at `pointer`, writes the result back
+/// to that location, and returns the _old_ value that was there before the operation.
+///
+/// Simultaneous calls from multiple threads do not interfere with each other: the
+/// outcome is the same as if the calls had been made one after another.
+///
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicNot(
+  T* pointer,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T previous = detail::AtomicNotImpl(pointer, order);
+  return previous;
+}
+
+/// \brief Atomically replaces a value if it matches an expected value.
+///
+/// Compares the value stored at `pointer` with `expected`. If they are equal, the
+/// location is overwritten with `desired`; otherwise the memory is left unchanged.
+/// Either way, the function returns the _old_ value that was at `pointer` before the
+/// call, so the swap took place exactly when the return value equals `expected`.
+///
+/// Note the argument order: the new value (`desired`) comes before the value to
+/// compare against (`expected`).
+///
+/// When multiple threads call `AtomicCompareAndSwap` simultaneously, the outcome is
+/// the same as if the calls had been made one after another (in an indeterminate
+/// order).
+///
+template <typename T>
+VTKM_EXEC_CONT inline T AtomicCompareAndSwap(
+  T* pointer,
+  T desired,
+  T expected,
+  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
+{
+  const T previous = detail::AtomicCompareAndSwapImpl(pointer, desired, expected, order);
+  return previous;
+}
+
+} // namespace vtkm
+
+#endif //vtk_m_Atomic_h
--- a/vtkm/CMakeLists.txt
+++ b/vtkm/CMakeLists.txt
@ -19,6 +19,7 @@ vtkm_install_headers(
 set(headers
  Algorithms.h
  Assert.h
+  Atomic.h
  BinaryPredicates.h
  BinaryOperators.h
  Bitset.h
--- a/vtkm/CellShape.h
+++ b/vtkm/CellShape.h
@ -79,8 +79,8 @@ struct CellShapeTagVtkmToVtkc;
 /// concept check to make sure that a template argument is a proper cell shape
 /// tag.
 ///
-#define VTKM_IS_CELL_SHAPE_TAG(tag)                                                                \
-  VTKM_STATIC_ASSERT_MSG(::vtkm::internal::CellShapeTagCheck<tag>::value,                          \
+#define VTKM_IS_CELL_SHAPE_TAG(tag)                                       \
+  VTKM_STATIC_ASSERT_MSG(::vtkm::internal::CellShapeTagCheck<tag>::value, \
                         "Provided type is not a valid VTK-m cell shape tag.")

 /// A traits-like class to get an CellShapeId known at compile time to a tag.
@ -98,32 +98,32 @@ struct CellShapeIdToTag
 // Define a tag for each cell shape as well as the support structs to go
 // between tags and ids. The following macro is only valid here.

-#define VTKM_DEFINE_CELL_TAG(name, idname)                                                         \
-  struct CellShapeTag##name                                                                        \
-  {                                                                                                \
-    static constexpr vtkm::UInt8 Id = vtkm::idname;                                                \
-  };                                                                                               \
-  namespace internal                                                                               \
-  {                                                                                                \
-  template <>                                                                                      \
-  struct CellShapeTagCheck<vtkm::CellShapeTag##name> : std::true_type                              \
-  {                                                                                                \
-  };                                                                                               \
-  template <>                                                                                      \
-  struct CellShapeTagVtkmToVtkc<vtkm::CellShapeTag##name>                                          \
-  {                                                                                                \
-    using Type = lcl::name;                                                                        \
-  };                                                                                               \
-  }                                                                                                \
-  static inline VTKM_EXEC_CONT const char* GetCellShapeName(vtkm::CellShapeTag##name)              \
-  {                                                                                                \
-    return #name;                                                                                  \
-  }                                                                                                \
-  template <>                                                                                      \
-  struct CellShapeIdToTag<vtkm::idname>                                                            \
-  {                                                                                                \
-    using valid = std::true_type;                                                                  \
-    using Tag = vtkm::CellShapeTag##name;                                                          \
+#define VTKM_DEFINE_CELL_TAG(name, idname)                                            \
+  struct CellShapeTag##name                                                           \
+  {                                                                                   \
+    static constexpr vtkm::UInt8 Id = vtkm::idname;                                   \
+  };                                                                                  \
+  namespace internal                                                                  \
+  {                                                                                   \
+  template <>                                                                         \
+  struct CellShapeTagCheck<vtkm::CellShapeTag##name> : std::true_type                 \
+  {                                                                                   \
+  };                                                                                  \
+  template <>                                                                         \
+  struct CellShapeTagVtkmToVtkc<vtkm::CellShapeTag##name>                             \
+  {                                                                                   \
+    using Type = lcl::name;                                                           \
+  };                                                                                  \
+  }                                                                                   \
+  static inline VTKM_EXEC_CONT const char* GetCellShapeName(vtkm::CellShapeTag##name) \
+  {                                                                                   \
+    return #name;                                                                     \
+  }                                                                                   \
+  template <>                                                                         \
+  struct CellShapeIdToTag<vtkm::idname>                                               \
+  {                                                                                   \
+    using valid = std::true_type;                                                     \
+    using Tag = vtkm::CellShapeTag##name;                                             \
  }

 VTKM_DEFINE_CELL_TAG(Empty, CELL_SHAPE_EMPTY);
@ -189,12 +189,12 @@ inline lcl::Cell make_LclCellShapeTag(const vtkm::CellShapeTagGeneric& tag,

 } // namespace internal

-#define vtkmGenericCellShapeMacroCase(cellShapeId, call)                                           \
-  case vtkm::cellShapeId:                                                                          \
-  {                                                                                                \
-    using CellShapeTag = vtkm::CellShapeIdToTag<vtkm::cellShapeId>::Tag;                           \
-    call;                                                                                          \
-  }                                                                                                \
+#define vtkmGenericCellShapeMacroCase(cellShapeId, call)                 \
+  case vtkm::cellShapeId:                                                \
+  {                                                                      \
+    using CellShapeTag = vtkm::CellShapeIdToTag<vtkm::cellShapeId>::Tag; \
+    call;                                                                \
+  }                                                                      \
  break

 /// \brief A macro used in a \c switch statement to determine cell shape.
@ -227,17 +227,17 @@ inline lcl::Cell make_LclCellShapeTag(const vtkm::CellShapeTagGeneric& tag,
 /// Note that \c vtkmGenericCellShapeMacro does not have a default case. You
 /// should consider adding one that gives a
 ///
-#define vtkmGenericCellShapeMacro(call)                                                            \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_EMPTY, call);                                           \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_VERTEX, call);                                          \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_LINE, call);                                            \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLY_LINE, call);                                       \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_TRIANGLE, call);                                        \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLYGON, call);                                         \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_QUAD, call);                                            \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_TETRA, call);                                           \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_HEXAHEDRON, call);                                      \
-  vtkmGenericCellShapeMacroCase(CELL_SHAPE_WEDGE, call);                                           \
+#define vtkmGenericCellShapeMacro(call)                       \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_EMPTY, call);      \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_VERTEX, call);     \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_LINE, call);       \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLY_LINE, call);  \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_TRIANGLE, call);   \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLYGON, call);    \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_QUAD, call);       \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_TETRA, call);      \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_HEXAHEDRON, call); \
+  vtkmGenericCellShapeMacroCase(CELL_SHAPE_WEDGE, call);      \
  vtkmGenericCellShapeMacroCase(CELL_SHAPE_PYRAMID, call)

 } // namespace vtkm
--- a/vtkm/CellTraits.h
+++ b/vtkm/CellTraits.h
@ -81,23 +81,23 @@ struct CellTraits

 // Define traits for every cell type.

-#define VTKM_DEFINE_CELL_TRAITS(name, dimensions, numPoints)                                       \
-  template <>                                                                                      \
-  struct CellTraits<vtkm::CellShapeTag##name>                                                      \
-  {                                                                                                \
-    static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions;                        \
-    using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>;   \
-    using IsSizeFixed = vtkm::CellTraitsTagSizeFixed;                                              \
-    static constexpr vtkm::IdComponent NUM_POINTS = numPoints;                                     \
+#define VTKM_DEFINE_CELL_TRAITS(name, dimensions, numPoints)                                     \
+  template <>                                                                                    \
+  struct CellTraits<vtkm::CellShapeTag##name>                                                    \
+  {                                                                                              \
+    static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions;                      \
+    using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>; \
+    using IsSizeFixed = vtkm::CellTraitsTagSizeFixed;                                            \
+    static constexpr vtkm::IdComponent NUM_POINTS = numPoints;                                   \
  }

-#define VTKM_DEFINE_CELL_TRAITS_VARIABLE(name, dimensions)                                         \
-  template <>                                                                                      \
-  struct CellTraits<vtkm::CellShapeTag##name>                                                      \
-  {                                                                                                \
-    static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions;                        \
-    using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>;   \
-    using IsSizeFixed = vtkm::CellTraitsTagSizeVariable;                                           \
+#define VTKM_DEFINE_CELL_TRAITS_VARIABLE(name, dimensions)                                       \
+  template <>                                                                                    \
+  struct CellTraits<vtkm::CellShapeTag##name>                                                    \
+  {                                                                                              \
+    static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions;                      \
+    using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>; \
+    using IsSizeFixed = vtkm::CellTraitsTagSizeVariable;                                         \
  }

 VTKM_DEFINE_CELL_TRAITS(Empty, 0, 0);
--- a/vtkm/Deprecated.h
+++ b/vtkm/Deprecated.h
@ -13,9 +13,9 @@
 #include <vtkm/StaticAssert.h>
 #include <vtkm/Types.h>

-#define VTK_M_DEPRECATED_MAKE_MESSAGE(...)                                                         \
+#define VTK_M_DEPRECATED_MAKE_MESSAGE(...) \
  VTKM_EXPAND(VTK_M_DEPRECATED_MAKE_MESSAGE_IMPL(__VA_ARGS__, "", vtkm::internal::NullType{}))
-#define VTK_M_DEPRECATED_MAKE_MESSAGE_IMPL(version, message, ...)                                  \
+#define VTK_M_DEPRECATED_MAKE_MESSAGE_IMPL(version, message, ...) \
  message " Deprecated in version " #version "."

 /// \def VTKM_DEPRECATED(version, message)
@ -104,7 +104,7 @@
 #if defined(VTKM_GCC) || defined(VTKM_CLANG)

 #define VTKM_DEPRECATED_SUPPRESS_SUPPORTED
-#define VTKM_DEPRECATED_SUPPRESS_BEGIN                                                             \
+#define VTKM_DEPRECATED_SUPPRESS_BEGIN \
  _Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
 #define VTKM_DEPRECATED_SUPPRESS_END _Pragma("GCC diagnostic pop")

--- a/vtkm/ErrorCode.h
+++ b/vtkm/ErrorCode.h
@ -108,14 +108,14 @@ VTKM_EXEC_CONT inline vtkm::ErrorCode LclErrorToVtkmError(lcl::ErrorCode code) n

 } // namespace vtkm

-#define VTKM_RETURN_ON_ERROR(call)                                                                 \
-  do                                                                                               \
-  {                                                                                                \
-    auto status = (call);                                                                          \
-    if (status != ::vtkm::ErrorCode::Success)                                                      \
-    {                                                                                              \
-      return status;                                                                               \
-    }                                                                                              \
+#define VTKM_RETURN_ON_ERROR(call)            \
+  do                                          \
+  {                                           \
+    auto status = (call);                     \
+    if (status != ::vtkm::ErrorCode::Success) \
+    {                                         \
+      return status;                          \
+    }                                         \
  } while (false)

 #endif //vtk_m_exec_ErrorCode_h
--- a/vtkm/ImplicitFunction.h
+++ b/vtkm/ImplicitFunction.h
@ -665,22 +665,18 @@ private:

 } // namespace vtkm

-#ifdef VTKM_CUDA
-
 // Cuda seems to have a bug where it expects the template class VirtualObjectTransfer
 // to be instantiated in a consistent order among all the translation units of an
 // executable. Failing to do so results in random crashes and incorrect results.
 // We workaroud this issue by explicitly instantiating VirtualObjectTransfer for
 // all the implicit functions here.
-
-#include <vtkm/cont/cuda/internal/VirtualObjectTransferCuda.h>
-
+#ifdef VTKM_CUDA
+#include <vtkm/cont/internal/VirtualObjectTransferInstantiate.h>
 VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Box);
 VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Cylinder);
 VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Frustum);
 VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Plane);
 VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Sphere);
-
 #endif

 #endif //vtk_m_ImplicitFunction_h
--- a/vtkm/List.h
+++ b/vtkm/List.h
@ -57,8 +57,8 @@ using IsList = typename vtkm::internal::IsListImpl<T>::type;
 /// actually a device adapter tag. (You can get weird errors elsewhere in the
 /// code when a mistake is made.)
 ///
-#define VTKM_IS_LIST(type)                                                                         \
-  VTKM_STATIC_ASSERT_MSG((::vtkm::internal::IsList<type>::value),                                  \
+#define VTKM_IS_LIST(type)                                        \
+  VTKM_STATIC_ASSERT_MSG((::vtkm::internal::IsList<type>::value), \
                         "Provided type is not a valid VTK-m list type.")

 namespace detail
@ -226,8 +226,7 @@ template <vtkm::IdComponent NumSearched,
          typename... Ts>
 struct FindFirstOfType<NumSearched, Target, T0, T1, T2, T3, T4, T5, Ts...>
  : FindFirstOfSplit4<(std::is_same<Target, T0>::value || std::is_same<Target, T1>::value ||
-                       std::is_same<Target, T2>::value ||
-                       std::is_same<Target, T3>::value),
+                       std::is_same<Target, T2>::value || std::is_same<Target, T3>::value),
                      NumSearched,
                      Target,
                      T0,
@ -257,8 +256,7 @@ template <vtkm::IdComponent NumSearched,
          typename... Ts>
 struct FindFirstOfSplit8<true, NumSearched, Target, T0, T1, T2, T3, T4, T5, T6, T7, Ts...>
  : FindFirstOfSplit4<(std::is_same<Target, T0>::value || std::is_same<Target, T1>::value ||
-                       std::is_same<Target, T2>::value ||
-                       std::is_same<Target, T3>::value),
+                       std::is_same<Target, T2>::value || std::is_same<Target, T3>::value),
                      NumSearched,
                      Target,
                      T0,
@ -305,12 +303,9 @@ template <vtkm::IdComponent NumSearched,
          typename... Ts>
 struct FindFirstOfType<NumSearched, Target, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, Ts...>
  : FindFirstOfSplit8<(std::is_same<Target, T0>::value || std::is_same<Target, T1>::value ||
-                       std::is_same<Target, T2>::value ||
-                       std::is_same<Target, T3>::value ||
-                       std::is_same<Target, T4>::value ||
-                       std::is_same<Target, T5>::value ||
-                       std::is_same<Target, T6>::value ||
-                       std::is_same<Target, T7>::value),
+                       std::is_same<Target, T2>::value || std::is_same<Target, T3>::value ||
+                       std::is_same<Target, T4>::value || std::is_same<Target, T5>::value ||
+                       std::is_same<Target, T6>::value || std::is_same<Target, T7>::value),
                      NumSearched,
                      Target,
                      T0,
--- a/vtkm/ListTag.h
+++ b/vtkm/ListTag.h
@ -103,8 +103,8 @@ struct VTKM_DEPRECATED(1.6, "VTKM_IS_LIST_TAG replaced with VTKM_IS_LIST.") List
 /// actually a device adapter tag. (You can get weird errors elsewhere in the
 /// code when a mistake is made.)
 ///
-#define VTKM_IS_LIST_TAG(tag)                                                                      \
-  VTKM_STATIC_ASSERT_MSG((::vtkm::detail::ListTagAssert<tag>::value),                              \
+#define VTKM_IS_LIST_TAG(tag)                                         \
+  VTKM_STATIC_ASSERT_MSG((::vtkm::detail::ListTagAssert<tag>::value), \
                         "Provided type is not a valid VTK-m list tag.")

 namespace internal
--- a/vtkm/Particle.h
+++ b/vtkm/Particle.h
@ -13,6 +13,7 @@
 #include <vtkm/Bitset.h>
 #include <vtkm/VecVariable.h>
 #include <vtkm/VectorAnalysis.h>
+#include <vtkm/cont/Serialization.h>

 namespace vtkm
 {
@ -64,24 +65,24 @@ inline VTKM_CONT std::ostream& operator<<(std::ostream& s, const vtkm::ParticleS
  return s;
 }

-class Particle
+class ParticleBase
 {
 public:
  VTKM_EXEC_CONT
-  Particle() {}
+  ParticleBase() {}

-  VTKM_EXEC_CONT virtual ~Particle() noexcept
+  VTKM_EXEC_CONT virtual ~ParticleBase() noexcept
  {
    // This must not be defaulted, since defaulted virtual destructors are
    // troublesome with CUDA __host__ __device__ markup.
  }

  VTKM_EXEC_CONT
-  Particle(const vtkm::Vec3f& p,
-           const vtkm::Id& id,
-           const vtkm::Id& numSteps = 0,
-           const vtkm::ParticleStatus& status = vtkm::ParticleStatus(),
-           const vtkm::FloatDefault& time = 0)
+  ParticleBase(const vtkm::Vec3f& p,
+               const vtkm::Id& id,
+               const vtkm::Id& numSteps = 0,
+               const vtkm::ParticleStatus& status = vtkm::ParticleStatus(),
+               const vtkm::FloatDefault& time = 0)
    : Pos(p)
    , ID(id)
    , NumSteps(numSteps)
@ -91,7 +92,7 @@ public:
  }

  VTKM_EXEC_CONT
-  Particle(const vtkm::Particle& p)
+  ParticleBase(const vtkm::ParticleBase& p)
    : Pos(p.Pos)
    , ID(p.ID)
    , NumSteps(p.NumSteps)
@ -100,7 +101,7 @@ public:
  {
  }

-  vtkm::Particle& operator=(const vtkm::Particle&) = default;
+  vtkm::ParticleBase& operator=(const vtkm::ParticleBase&) = default;

  // The basic particle is only meant to be advected in a velocity
  // field. In that case it is safe to assume that the velocity value
@ -122,13 +123,13 @@ public:
  vtkm::FloatDefault Time = 0;
 };

-class Massless : public vtkm::Particle
+class Particle : public vtkm::ParticleBase
 {
 public:
  VTKM_EXEC_CONT
-  Massless() {}
+  Particle() {}

-  VTKM_EXEC_CONT ~Massless() noexcept override
+  VTKM_EXEC_CONT ~Particle() noexcept override
  {
    // This must not be defaulted, since defaulted virtual destructors are
    // troublesome with CUDA __host__ __device__ markup.
@ -136,21 +137,15 @@ public:


  VTKM_EXEC_CONT
-  Massless(const vtkm::Vec3f& p,
+  Particle(const vtkm::Vec3f& p,
           const vtkm::Id& id,
           const vtkm::Id& numSteps = 0,
           const vtkm::ParticleStatus& status = vtkm::ParticleStatus(),
           const vtkm::FloatDefault& time = 0)
-    : Particle(p, id, numSteps, status, time)
+    : ParticleBase(p, id, numSteps, status, time)
  {
  }

-  /*VTKM_EXEC_CONT
-  Massless(const vtkm::Massless& p)
-    : Particle(p)
-  {
-  }*/
-
  VTKM_EXEC_CONT
  vtkm::Vec3f Next(const vtkm::VecVariable<vtkm::Vec3f, 2>& vectors,
                   const vtkm::FloatDefault& length) override
@ -170,7 +165,7 @@ public:
  }
 };

-class Electron : public vtkm::Particle
+class Electron : public vtkm::ParticleBase
 {
 public:
  VTKM_EXEC_CONT
@ -186,7 +181,7 @@ public:
           const vtkm::Id& numSteps = 0,
           const vtkm::ParticleStatus& status = vtkm::ParticleStatus(),
           const vtkm::FloatDefault& time = 0)
-    : Particle(position, id, numSteps, status, time)
+    : ParticleBase(position, id, numSteps, status, time)
    , Mass(mass)
    , Charge(charge)
    , Weighting(weighting)
@ -254,8 +249,68 @@ private:
  vtkm::Vec3f Momentum;
  constexpr static vtkm::FloatDefault SPEED_OF_LIGHT =
    static_cast<vtkm::FloatDefault>(2.99792458e8);
+
+  friend struct mangled_diy_namespace::Serialization<vtkm::Electron>;
 };

 } //namespace vtkm

+
+namespace mangled_diy_namespace
+{
+template <>
+struct Serialization<vtkm::Particle>
+{
+public:
+  static VTKM_CONT void save(BinaryBuffer& bb, const vtkm::Particle& p)
+  {
+    vtkmdiy::save(bb, p.Pos);
+    vtkmdiy::save(bb, p.ID);
+    vtkmdiy::save(bb, p.NumSteps);
+    vtkmdiy::save(bb, p.Status);
+    vtkmdiy::save(bb, p.Time);
+  }
+
+  static VTKM_CONT void load(BinaryBuffer& bb, vtkm::Particle& p)
+  {
+    vtkmdiy::load(bb, p.Pos);
+    vtkmdiy::load(bb, p.ID);
+    vtkmdiy::load(bb, p.NumSteps);
+    vtkmdiy::load(bb, p.Status);
+    vtkmdiy::load(bb, p.Time);
+  }
+};
+
+template <>
+struct Serialization<vtkm::Electron>
+{
+public:
+  static VTKM_CONT void save(BinaryBuffer& bb, const vtkm::Electron& e)
+  {
+    vtkmdiy::save(bb, e.Pos);
+    vtkmdiy::save(bb, e.ID);
+    vtkmdiy::save(bb, e.NumSteps);
+    vtkmdiy::save(bb, e.Status);
+    vtkmdiy::save(bb, e.Time);
+    vtkmdiy::save(bb, e.Mass);
+    vtkmdiy::save(bb, e.Charge);
+    vtkmdiy::save(bb, e.Weighting);
+    vtkmdiy::save(bb, e.Momentum);
+  }
+
+  static VTKM_CONT void load(BinaryBuffer& bb, vtkm::Electron& e)
+  {
+    vtkmdiy::load(bb, e.Pos);
+    vtkmdiy::load(bb, e.ID);
+    vtkmdiy::load(bb, e.NumSteps);
+    vtkmdiy::load(bb, e.Status);
+    vtkmdiy::load(bb, e.Time);
+    vtkmdiy::load(bb, e.Mass);
+    vtkmdiy::load(bb, e.Charge);
+    vtkmdiy::load(bb, e.Weighting);
+    vtkmdiy::load(bb, e.Momentum);
+  }
+};
+}
+
 #endif // vtk_m_Particle_h
--- a/vtkm/StaticAssert.h
+++ b/vtkm/StaticAssert.h
@ -13,7 +13,7 @@

 #include <type_traits>

-#define VTKM_STATIC_ASSERT(condition)                                                              \
+#define VTKM_STATIC_ASSERT(condition) \
  static_assert((condition), "Failed static assert: " #condition)
 #define VTKM_STATIC_ASSERT_MSG(condition, message) static_assert((condition), message)

@ -30,7 +30,7 @@ struct ReadTheSourceCodeHereForHelpOnThisError<true> : std::true_type

 } // namespace vtkm

-#define VTKM_READ_THE_SOURCE_CODE_FOR_HELP(noError)                                                \
+#define VTKM_READ_THE_SOURCE_CODE_FOR_HELP(noError) \
  VTKM_STATIC_ASSERT(vtkm::ReadTheSourceCodeHereForHelpOnThisError<noError>::value)

 #endif //vtk_m_StaticAssert_h
--- a/vtkm/TopologyElementTag.h
+++ b/vtkm/TopologyElementTag.h
@ -89,8 +89,8 @@ struct TopologyElementTagCheck<vtkm::TopologyElementTagFace> : std::true_type
 {
 };

-#define VTKM_IS_TOPOLOGY_ELEMENT_TAG(type)                                                         \
-  static_assert(::vtkm::internal::TopologyElementTagCheck<type>::value,                            \
+#define VTKM_IS_TOPOLOGY_ELEMENT_TAG(type)                              \
+  static_assert(::vtkm::internal::TopologyElementTagCheck<type>::value, \
                "Invalid Topology Element Tag being used")

 } // namespace internal
--- a/vtkm/TypeTraits.h
+++ b/vtkm/TypeTraits.h
@ -82,26 +82,26 @@ struct TypeTraits<const T> : TypeTraits<T>
 {
 };

-#define VTKM_BASIC_REAL_TYPE(T)                                                                    \
-  template <>                                                                                      \
-  struct TypeTraits<T>                                                                             \
-  {                                                                                                \
-    using NumericTag = TypeTraitsRealTag;                                                          \
-    using DimensionalityTag = TypeTraitsScalarTag;                                                 \
-    VTKM_EXEC_CONT static T ZeroInitialization() { return T(); }                                   \
+#define VTKM_BASIC_REAL_TYPE(T)                                  \
+  template <>                                                    \
+  struct TypeTraits<T>                                           \
+  {                                                              \
+    using NumericTag = TypeTraitsRealTag;                        \
+    using DimensionalityTag = TypeTraitsScalarTag;               \
+    VTKM_EXEC_CONT static T ZeroInitialization() { return T(); } \
  };

-#define VTKM_BASIC_INTEGER_TYPE(T)                                                                 \
-  template <>                                                                                      \
-  struct TypeTraits<T>                                                                             \
-  {                                                                                                \
-    using NumericTag = TypeTraitsIntegerTag;                                                       \
-    using DimensionalityTag = TypeTraitsScalarTag;                                                 \
-    VTKM_EXEC_CONT static T ZeroInitialization()                                                   \
-    {                                                                                              \
-      using ReturnType = T;                                                                        \
-      return ReturnType();                                                                         \
-    }                                                                                              \
+#define VTKM_BASIC_INTEGER_TYPE(T)                 \
+  template <>                                      \
+  struct TypeTraits<T>                             \
+  {                                                \
+    using NumericTag = TypeTraitsIntegerTag;       \
+    using DimensionalityTag = TypeTraitsScalarTag; \
+    VTKM_EXEC_CONT static T ZeroInitialization()   \
+    {                                              \
+      using ReturnType = T;                        \
+      return ReturnType();                         \
+    }                                              \
  };

 /// Traits for basic C++ types.
--- a/vtkm/Types.h
+++ b/vtkm/Types.h
@ -1558,11 +1558,11 @@ static inline VTKM_EXEC_CONT typename detail::DotType<T>::type Dot(const vtkm::V
 }
 // Integer types of a width less than an integer get implicitly casted to
 // an integer when doing a multiplication.
-#define VTK_M_SCALAR_DOT(stype)                                                                    \
-  static inline VTKM_EXEC_CONT detail::DotType<stype>::type dot(stype a, stype b)                  \
-  {                                                                                                \
-    return a * b;                                                                                  \
-  } /* LEGACY */                                                                                   \
+#define VTK_M_SCALAR_DOT(stype)                                                   \
+  static inline VTKM_EXEC_CONT detail::DotType<stype>::type dot(stype a, stype b) \
+  {                                                                               \
+    return a * b;                                                                 \
+  } /* LEGACY */                                                                  \
  static inline VTKM_EXEC_CONT detail::DotType<stype>::type Dot(stype a, stype b) { return a * b; }
 VTK_M_SCALAR_DOT(vtkm::Int8)
 VTK_M_SCALAR_DOT(vtkm::UInt8)
--- a/vtkm/VecTraits.h
+++ b/vtkm/VecTraits.h
@ -556,13 +556,13 @@ struct VTKM_NEVER_EXPORT VecTraits<vtkm::Pair<T, U>>

 } // anonymous namespace

-#define VTKM_BASIC_TYPE_VECTOR(type)                                                               \
-  namespace vtkm                                                                                   \
-  {                                                                                                \
-  template <>                                                                                      \
-  struct VTKM_NEVER_EXPORT VecTraits<type> : public vtkm::internal::VecTraitsBasic<type>           \
-  {                                                                                                \
-  };                                                                                               \
+#define VTKM_BASIC_TYPE_VECTOR(type)                                                     \
+  namespace vtkm                                                                         \
+  {                                                                                      \
+  template <>                                                                            \
+  struct VTKM_NEVER_EXPORT VecTraits<type> : public vtkm::internal::VecTraitsBasic<type> \
+  {                                                                                      \
+  };                                                                                     \
  }

 /// Allows you to treat basic types as if they were vectors.
--- a/vtkm/cont/ArrayCopy.h
+++ b/vtkm/cont/ArrayCopy.h
@ -16,6 +16,8 @@
 #include <vtkm/cont/ErrorExecution.h>
 #include <vtkm/cont/Logging.h>

+#include <vtkm/cont/vtkm_cont_export.h>
+
 // TODO: When virtual arrays are available, compile the implementation in a .cxx/.cu file. Common
 // arrays are copied directly but anything else would be copied through virtual methods.

@ -27,12 +29,13 @@ namespace cont
 namespace detail
 {

-// normal element-wise copy:
+// Element-wise copy.
+// TODO: Remove last argument once ArrayHandleNewStyle becomes ArrayHandle
 template <typename InArrayType, typename OutArrayType>
-void ArrayCopyImpl(const InArrayType& in, OutArrayType& out, std::false_type /* Copy storage */)
+void ArrayCopyWithAlgorithm(const InArrayType& source, OutArrayType& destination)
 {
  // Find the device that already has a copy of the data:
-  vtkm::cont::DeviceAdapterId devId = in.GetDeviceAdapterId();
+  vtkm::cont::DeviceAdapterId devId = source.GetDeviceAdapterId();

  // If the data is not on any device, let the runtime tracker pick an available
  // parallel copy algorithm.
@ -41,14 +44,14 @@ void ArrayCopyImpl(const InArrayType& in, OutArrayType& out, std::false_type /*
    devId = vtkm::cont::make_DeviceAdapterId(VTKM_DEVICE_ADAPTER_ANY);
  }

-  bool success = vtkm::cont::Algorithm::Copy(devId, in, out);
+  bool success = vtkm::cont::Algorithm::Copy(devId, source, destination);

  if (!success && devId.GetValue() != VTKM_DEVICE_ADAPTER_ANY)
  { // Retry on any device if the first attempt failed.
    VTKM_LOG_S(vtkm::cont::LogLevel::Error,
               "Failed to run ArrayCopy on device '" << devId.GetName()
                                                     << "'. Retrying on any device.");
-    success = vtkm::cont::Algorithm::Copy(vtkm::cont::DeviceAdapterTagAny{}, in, out);
+    success = vtkm::cont::Algorithm::Copy(vtkm::cont::DeviceAdapterTagAny{}, source, destination);
  }

  if (!success)
@ -57,9 +60,17 @@ void ArrayCopyImpl(const InArrayType& in, OutArrayType& out, std::false_type /*
  }
 }

+// TODO: Remove last argument once ArrayHandleNewStyle becomes ArrayHandle
+template <typename InArrayType, typename OutArrayType>
+void ArrayCopyOldImpl(const InArrayType& in, OutArrayType& out, std::false_type /* Copy storage */)
+{
+  ArrayCopyWithAlgorithm(in, out);
+}
+
 // Copy storage for implicit arrays, must be of same type:
+// TODO: This will go away once ArrayHandleNewStyle becomes ArrayHandle.
 template <typename ArrayType>
-void ArrayCopyImpl(const ArrayType& in, ArrayType& out, std::true_type /* Copy storage */)
+void ArrayCopyOldImpl(const ArrayType& in, ArrayType& out, std::true_type /* Copy storage */)
 {
  // This is only called if in/out are the same type and the handle is not
  // writable. This allows read-only implicit array handles to be copied.
@ -67,6 +78,39 @@ void ArrayCopyImpl(const ArrayType& in, ArrayType& out, std::true_type /* Copy s
  out = ArrayType(newStorage);
 }

+// TODO: This will go away once ArrayHandleNewStyle becomes ArrayHandle.
+template <typename InArrayType, typename OutArrayType>
+VTKM_CONT void ArrayCopyImpl(const InArrayType& source,
+                             OutArrayType& destination,
+                             std::false_type /* New style */)
+{
+  using SameTypes = std::is_same<InArrayType, OutArrayType>;
+  using IsWritable = vtkm::cont::internal::IsWritableArrayHandle<OutArrayType>;
+  using JustCopyStorage = std::integral_constant<bool, SameTypes::value && !IsWritable::value>;
+  ArrayCopyOldImpl(source, destination, JustCopyStorage{});
+}
+
+// TODO: ArrayHandleNewStyle will eventually become ArrayHandle, in which case this
+// will become ArrayCopyWithAlgorithm
+template <typename T1, typename S1, typename T2, typename S2>
+VTKM_CONT void ArrayCopyImpl(const vtkm::cont::ArrayHandle<T1, S1>& source,
+                             vtkm::cont::ArrayHandle<T2, S2>& destination,
+                             std::true_type /* New style */)
+{
+  VTKM_STATIC_ASSERT((!std::is_same<T1, T2>::value || !std::is_same<S1, S2>::value));
+  ArrayCopyWithAlgorithm(source, destination);
+}
+
+// TODO: ArrayHandleNewStyle will eventually become ArrayHandle, in which case this
+// will become the only version with the same array types.
+template <typename T, typename S>
+VTKM_CONT void ArrayCopyImpl(const vtkm::cont::ArrayHandle<T, S>& source,
+                             vtkm::cont::ArrayHandle<T, S>& destination,
+                             std::true_type /* New style */)
+{
+  source.DeepCopy(destination);
+}
+
 } // namespace detail

 /// \brief Does a deep copy from one array to another array.
@ -93,11 +137,11 @@ VTKM_CONT void ArrayCopy(const vtkm::cont::ArrayHandle<InValueType, InStorage>&
  using IsWritable = vtkm::cont::internal::IsWritableArrayHandle<OutArrayType>;

  // There are three cases handled here:
-  // 1. Output is writable:
-  //    -> Do element-wise copy (normal copy behavior)
-  // 2. Output is not writable and arrays are same type:
-  //    -> just copy storage (special case for implicit array cloning)
-  // 3. Output is not writable and arrays are different types:
+  // 1. The arrays are the same type:
+  //    -> Deep copy the buffers and the Storage object
+  // 2. The arrays are different and the output is writable:
+  //    -> Do element-wise copy
+  // 3. The arrays are different and the output is not writable:
  //    -> fail (cannot copy)

  // Give a nice error message for case 3:
@ -105,10 +149,11 @@ VTKM_CONT void ArrayCopy(const vtkm::cont::ArrayHandle<InValueType, InStorage>&
                         "Cannot copy to a read-only array with a different "
                         "type than the source.");

-  using JustCopyStorage = std::integral_constant<bool, SameTypes::value && !IsWritable::value>;
+  using IsNewStyle =
+    std::is_base_of<vtkm::cont::ArrayHandleNewStyle<InValueType, InStorage>, InArrayType>;

  // Static dispatch cases 1 & 2
-  detail::ArrayCopyImpl(source, destination, JustCopyStorage{});
+  detail::ArrayCopyImpl(source, destination, std::integral_constant<bool, IsNewStyle::value>{});
 }

 // Forward declaration
--- a/vtkm/cont/ArrayGetValues.h
+++ b/vtkm/cont/ArrayGetValues.h
@ -101,8 +101,7 @@ VTKM_CONT void ArrayGetValues(const vtkm::cont::ArrayHandle<vtkm::Id, SIds>& ids
    { // Retry on any device if the first attempt failed.
      VTKM_LOG_S(vtkm::cont::LogLevel::Error,
                 "Failed to run ArrayGetValues on device '"
-                   << devId.GetName()
-                   << "'. Falling back to control-side copy.");
+                   << devId.GetName() << "'. Falling back to control-side copy.");
      copyComplete = vtkm::cont::Algorithm::Copy(vtkm::cont::DeviceAdapterTagAny{}, input, output);
    }
  }
--- a/vtkm/cont/ArrayHandle.cxx
+++ b/vtkm/cont/ArrayHandle.cxx
@ -24,9 +24,7 @@ VTKM_CONT void ArrayHandleReleaseResourcesExecution(

  for (auto&& buf : buffers)
  {
-    // Getting a write host buffer should invalidate any execution arrays.
-    // Might want to make something more explicit in Buffer.
-    buf.WritePointerHost(token);
+    buf.ReleaseDeviceResources();
  }
 }

--- a/vtkm/cont/ArrayHandle.h
+++ b/vtkm/cont/ArrayHandle.h
@ -135,7 +135,7 @@ struct ArrayHandleCheck
  using type = typename std::is_base_of<::vtkm::cont::internal::ArrayHandleBase, U>::type;
 };

-#define VTKM_IS_ARRAY_HANDLE(T)                                                                    \
+#define VTKM_IS_ARRAY_HANDLE(T) \
  VTKM_STATIC_ASSERT(::vtkm::cont::internal::ArrayHandleCheck<T>::type::value)

 } // namespace internal
@ -173,7 +173,10 @@ struct GetTypeInParentheses<void(T)>
  }                                                                                                \
                                                                                                   \
  VTKM_CONT                                                                                        \
-  classname(Thisclass&& src) noexcept : Superclass(std::move(src)) {}                              \
+  classname(Thisclass&& src) noexcept                                                              \
+    : Superclass(std::move(src))                                                                   \
+  {                                                                                                \
+  }                                                                                                \
                                                                                                   \
  VTKM_CONT                                                                                        \
  classname(const vtkm::cont::ArrayHandle<typename__ Superclass::ValueType,                        \
@ -227,7 +230,7 @@ struct GetTypeInParentheses<void(T)>
 /// templated. For ArrayHandle sublcasses that are not templates, use
 /// VTKM_ARRAY_HANDLE_SUBCLASS_NT.
 ///
-#define VTKM_ARRAY_HANDLE_SUBCLASS(classname, fullclasstype, superclass)                           \
+#define VTKM_ARRAY_HANDLE_SUBCLASS(classname, fullclasstype, superclass) \
  VTK_M_ARRAY_HANDLE_SUBCLASS_IMPL(classname, fullclasstype, superclass, typename)

 /// \brief Macro to make default methods in ArrayHandle subclasses.
@ -250,7 +253,7 @@ struct GetTypeInParentheses<void(T)>
 /// templated. For ArrayHandle sublcasses that are templates, use
 /// VTKM_ARRAY_HANDLE_SUBCLASS.
 ///
-#define VTKM_ARRAY_HANDLE_SUBCLASS_NT(classname, superclass)                                       \
+#define VTKM_ARRAY_HANDLE_SUBCLASS_NT(classname, superclass) \
  VTK_M_ARRAY_HANDLE_SUBCLASS_IMPL(classname, (classname), superclass, )

 /// \brief Manages an array-worth of data.
@ -892,6 +895,87 @@ VTKM_CONT_EXPORT VTKM_CONT vtkm::cont::DeviceAdapterId ArrayHandleGetDeviceAdapt

 } // namespace detail

+// This macro is used to declare an ArrayHandle that uses the new style of Storage
+// that leverages Buffer objects. This macro will go away once ArrayHandle
+// is replaced with ArrayHandleNewStyle. To use this macro, first have a declaration
+// of the template and then put the macro like this:
+//
+// template <typename T>
+// VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::StorageTagFoo);
+//
+// Don't forget to use VTKM_PASS_COMMAS if one of the macro arguments contains
+// a template with multiple parameters.
+#define VTKM_ARRAY_HANDLE_NEW_STYLE(ValueType_, StorageType_)                       \
+  class VTKM_ALWAYS_EXPORT ArrayHandle<ValueType_, StorageType_>                    \
+    : public ArrayHandleNewStyle<ValueType_, StorageType_>                          \
+  {                                                                                 \
+    using Superclass = ArrayHandleNewStyle<ValueType_, StorageType_>;               \
+                                                                                    \
+  public:                                                                           \
+    VTKM_CONT                                                                       \
+    ArrayHandle()                                                                   \
+      : Superclass()                                                                \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT                                                                       \
+    ArrayHandle(const ArrayHandle<ValueType_, StorageType_>& src)                   \
+      : Superclass(src)                                                             \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT                                                                       \
+    ArrayHandle(ArrayHandle<ValueType_, StorageType_>&& src) noexcept               \
+      : Superclass(std::move(src))                                                  \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT                                                                       \
+    ArrayHandle(const ArrayHandleNewStyle<ValueType_, StorageType_>& src)           \
+      : Superclass(src)                                                             \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT                                                                       \
+    ArrayHandle(ArrayHandleNewStyle<ValueType_, StorageType_>&& src) noexcept       \
+      : Superclass(std::move(src))                                                  \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT ArrayHandle(const vtkm::cont::internal::Buffer* buffers)              \
+      : Superclass(buffers)                                                         \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT ArrayHandle(const std::vector<vtkm::cont::internal::Buffer>& buffers) \
+      : Superclass(buffers)                                                         \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT ArrayHandle(std::vector<vtkm::cont::internal::Buffer>&& buffers)      \
+      : Superclass(std::move(buffers))                                              \
+    {                                                                               \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT                                                                       \
+    ArrayHandle<ValueType_, StorageType_>& operator=(                               \
+      const ArrayHandle<ValueType_, StorageType_>& src)                             \
+    {                                                                               \
+      this->Superclass::operator=(src);                                             \
+      return *this;                                                                 \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT                                                                       \
+    ArrayHandle<ValueType_, StorageType_>& operator=(                               \
+      ArrayHandle<ValueType_, StorageType_>&& src) noexcept                         \
+    {                                                                               \
+      this->Superclass::operator=(std::move(src));                                  \
+      return *this;                                                                 \
+    }                                                                               \
+                                                                                    \
+    VTKM_CONT ~ArrayHandle() {}                                                     \
+  }
+
 /// This new style of ArrayHandle will eventually replace the classic ArrayHandle
 template <typename T, typename StorageTag_ = VTKM_DEFAULT_STORAGE_TAG>
 class VTKM_ALWAYS_EXPORT ArrayHandleNewStyle : public internal::ArrayHandleBase
@ -904,9 +988,6 @@ public:
  using ReadPortalType = typename StorageType::ReadPortalType;
  using WritePortalType = typename StorageType::WritePortalType;

-  static constexpr vtkm::IdComponent NUMBER_OF_BUFFERS = StorageType::NUMBER_OF_BUFFERS;
-  static constexpr vtkm::IdComponent GetNumberOfBuffers() { return NUMBER_OF_BUFFERS; }
-
  // TODO: Deprecate this
  template <typename Device>
  struct ExecutionTypes
@ -923,7 +1004,7 @@ public:
  /// Constructs an empty ArrayHandle.
  ///
  VTKM_CONT ArrayHandleNewStyle()
-    : Internals(std::make_shared<InternalsStruct>())
+    : Buffers(static_cast<std::size_t>(StorageType::GetNumberOfBuffers()))
  {
  }

@ -935,7 +1016,7 @@ public:
  /// created for all devices, and it would not be valid for all devices.
  ///
  VTKM_CONT ArrayHandleNewStyle(const vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& src)
-    : Internals(src.Internals)
+    : Buffers(src.Buffers)
  {
  }

@ -948,7 +1029,7 @@ public:
  ///
  VTKM_CONT ArrayHandleNewStyle(
    vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>&& src) noexcept
-    : Internals(std::move(src.Internals))
+    : Buffers(std::move(src.Buffers))
  {
  }

@ -956,17 +1037,20 @@ public:
  /// Special constructor for subclass specializations that need to set the
  /// initial state array. Used when pulling data from other sources.
  ///
-  VTKM_CONT ArrayHandleNewStyle(const std::vector<vtkm::cont::internal::Buffer>& buffers,
-                                const StorageType& storage = StorageType())
-    : Internals(std::make_shared<InternalsStruct>(buffers.data(), storage))
+  VTKM_CONT ArrayHandleNewStyle(const std::vector<vtkm::cont::internal::Buffer>& buffers)
+    : Buffers(buffers)
  {
-    VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Internals->Buffers.size()) ==
-                GetNumberOfBuffers());
+    VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Buffers.size()) == this->GetNumberOfBuffers());
  }

-  VTKM_CONT ArrayHandleNewStyle(const vtkm::cont::internal::Buffer* buffers,
-                                const StorageType& storage = StorageType())
-    : Internals(std::make_shared<InternalsStruct>(buffers, storage))
+  VTKM_CONT ArrayHandleNewStyle(std::vector<vtkm::cont::internal::Buffer>&& buffers) noexcept
+    : Buffers(std::move(buffers))
+  {
+    VTKM_ASSERT(static_cast<vtkm::IdComponent>(this->Buffers.size()) == this->GetNumberOfBuffers());
+  }
+
+  VTKM_CONT ArrayHandleNewStyle(const vtkm::cont::internal::Buffer* buffers)
+    : Buffers(buffers, buffers + StorageType::GetNumberOfBuffers())
  {
  }
  ///@}
@ -986,7 +1070,7 @@ public:
  vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& operator=(
    const vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& src)
  {
-    this->Internals = src.Internals;
+    this->Buffers = src.Buffers;
    return *this;
  }

@ -996,7 +1080,7 @@ public:
  vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& operator=(
    vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>&& src) noexcept
  {
-    this->Internals = std::move(src.Internals);
+    this->Buffers = std::move(src.Buffers);
    return *this;
  }

@ -1006,13 +1090,13 @@ public:
  VTKM_CONT
  bool operator==(const ArrayHandle<ValueType, StorageTag>& rhs) const
  {
-    return this->Internals == rhs.Internals;
+    return this->Buffers == rhs.Buffers;
  }

  VTKM_CONT
  bool operator!=(const ArrayHandle<ValueType, StorageTag>& rhs) const
  {
-    return this->Internals != rhs.Internals;
+    return this->Buffers != rhs.Buffers;
  }

  template <typename VT, typename ST>
@ -1027,9 +1111,14 @@ public:
    return true; // different valuetype and/or storage
  }

+  VTKM_CONT vtkm::IdComponent GetNumberOfBuffers() const
+  {
+    return StorageType::GetNumberOfBuffers();
+  }
+
  /// Get the storage.
  ///
-  VTKM_CONT const StorageType& GetStorage() const { return this->Internals->Storage; }
+  VTKM_CONT StorageType GetStorage() const { return StorageType{}; }

  /// Get the array portal of the control array.
  /// Since worklet invocations are asynchronous and this routine is a synchronization point,
@ -1073,7 +1162,7 @@ public:
  VTKM_CONT ReadPortalType ReadPortal() const
  {
    vtkm::cont::Token token;
-    return this->Internals->Storage.CreateReadPortal(
+    return StorageType::CreateReadPortal(
      this->GetBuffers(), vtkm::cont::DeviceAdapterTagUndefined{}, token);
  }

@ -1090,7 +1179,7 @@ public:
  {
    vtkm::cont::Token token;

-    return this->Internals->Storage.CreateWritePortal(
+    return StorageType::CreateWritePortal(
      this->GetBuffers(), vtkm::cont::DeviceAdapterTagUndefined{}, token);
  }

@ -1098,7 +1187,7 @@ public:
  ///
  VTKM_CONT vtkm::Id GetNumberOfValues() const
  {
-    return this->Internals->Storage.GetNumberOfValues(this->GetBuffers());
+    return StorageType::GetNumberOfValues(this->GetBuffers());
  }

  ///@{
@ -1117,7 +1206,7 @@ public:
                          vtkm::CopyFlag preserve,
                          vtkm::cont::Token& token)
  {
-    this->Internals->Storage.ResizeBuffers(numberOfValues, this->GetBuffers(), preserve, token);
+    StorageType::ResizeBuffers(numberOfValues, this->GetBuffers(), preserve, token);
  }

  VTKM_CONT void Allocate(vtkm::Id numberOfValues, vtkm::CopyFlag preserve = vtkm::CopyFlag::Off)
@ -1138,7 +1227,7 @@ public:
  ///
  VTKM_CONT void ReleaseResourcesExecution()
  {
-    detail::ArrayHandleReleaseResourcesExecution(this->Internals->Buffers);
+    detail::ArrayHandleReleaseResourcesExecution(this->Buffers);
  }

  /// Releases all resources in both the control and execution environments.
@ -1161,7 +1250,7 @@ public:
  VTKM_CONT ReadPortalType PrepareForInput(vtkm::cont::DeviceAdapterId device,
                                           vtkm::cont::Token& token) const
  {
-    return this->Internals->Storage.CreateReadPortal(this->GetBuffers(), device, token);
+    return StorageType::CreateReadPortal(this->GetBuffers(), device, token);
  }

  /// Prepares this array to be used in an in-place operation (both as input
@ -1180,7 +1269,7 @@ public:
  VTKM_CONT WritePortalType PrepareForInPlace(vtkm::cont::DeviceAdapterId device,
                                              vtkm::cont::Token& token) const
  {
-    return this->Internals->Storage.CreateWritePortal(this->GetBuffers(), device, token);
+    return StorageType::CreateWritePortal(this->GetBuffers(), device, token);
  }

  /// Prepares (allocates) this array to be used as an output from an operation
@ -1202,7 +1291,7 @@ public:
                                             vtkm::cont::Token& token)
  {
    this->Allocate(numberOfValues, vtkm::CopyFlag::Off, token);
-    return this->Internals->Storage.CreateWritePortal(this->GetBuffers(), device, token);
+    return StorageType::CreateWritePortal(this->GetBuffers(), device, token);
  }

  template <typename DeviceAdapterTag>
@ -1233,7 +1322,7 @@ public:
  ///
  VTKM_CONT bool IsOnDevice(vtkm::cont::DeviceAdapterId device) const
  {
-    return detail::ArrayHandleIsOnDevice(this->Internals->Buffers, device);
+    return detail::ArrayHandleIsOnDevice(this->Buffers, device);
  }

  /// Returns true if the ArrayHandle's data is on the host. If the data are on the given
@ -1256,7 +1345,7 @@ public:
  VTKM_CONT
  DeviceAdapterId GetDeviceAdapterId() const
  {
-    return detail::ArrayHandleGetDeviceAdapterId(this->Internals->Buffers);
+    return detail::ArrayHandleGetDeviceAdapterId(this->Buffers);
  }

  /// Synchronizes the control array with the execution array. If either the
@ -1294,39 +1383,46 @@ public:
  ///
  VTKM_CONT void Enqueue(const vtkm::cont::Token& token) const
  {
-    for (auto&& buffer : this->Internals->Buffers)
+    for (auto&& buffer : this->Buffers)
    {
      buffer.Enqueue(token);
    }
  }

-  /// Returns the internal `Buffer` structures that hold the data.
+  /// \brief Deep copies the data in the array.
  ///
-  VTKM_CONT vtkm::cont::internal::Buffer* GetBuffers() const
+  /// Takes the data that is in this array and copies that data into the provided
+  /// \a destination.
+  ///
+  VTKM_CONT void DeepCopy(vtkm::cont::ArrayHandleNewStyle<ValueType, StorageTag>& destination) const
  {
-    return this->Internals->Buffers.data();
+    VTKM_ASSERT(this->Buffers.size() == destination.Buffers.size());
+
+    for (std::size_t bufferIndex = 0; bufferIndex < this->Buffers.size(); ++bufferIndex)
+    {
+      this->Buffers[bufferIndex].DeepCopy(destination.Buffers[bufferIndex]);
+    }
  }

+  /// Returns the internal `Buffer` structures that hold the data.
+  ///
+  VTKM_CONT vtkm::cont::internal::Buffer* GetBuffers() const { return this->Buffers.data(); }
+
 private:
-  struct InternalsStruct
+  mutable std::vector<vtkm::cont::internal::Buffer> Buffers;
+
+protected:
+  VTKM_CONT void SetBuffer(vtkm::IdComponent index, const vtkm::cont::internal::Buffer& buffer)
  {
-    mutable std::vector<vtkm::cont::internal::Buffer> Buffers;
-    mutable StorageType Storage;
+    this->Buffers[static_cast<std::size_t>(index)] = buffer;
+  }

-    VTKM_CONT InternalsStruct()
-      : Buffers(GetNumberOfBuffers())
-    {
-    }
-
-    VTKM_CONT InternalsStruct(const vtkm::cont::internal::Buffer* buffers,
-                              const StorageType& storage)
-      : Buffers(GetNumberOfBuffers())
-      , Storage(storage)
-    {
-      std::copy(buffers, buffers + GetNumberOfBuffers(), this->Buffers.begin());
-    }
-  };
-  std::shared_ptr<InternalsStruct> Internals;
+  // BufferContainer must be an iterable container of Buffer objects no larger than this->Buffers.
+  template <typename BufferContainer>
+  VTKM_CONT void SetBuffers(const BufferContainer& buffers)
+  {
+    std::copy(buffers.begin(), buffers.end(), this->Buffers.begin());
+  }
 };

 namespace detail
--- a/vtkm/cont/ArrayHandleBasic.cxx
+++ b/vtkm/cont/ArrayHandleBasic.cxx
@ -19,28 +19,10 @@ namespace cont
 namespace internal
 {

-namespace detail
-{
-
-vtkm::BufferSizeType NumberOfBytes(vtkm::Id numValues, std::size_t typeSize)
-{
-  VTKM_ASSERT(numValues >= 0);
-
-  if (numValues > (std::numeric_limits<vtkm::BufferSizeType>::max() /
-                   static_cast<vtkm::BufferSizeType>(typeSize)))
-  {
-    throw vtkm::cont::ErrorBadAllocation("Asking for a buffer too big to represent.");
-  }
-
-  return numValues * static_cast<vtkm::BufferSizeType>(typeSize);
-}
-
-} // namespace detail
-
-#define VTKM_STORAGE_INSTANTIATE(Type)                                                             \
-  template class VTKM_CONT_EXPORT Storage<Type, StorageTagBasic>;                                  \
-  template class VTKM_CONT_EXPORT Storage<vtkm::Vec<Type, 2>, StorageTagBasic>;                    \
-  template class VTKM_CONT_EXPORT Storage<vtkm::Vec<Type, 3>, StorageTagBasic>;                    \
+#define VTKM_STORAGE_INSTANTIATE(Type)                                          \
+  template class VTKM_CONT_EXPORT Storage<Type, StorageTagBasic>;               \
+  template class VTKM_CONT_EXPORT Storage<vtkm::Vec<Type, 2>, StorageTagBasic>; \
+  template class VTKM_CONT_EXPORT Storage<vtkm::Vec<Type, 3>, StorageTagBasic>; \
  template class VTKM_CONT_EXPORT Storage<vtkm::Vec<Type, 4>, StorageTagBasic>;

 VTKM_STORAGE_INSTANTIATE(char)
@ -59,10 +41,10 @@ VTKM_STORAGE_INSTANTIATE(vtkm::Float64)

 } // namespace internal

-#define VTKM_ARRAYHANDLE_INSTANTIATE(Type)                                                         \
-  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<Type, StorageTagBasic>;                      \
-  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 2>, StorageTagBasic>;        \
-  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 3>, StorageTagBasic>;        \
+#define VTKM_ARRAYHANDLE_INSTANTIATE(Type)                                                  \
+  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<Type, StorageTagBasic>;               \
+  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 2>, StorageTagBasic>; \
+  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 3>, StorageTagBasic>; \
  template class VTKM_CONT_EXPORT ArrayHandleNewStyle<vtkm::Vec<Type, 4>, StorageTagBasic>;

 VTKM_ARRAYHANDLE_INSTANTIATE(char)
--- a/vtkm/cont/ArrayHandleBasic.h
+++ b/vtkm/cont/ArrayHandleBasic.h
@ -27,125 +27,50 @@ namespace cont
 namespace internal
 {

-namespace detail
-{
-
-VTKM_CONT_EXPORT VTKM_CONT vtkm::BufferSizeType NumberOfBytes(vtkm::Id numValues,
-                                                              std::size_t typeSize);
-
-} // namespace detail
-
 template <typename T>
 class VTKM_ALWAYS_EXPORT Storage<T, vtkm::cont::StorageTagBasic>
 {
 public:
-  static constexpr vtkm::IdComponent NUMBER_OF_BUFFERS = 1;
-
  using ReadPortalType = vtkm::internal::ArrayPortalBasicRead<T>;
  using WritePortalType = vtkm::internal::ArrayPortalBasicWrite<T>;

-  VTKM_CONT void ResizeBuffers(vtkm::Id numValues,
-                               vtkm::cont::internal::Buffer* buffers,
-                               vtkm::CopyFlag preserve,
-                               vtkm::cont::Token& token)
+  VTKM_CONT static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
+
+  VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
+                                      vtkm::cont::internal::Buffer* buffers,
+                                      vtkm::CopyFlag preserve,
+                                      vtkm::cont::Token& token)
  {
-    buffers[0].SetNumberOfBytes(detail::NumberOfBytes(numValues, sizeof(T)), preserve, token);
+    buffers[0].SetNumberOfBytes(
+      vtkm::internal::NumberOfValuesToNumberOfBytes<T>(numValues), preserve, token);
  }

-  VTKM_CONT vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
+  VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
  {
    return static_cast<vtkm::Id>(buffers->GetNumberOfBytes()) / static_cast<vtkm::Id>(sizeof(T));
  }

-  VTKM_CONT ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
-                                            vtkm::cont::DeviceAdapterId device,
-                                            vtkm::cont::Token& token)
+  VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
+                                                   vtkm::cont::DeviceAdapterId device,
+                                                   vtkm::cont::Token& token)
  {
    return ReadPortalType(reinterpret_cast<const T*>(buffers[0].ReadPointerDevice(device, token)),
-                          this->GetNumberOfValues(buffers));
+                          GetNumberOfValues(buffers));
  }

-  VTKM_CONT WritePortalType CreateWritePortal(const vtkm::cont::internal::Buffer* buffers,
-                                              vtkm::cont::DeviceAdapterId device,
-                                              vtkm::cont::Token& token)
+  VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
+                                                     vtkm::cont::DeviceAdapterId device,
+                                                     vtkm::cont::Token& token)
  {
    return WritePortalType(reinterpret_cast<T*>(buffers[0].WritePointerDevice(device, token)),
-                           this->GetNumberOfValues(buffers));
+                           GetNumberOfValues(buffers));
  }
 };

 } // namespace internal

-// This can go away once ArrayHandle is replaced with ArrayHandleNewStyle
 template <typename T>
-class VTKM_ALWAYS_EXPORT ArrayHandle<T, vtkm::cont::StorageTagBasic>
-  : public ArrayHandleNewStyle<T, vtkm::cont::StorageTagBasic>
-{
-  using Superclass = ArrayHandleNewStyle<T, vtkm::cont::StorageTagBasic>;
-
-public:
-  VTKM_CONT
-  ArrayHandle()
-    : Superclass()
-  {
-  }
-
-  VTKM_CONT
-  ArrayHandle(const ArrayHandle<T, vtkm::cont::StorageTagBasic>& src)
-    : Superclass(src)
-  {
-  }
-
-  VTKM_CONT
-  ArrayHandle(ArrayHandle<T, vtkm::cont::StorageTagBasic>&& src) noexcept
-    : Superclass(std::move(src))
-  {
-  }
-
-  VTKM_CONT
-  ArrayHandle(const ArrayHandleNewStyle<T, vtkm::cont::StorageTagBasic>& src)
-    : Superclass(src)
-  {
-  }
-
-  VTKM_CONT
-  ArrayHandle(ArrayHandleNewStyle<T, vtkm::cont::StorageTagBasic>&& src) noexcept
-    : Superclass(std::move(src))
-  {
-  }
-
-  VTKM_CONT ArrayHandle(
-    const vtkm::cont::internal::Buffer* buffers,
-    const typename Superclass::StorageType& storage = typename Superclass::StorageType())
-    : Superclass(buffers, storage)
-  {
-  }
-
-  VTKM_CONT ArrayHandle(
-    const std::vector<vtkm::cont::internal::Buffer>& buffers,
-    const typename Superclass::StorageType& storage = typename Superclass::StorageType())
-    : Superclass(buffers, storage)
-  {
-  }
-
-  VTKM_CONT
-  ArrayHandle<T, vtkm::cont::StorageTagBasic>& operator=(
-    const ArrayHandle<T, vtkm::cont::StorageTagBasic>& src)
-  {
-    this->Superclass::operator=(src);
-    return *this;
-  }
-
-  VTKM_CONT
-  ArrayHandle<T, vtkm::cont::StorageTagBasic>& operator=(
-    ArrayHandle<T, vtkm::cont::StorageTagBasic>&& src) noexcept
-  {
-    this->Superclass::operator=(std::move(src));
-    return *this;
-  }
-
-  VTKM_CONT ~ArrayHandle() {}
-};
+VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::StorageTagBasic);

 template <typename T>
 class VTKM_ALWAYS_EXPORT ArrayHandleBasic : public ArrayHandle<T, vtkm::cont::StorageTagBasic>
@ -160,13 +85,13 @@ public:
    vtkm::Id numberOfValues,
    vtkm::cont::internal::BufferInfo::Deleter deleter,
    vtkm::cont::internal::BufferInfo::Reallocater reallocater = internal::InvalidRealloc)
-    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
-        vtkm::cont::internal::MakeBuffer(vtkm::cont::DeviceAdapterTagUndefined{},
-                                         array,
-                                         array,
-                                         internal::detail::NumberOfBytes(numberOfValues, sizeof(T)),
-                                         deleter,
-                                         reallocater) })
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{ vtkm::cont::internal::MakeBuffer(
+        vtkm::cont::DeviceAdapterTagUndefined{},
+        array,
+        array,
+        vtkm::internal::NumberOfValuesToNumberOfBytes<T>(numberOfValues),
+        deleter,
+        reallocater) })
  {
  }

@ -176,13 +101,13 @@ public:
    vtkm::cont::DeviceAdapterId device,
    vtkm::cont::internal::BufferInfo::Deleter deleter,
    vtkm::cont::internal::BufferInfo::Reallocater reallocater = internal::InvalidRealloc)
-    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
-        vtkm::cont::internal::MakeBuffer(device,
-                                         array,
-                                         array,
-                                         internal::detail::NumberOfBytes(numberOfValues, sizeof(T)),
-                                         deleter,
-                                         reallocater) })
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{ vtkm::cont::internal::MakeBuffer(
+        device,
+        array,
+        array,
+        vtkm::internal::NumberOfValuesToNumberOfBytes<T>(numberOfValues),
+        deleter,
+        reallocater) })
  {
  }

@ -192,13 +117,13 @@ public:
    vtkm::Id numberOfValues,
    vtkm::cont::internal::BufferInfo::Deleter deleter,
    vtkm::cont::internal::BufferInfo::Reallocater reallocater = internal::InvalidRealloc)
-    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
-        vtkm::cont::internal::MakeBuffer(vtkm::cont::DeviceAdapterTagUndefined{},
-                                         array,
-                                         container,
-                                         internal::detail::NumberOfBytes(numberOfValues, sizeof(T)),
-                                         deleter,
-                                         reallocater) })
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{ vtkm::cont::internal::MakeBuffer(
+        vtkm::cont::DeviceAdapterTagUndefined{},
+        array,
+        container,
+        vtkm::internal::NumberOfValuesToNumberOfBytes<T>(numberOfValues),
+        deleter,
+        reallocater) })
  {
  }

@ -209,13 +134,13 @@ public:
    vtkm::cont::DeviceAdapterId device,
    vtkm::cont::internal::BufferInfo::Deleter deleter,
    vtkm::cont::internal::BufferInfo::Reallocater reallocater = internal::InvalidRealloc)
-    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
-        vtkm::cont::internal::MakeBuffer(device,
-                                         array,
-                                         container,
-                                         internal::detail::NumberOfBytes(numberOfValues, sizeof(T)),
-                                         deleter,
-                                         reallocater) })
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{ vtkm::cont::internal::MakeBuffer(
+        device,
+        array,
+        container,
+        vtkm::internal::NumberOfValuesToNumberOfBytes<T>(numberOfValues),
+        deleter,
+        reallocater) })
  {
  }

@ -434,10 +359,10 @@ namespace internal

 /// \cond
 /// Make doxygen ignore this section
-#define VTKM_STORAGE_EXPORT(Type)                                                                  \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<Type, StorageTagBasic>;                  \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<vtkm::Vec<Type, 2>, StorageTagBasic>;    \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<vtkm::Vec<Type, 3>, StorageTagBasic>;    \
+#define VTKM_STORAGE_EXPORT(Type)                                                               \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<Type, StorageTagBasic>;               \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<vtkm::Vec<Type, 2>, StorageTagBasic>; \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<vtkm::Vec<Type, 3>, StorageTagBasic>; \
  extern template class VTKM_CONT_TEMPLATE_EXPORT Storage<vtkm::Vec<Type, 4>, StorageTagBasic>;

 VTKM_STORAGE_EXPORT(char)
@ -457,13 +382,13 @@ VTKM_STORAGE_EXPORT(vtkm::Float64)

 } // namespace internal

-#define VTKM_ARRAYHANDLE_EXPORT(Type)                                                              \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandleNewStyle<Type, StorageTagBasic>;      \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT                                                  \
-    ArrayHandleNewStyle<vtkm::Vec<Type, 2>, StorageTagBasic>;                                      \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT                                                  \
-    ArrayHandleNewStyle<vtkm::Vec<Type, 3>, StorageTagBasic>;                                      \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT                                                  \
+#define VTKM_ARRAYHANDLE_EXPORT(Type)                                                         \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandleNewStyle<Type, StorageTagBasic>; \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT                                             \
+    ArrayHandleNewStyle<vtkm::Vec<Type, 2>, StorageTagBasic>;                                 \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT                                             \
+    ArrayHandleNewStyle<vtkm::Vec<Type, 3>, StorageTagBasic>;                                 \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT                                             \
    ArrayHandleNewStyle<vtkm::Vec<Type, 4>, StorageTagBasic>;

 VTKM_ARRAYHANDLE_EXPORT(char)
--- a/vtkm/cont/ArrayHandleBitField.h
+++ b/vtkm/cont/ArrayHandleBitField.h
@ -29,10 +29,16 @@ public:
  using ValueType = bool;

  VTKM_EXEC_CONT
-  explicit ArrayPortalBitField(const BitPortalType& portal) noexcept : BitPortal{ portal } {}
+  explicit ArrayPortalBitField(const BitPortalType& portal) noexcept
+    : BitPortal{ portal }
+  {
+  }

  VTKM_EXEC_CONT
-  explicit ArrayPortalBitField(BitPortalType&& portal) noexcept : BitPortal{ std::move(portal) } {}
+  explicit ArrayPortalBitField(BitPortalType&& portal) noexcept
+    : BitPortal{ std::move(portal) }
+  {
+  }

  ArrayPortalBitField() noexcept = default;
  ArrayPortalBitField(const ArrayPortalBitField&) noexcept = default;
@ -65,107 +71,73 @@ struct VTKM_ALWAYS_EXPORT StorageTagBitField
 template <>
 class Storage<bool, StorageTagBitField>
 {
-  using BitPortalType = vtkm::cont::detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;
-  using BitPortalConstType =
-    vtkm::cont::detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;
+  using BitPortalType = vtkm::cont::detail::BitPortal;
+  using BitPortalConstType = vtkm::cont::detail::BitPortalConst;
+
+  using WordType = vtkm::WordTypeDefault;
+  static constexpr vtkm::Id BlockSize = vtkm::cont::detail::BitFieldTraits::BlockSize;
+  VTKM_STATIC_ASSERT(BlockSize >= static_cast<vtkm::Id>(sizeof(WordType)));

 public:
-  using ValueType = bool;
-  using PortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalType>;
-  using PortalConstType = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstType>;
+  using ReadPortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstType>;
+  using WritePortalType = vtkm::cont::internal::ArrayPortalBitField<BitPortalType>;

-  explicit VTKM_CONT Storage(const vtkm::cont::BitField& data)
-    : Data{ data }
+  VTKM_CONT static vtkm::IdComponent GetNumberOfBuffers() { return 1; }
+
+  VTKM_CONT static void ResizeBuffers(vtkm::Id numberOfBits,
+                                      vtkm::cont::internal::Buffer* buffers,
+                                      vtkm::CopyFlag preserve,
+                                      vtkm::cont::Token& token)
  {
+    const vtkm::Id bytesNeeded = (numberOfBits + CHAR_BIT - 1) / CHAR_BIT;
+    const vtkm::Id blocksNeeded = (bytesNeeded + BlockSize - 1) / BlockSize;
+    const vtkm::Id numBytes = blocksNeeded * BlockSize;
+
+    VTKM_LOG_F(vtkm::cont::LogLevel::MemCont,
+               "BitField Allocation: %llu bits, blocked up to %s bytes.",
+               static_cast<unsigned long long>(numberOfBits),
+               vtkm::cont::GetSizeString(static_cast<vtkm::UInt64>(numBytes)).c_str());
+
+    buffers[0].SetNumberOfBytes(numBytes, preserve, token);
+    vtkm::cont::detail::GetBitFieldMetaData(buffers[0])->NumberOfBits = numberOfBits;
  }

-  explicit VTKM_CONT Storage(vtkm::cont::BitField&& data) noexcept : Data{ std::move(data) } {}
-
-  VTKM_CONT Storage() = default;
-  VTKM_CONT Storage(const Storage&) = default;
-  VTKM_CONT Storage(Storage&&) noexcept = default;
-  VTKM_CONT Storage& operator=(const Storage&) = default;
-  VTKM_CONT Storage& operator=(Storage&&) noexcept = default;
-
-  VTKM_CONT
-  PortalType GetPortal() { return PortalType{ this->Data.WritePortal() }; }
-
-  VTKM_CONT
-  PortalConstType GetPortalConst() { return PortalConstType{ this->Data.ReadPortal() }; }
-
-  VTKM_CONT vtkm::Id GetNumberOfValues() const { return this->Data.GetNumberOfBits(); }
-  VTKM_CONT void Allocate(vtkm::Id numberOfValues) { this->Data.Allocate(numberOfValues); }
-  VTKM_CONT void Shrink(vtkm::Id numberOfValues) { this->Data.Shrink(numberOfValues); }
-  VTKM_CONT void ReleaseResources() { this->Data.ReleaseResources(); }
-
-  VTKM_CONT vtkm::cont::BitField GetBitField() const { return this->Data; }
-
-private:
-  vtkm::cont::BitField Data;
-};
-
-template <typename Device>
-class ArrayTransfer<bool, StorageTagBitField, Device>
-{
-  using AtomicInterface = AtomicInterfaceExecution<Device>;
-  using StorageType = Storage<bool, StorageTagBitField>;
-  using BitPortalExecution = vtkm::cont::detail::BitPortal<AtomicInterface>;
-  using BitPortalConstExecution = vtkm::cont::detail::BitPortalConst<AtomicInterface>;
-
-public:
-  using ValueType = bool;
-  using PortalControl = typename StorageType::PortalType;
-  using PortalConstControl = typename StorageType::PortalConstType;
-  using PortalExecution = vtkm::cont::internal::ArrayPortalBitField<BitPortalExecution>;
-  using PortalConstExecution = vtkm::cont::internal::ArrayPortalBitField<BitPortalConstExecution>;
-
-  VTKM_CONT
-  explicit ArrayTransfer(StorageType* storage)
-    : Data{ storage->GetBitField() }
+  VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
  {
+    vtkm::Id numberOfBits = vtkm::cont::detail::GetBitFieldMetaData(buffers[0])->NumberOfBits;
+    VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);
+    return numberOfBits;
  }

-  VTKM_CONT
-  vtkm::Id GetNumberOfValues() const { return this->Data.GetNumberOfBits(); }
-
-  VTKM_CONT
-  PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData), vtkm::cont::Token& token)
+  VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
+                                                   vtkm::cont::DeviceAdapterId device,
+                                                   vtkm::cont::Token& token)
  {
-    return PortalConstExecution{ this->Data.PrepareForInput(Device{}, token) };
+    vtkm::Id numberOfBits = GetNumberOfValues(buffers);
+    VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);
+
+    return ReadPortalType(
+      BitPortalConstType(buffers[0].ReadPointerDevice(device, token), numberOfBits));
  }

-  VTKM_CONT
-  PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData), vtkm::cont::Token& token)
+  VTKM_CONT static WritePortalType CreateWritePortal(const vtkm::cont::internal::Buffer* buffers,
+                                                     vtkm::cont::DeviceAdapterId device,
+                                                     vtkm::cont::Token& token)
  {
-    return PortalExecution{ this->Data.PrepareForInPlace(Device{}, token) };
+    vtkm::Id numberOfBits = GetNumberOfValues(buffers);
+    VTKM_ASSERT((buffers[0].GetNumberOfBytes() * CHAR_BIT) >= numberOfBits);
+
+    return WritePortalType(
+      BitPortalType(buffers[0].WritePointerDevice(device, token), numberOfBits));
  }
-
-  VTKM_CONT
-  PortalExecution PrepareForOutput(vtkm::Id numberOfValues, vtkm::cont::Token& token)
-  {
-    return PortalExecution{ this->Data.PrepareForOutput(numberOfValues, Device{}, token) };
-  }
-
-  VTKM_CONT
-  void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
-  {
-    // Implementation of this method should be unnecessary. The internal
-    // bitfield should automatically retrieve the output data as necessary.
-  }
-
-  VTKM_CONT
-  void Shrink(vtkm::Id numberOfValues) { this->Data.Shrink(numberOfValues); }
-
-  VTKM_CONT
-  void ReleaseResources() { this->Data.ReleaseResources(); }
-
-private:
-  vtkm::cont::BitField Data;
 };

 } // end namespace internal


+template <typename T>
+VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::internal::StorageTagBitField);
+
 /// The ArrayHandleBitField class is a boolean-valued ArrayHandle that is backed
 /// by a BitField.
 ///
@ -177,18 +149,9 @@ public:

  VTKM_CONT
  explicit ArrayHandleBitField(const vtkm::cont::BitField& bitField)
-    : Superclass{ StorageType{ bitField } }
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>(1, bitField.GetBuffer()))
  {
  }
-
-  VTKM_CONT
-  explicit ArrayHandleBitField(vtkm::cont::BitField&& bitField) noexcept
-    : Superclass{ StorageType{ std::move(bitField) } }
-  {
-  }
-
-  VTKM_CONT
-  vtkm::cont::BitField GetBitField() const { return this->GetStorage().GetBitField(); }
 };

 VTKM_CONT inline vtkm::cont::ArrayHandleBitField make_ArrayHandleBitField(
--- a/vtkm/cont/ArrayHandleCast.h
+++ b/vtkm/cont/ArrayHandleCast.h
@ -135,14 +135,11 @@ struct Storage<TargetT, vtkm::cont::StorageTagCast<SourceT, SourceStorage>>

 template <typename TargetT, typename SourceT, typename SourceStorage, typename Device>
 struct ArrayTransfer<TargetT, vtkm::cont::StorageTagCast<SourceT, SourceStorage>, Device>
-  : detail::ArrayHandleCastTraits<TargetT,
-                                  SourceT,
-                                  SourceStorage>::template ArrayTransferSuperclass<Device>
+  : detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage>::
+      template ArrayTransferSuperclass<Device>
 {
-  using Superclass =
-    typename detail::ArrayHandleCastTraits<TargetT,
-                                           SourceT,
-                                           SourceStorage>::template ArrayTransferSuperclass<Device>;
+  using Superclass = typename detail::ArrayHandleCastTraits<TargetT, SourceT, SourceStorage>::
+    template ArrayTransferSuperclass<Device>;

  using Superclass::Superclass;
 };
--- a/vtkm/cont/ArrayHandleCompositeVector.h
+++ b/vtkm/cont/ArrayHandleCompositeVector.h
@ -500,9 +500,9 @@ public:
  using PortalControl = typename StorageType::PortalType;
  using PortalConstControl = typename StorageType::PortalConstType;

-  using PortalExecution = ArrayPortalCompositeVector<
-    typename vtkm::cont::ArrayHandle<T,
-                                     StorageTags>::template ExecutionTypes<DeviceTag>::Portal...>;
+  using PortalExecution =
+    ArrayPortalCompositeVector<typename vtkm::cont::ArrayHandle<T, StorageTags>::
+                                 template ExecutionTypes<DeviceTag>::Portal...>;
  using PortalConstExecution =
    ArrayPortalCompositeVector<typename vtkm::cont::ArrayHandle<T, StorageTags>::
                                 template ExecutionTypes<DeviceTag>::PortalConst...>;
--- a/vtkm/cont/ArrayHandleConcatenate.h
+++ b/vtkm/cont/ArrayHandleConcatenate.h
@ -63,6 +63,7 @@ public:
    return this->portal1.GetNumberOfValues() + this->portal2.GetNumberOfValues();
  }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  VTKM_EXEC_CONT
  ValueType Get(vtkm::Id index) const
  {
@ -76,6 +77,7 @@ public:
    }
  }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  template <typename Writable_ = Writable,
            typename = typename std::enable_if<Writable_::value>::type>
  VTKM_EXEC_CONT void Set(vtkm::Id index, const ValueType& value) const
--- a/vtkm/cont/ArrayHandleDecorator.h
+++ b/vtkm/cont/ArrayHandleDecorator.h
@ -483,10 +483,10 @@ struct DecoratorStorageTraits
  }

  // Static dispatch for calling AllocateSourceArrays on supported implementations:
-  VTKM_CONT[[noreturn]] static void CallAllocate(std::false_type,
-                                                 const DecoratorImplT&,
-                                                 vtkm::Id,
-                                                 ArrayTs&...)
+  VTKM_CONT [[noreturn]] static void CallAllocate(std::false_type,
+                                                  const DecoratorImplT&,
+                                                  vtkm::Id,
+                                                  ArrayTs&...)
  {
    throw vtkm::cont::ErrorBadType("Allocate not supported by this ArrayHandleDecorator.");
  }
@ -500,10 +500,10 @@ struct DecoratorStorageTraits
  }

  // Static dispatch for calling ShrinkSourceArrays on supported implementations.
-  VTKM_CONT[[noreturn]] static void CallShrink(std::false_type,
-                                               const DecoratorImplT&,
-                                               vtkm::Id,
-                                               ArrayTs&...)
+  VTKM_CONT [[noreturn]] static void CallShrink(std::false_type,
+                                                const DecoratorImplT&,
+                                                vtkm::Id,
+                                                ArrayTs&...)
  {
    throw vtkm::cont::ErrorBadType("Shrink not supported by this ArrayHandleDecorator.");
  }
--- a/vtkm/cont/ArrayHandleImplicit.h
+++ b/vtkm/cont/ArrayHandleImplicit.h
@ -51,6 +51,7 @@ public:
  using ValueType = typename ArrayHandleImplicitTraits<FunctorType_>::ValueType;
  using FunctorType = FunctorType_;

+  VTKM_SUPPRESS_EXEC_WARNINGS
  VTKM_EXEC_CONT
  ArrayPortalImplicit()
    : Functor()
@ -58,6 +59,7 @@ public:
  {
  }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  VTKM_EXEC_CONT
  ArrayPortalImplicit(FunctorType f, vtkm::Id numValues)
    : Functor(f)
--- a/vtkm/cont/ArrayHandleMultiplexer.h
+++ b/vtkm/cont/ArrayHandleMultiplexer.h
@ -101,7 +101,8 @@ struct ArrayPortalMultiplexer
  ArrayPortalMultiplexer& operator=(const ArrayPortalMultiplexer&) = default;

  template <typename Portal>
-  VTKM_EXEC_CONT ArrayPortalMultiplexer(const Portal& src) noexcept : PortalVariant(src)
+  VTKM_EXEC_CONT ArrayPortalMultiplexer(const Portal& src) noexcept
+    : PortalVariant(src)
  {
  }

--- a/vtkm/cont/ArrayHandleSOA.cxx
+++ b/vtkm/cont/ArrayHandleSOA.cxx
@ -16,10 +16,10 @@ namespace vtkm
 namespace cont
 {

-#define VTKM_ARRAYHANDLE_SOA_INSTANTIATE(Type)                                                     \
-  template class VTKM_CONT_EXPORT ArrayHandle<Type, StorageTagSOA>;                                \
-  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 2>, StorageTagSOA>;                  \
-  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 3>, StorageTagSOA>;                  \
+#define VTKM_ARRAYHANDLE_SOA_INSTANTIATE(Type)                                    \
+  template class VTKM_CONT_EXPORT ArrayHandle<Type, StorageTagSOA>;               \
+  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 2>, StorageTagSOA>; \
+  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 3>, StorageTagSOA>; \
  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<Type, 4>, StorageTagSOA>;

 VTKM_ARRAYHANDLE_SOA_INSTANTIATE(char)
--- a/vtkm/cont/ArrayHandleSOA.h
+++ b/vtkm/cont/ArrayHandleSOA.h
@ -15,6 +15,7 @@
 #include <vtkm/Math.h>
 #include <vtkm/VecTraits.h>

+#include <vtkm/internal/ArrayPortalBasic.h>
 #include <vtkm/internal/ArrayPortalHelpers.h>

 #include <vtkmstd/integer_sequence.h>
@ -33,21 +34,21 @@ namespace internal
 ///
 /// This will only work if \c VecTraits is defined for the type.
 ///
-template <typename ValueType_, typename SourcePortalType>
+template <typename ValueType_, typename ComponentPortalType>
 class ArrayPortalSOA
 {
 public:
  using ValueType = ValueType_;

 private:
-  using ComponentType = typename SourcePortalType::ValueType;
+  using ComponentType = typename ComponentPortalType::ValueType;

  VTKM_STATIC_ASSERT(vtkm::HasVecTraits<ValueType>::value);
  using VTraits = vtkm::VecTraits<ValueType>;
  VTKM_STATIC_ASSERT((std::is_same<typename VTraits::ComponentType, ComponentType>::value));
  static constexpr vtkm::IdComponent NUM_COMPONENTS = VTraits::NUM_COMPONENTS;

-  SourcePortalType Portals[NUM_COMPONENTS];
+  ComponentPortalType Portals[NUM_COMPONENTS];
  vtkm::Id NumberOfValues;

 public:
@ -58,14 +59,14 @@ public:
  }

  VTKM_SUPPRESS_EXEC_WARNINGS
-  VTKM_EXEC_CONT void SetPortal(vtkm::IdComponent index, const SourcePortalType& portal)
+  VTKM_EXEC_CONT void SetPortal(vtkm::IdComponent index, const ComponentPortalType& portal)
  {
    this->Portals[index] = portal;
  }

  VTKM_EXEC_CONT vtkm::Id GetNumberOfValues() const { return this->NumberOfValues; }

-  template <typename SPT = SourcePortalType,
+  template <typename SPT = ComponentPortalType,
            typename Supported = typename vtkm::internal::PortalSupportsGets<SPT>::type,
            typename = typename std::enable_if<Supported::value>::type>
  VTKM_EXEC_CONT ValueType Get(vtkm::Id valueIndex) const
@ -73,7 +74,7 @@ public:
    return this->Get(valueIndex, vtkmstd::make_index_sequence<NUM_COMPONENTS>());
  }

-  template <typename SPT = SourcePortalType,
+  template <typename SPT = ComponentPortalType,
            typename Supported = typename vtkm::internal::PortalSupportsSets<SPT>::type,
            typename = typename std::enable_if<Supported::value>::type>
  VTKM_EXEC_CONT void Set(vtkm::Id valueIndex, const ValueType& value) const
@ -82,6 +83,7 @@ public:
  }

 private:
+  VTKM_SUPPRESS_EXEC_WARNINGS
  template <std::size_t I>
  VTKM_EXEC_CONT ComponentType GetComponent(vtkm::Id valueIndex) const
  {
@ -94,6 +96,7 @@ private:
    return ValueType{ this->GetComponent<I>(valueIndex)... };
  }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  template <std::size_t I>
  VTKM_EXEC_CONT bool SetComponent(vtkm::Id valueIndex, const ValueType& value) const
  {
@ -124,351 +127,174 @@ struct VTKM_ALWAYS_EXPORT StorageTagSOA
 namespace internal
 {

-namespace detail
+template <typename T>
+class VTKM_ALWAYS_EXPORT Storage<T, vtkm::cont::StorageTagSOA>
 {
+  VTKM_STATIC_ASSERT(vtkm::HasVecTraits<T>::value);
+  using VTraits = vtkm::VecTraits<T>;

-template <typename ValueType, typename PortalType, typename IsTrueVec>
-struct SOAPortalChooser;
-
-template <typename ValueType, typename PortalType>
-struct SOAPortalChooser<ValueType, PortalType, std::true_type>
-{
-  using Type = vtkm::internal::ArrayPortalSOA<ValueType, PortalType>;
-};
-
-template <typename ValueType, typename PortalType>
-struct SOAPortalChooser<ValueType, PortalType, std::false_type>
-{
-  using Type = PortalType;
-};
-
-template <typename ReturnType, typename ValueType, std::size_t NUM_COMPONENTS, typename PortalMaker>
-ReturnType MakeSOAPortal(std::array<vtkm::cont::ArrayHandle<ValueType, vtkm::cont::StorageTagBasic>,
-                                    NUM_COMPONENTS> arrays,
-                         vtkm::Id numValues,
-                         const PortalMaker& portalMaker)
-{
-  ReturnType portal(numValues);
-  for (std::size_t componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
-  {
-    portal.SetPortal(static_cast<vtkm::IdComponent>(componentIndex),
-                     portalMaker(arrays[componentIndex]));
-    VTKM_ASSERT(arrays[componentIndex].GetNumberOfValues() == numValues);
-  }
-  return portal;
-}
-
-template <typename ReturnType, typename ValueType, typename PortalMaker>
-ReturnType MakeSOAPortal(
-  std::array<vtkm::cont::ArrayHandle<ValueType, vtkm::cont::StorageTagBasic>, 1> arrays,
-  vtkm::Id vtkmNotUsed(numValues),
-  const PortalMaker& portalMaker)
-{
-  return portalMaker(arrays[0]);
-}
-
-} // namespace detail
-
-template <typename ValueType>
-struct ArrayHandleSOATraits
-{
-  using VTraits = vtkm::VecTraits<ValueType>;
  using ComponentType = typename VTraits::ComponentType;
-  using BaseArrayType = vtkm::cont::ArrayHandle<ComponentType, vtkm::cont::StorageTagBasic>;
  static constexpr vtkm::IdComponent NUM_COMPONENTS = VTraits::NUM_COMPONENTS;
-  VTKM_STATIC_ASSERT_MSG(NUM_COMPONENTS > 0,
-                         "ArrayHandleSOA requires a type with at least 1 component.");
-
-  using IsTrueVec = std::integral_constant<bool, (NUM_COMPONENTS > 1)>;
-
-  using PortalControl = typename detail::SOAPortalChooser<ValueType,
-                                                          typename BaseArrayType::WritePortalType,
-                                                          IsTrueVec>::Type;
-  using PortalConstControl =
-    typename detail::SOAPortalChooser<ValueType,
-                                      typename BaseArrayType::ReadPortalType,
-                                      IsTrueVec>::Type;
-
-  template <typename Device>
-  using PortalExecution = typename detail::SOAPortalChooser<
-    ValueType,
-    typename BaseArrayType::template ExecutionTypes<Device>::Portal,
-    IsTrueVec>::Type;
-  template <typename Device>
-  using PortalConstExecution = typename detail::SOAPortalChooser<
-    ValueType,
-    typename BaseArrayType::template ExecutionTypes<Device>::PortalConst,
-    IsTrueVec>::Type;
-};
-
-template <typename ValueType_>
-class Storage<ValueType_, vtkm::cont::StorageTagSOA>
-{
-  using Traits = ArrayHandleSOATraits<ValueType_>;
-  static constexpr vtkm::IdComponent NUM_COMPONENTS = Traits::NUM_COMPONENTS;
-  using BaseArrayType = typename Traits::BaseArrayType;
-
-  std::array<BaseArrayType, NUM_COMPONENTS> Arrays;
-
-  VTKM_CONT bool IsValidImpl(std::true_type) const
-  {
-    vtkm::Id size = this->Arrays[0].GetNumberOfValues();
-    for (vtkm::IdComponent componentIndex = 1; componentIndex < NUM_COMPONENTS; ++componentIndex)
-    {
-      if (this->GetArray(componentIndex).GetNumberOfValues() != size)
-      {
-        return false;
-      }
-    }
-    return true;
-  }
-
-  VTKM_CONT constexpr bool IsValidImpl(std::false_type) const { return true; }

 public:
-  using ValueType = ValueType_;
-  using PortalType = typename Traits::PortalControl;
-  using PortalConstType = typename Traits::PortalConstControl;
+  using ReadPortalType =
+    vtkm::internal::ArrayPortalSOA<T, vtkm::internal::ArrayPortalBasicRead<ComponentType>>;
+  using WritePortalType =
+    vtkm::internal::ArrayPortalSOA<T, vtkm::internal::ArrayPortalBasicWrite<ComponentType>>;

-  VTKM_CONT bool IsValid() const { return this->IsValidImpl(typename Traits::IsTrueVec{}); }
+  VTKM_CONT static vtkm::IdComponent GetNumberOfBuffers() { return NUM_COMPONENTS; }

-  Storage() = default;
-
-  VTKM_CONT Storage(std::array<BaseArrayType, NUM_COMPONENTS>&& arrays)
-    : Arrays(std::move(arrays))
-  {
-    VTKM_ASSERT(IsValid());
-  }
-
-  // For this constructor to work, all types have to be
-  // vtkm::cont::ArrayHandle<ValueType, StorageTagBasic>
-  template <typename... ArrayTypes>
-  VTKM_CONT Storage(const BaseArrayType& array0, const ArrayTypes&... arrays)
-    : Arrays{ { array0, arrays... } }
-  {
-    VTKM_ASSERT(IsValid());
-  }
-
-  VTKM_CONT BaseArrayType& GetArray(vtkm::IdComponent index)
-  {
-    return this->Arrays[static_cast<std::size_t>(index)];
-  }
-
-  VTKM_CONT const BaseArrayType& GetArray(vtkm::IdComponent index) const
-  {
-    return this->Arrays[static_cast<std::size_t>(index)];
-  }
-
-  VTKM_CONT std::array<BaseArrayType, NUM_COMPONENTS>& GetArrays() { return this->Arrays; }
-
-  VTKM_CONT const std::array<BaseArrayType, NUM_COMPONENTS>& GetArrays() const
-  {
-    return this->Arrays;
-  }
-
-  VTKM_CONT void SetArray(vtkm::IdComponent index, const BaseArrayType& array)
-  {
-    this->Arrays[static_cast<std::size_t>(index)] = array;
-  }
-
-  VTKM_CONT vtkm::Id GetNumberOfValues() const
-  {
-    VTKM_ASSERT(IsValid());
-    return this->GetArray(0).GetNumberOfValues();
-  }
-
-  VTKM_CONT PortalType GetPortal()
-  {
-    VTKM_ASSERT(this->IsValid());
-    return detail::MakeSOAPortal<PortalType>(
-      this->Arrays, this->GetNumberOfValues(), [](BaseArrayType& array) {
-        return array.WritePortal();
-      });
-  }
-
-  VTKM_CONT PortalConstType GetPortalConst() const
-  {
-    VTKM_ASSERT(this->IsValid());
-    return detail::MakeSOAPortal<PortalConstType>(
-      this->Arrays, this->GetNumberOfValues(), [](const BaseArrayType& array) {
-        return array.ReadPortal();
-      });
-  }
-
-  VTKM_CONT void Allocate(vtkm::Id numValues)
+  VTKM_CONT static void ResizeBuffers(vtkm::Id numValues,
+                                      vtkm::cont::internal::Buffer* buffers,
+                                      vtkm::CopyFlag preserve,
+                                      vtkm::cont::Token& token)
  {
+    vtkm::BufferSizeType numBytes =
+      vtkm::internal::NumberOfValuesToNumberOfBytes<ComponentType>(numValues);
    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
    {
-      this->GetArray(componentIndex).Allocate(numValues);
+      buffers[componentIndex].SetNumberOfBytes(numBytes, preserve, token);
    }
  }

-  VTKM_CONT void Shrink(vtkm::Id numValues)
+  VTKM_CONT static vtkm::Id GetNumberOfValues(const vtkm::cont::internal::Buffer* buffers)
  {
+    // Assume all buffers are the same size.
+    return static_cast<vtkm::Id>(buffers[0].GetNumberOfBytes()) /
+      static_cast<vtkm::Id>(sizeof(ComponentType));
+  }
+
+  VTKM_CONT static ReadPortalType CreateReadPortal(const vtkm::cont::internal::Buffer* buffers,
+                                                   vtkm::cont::DeviceAdapterId device,
+                                                   vtkm::cont::Token& token)
+  {
+    vtkm::Id numValues = GetNumberOfValues(buffers);
+    ReadPortalType portal(numValues);
    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
    {
-      this->GetArray(componentIndex).Shrink(numValues);
+      VTKM_ASSERT(buffers[0].GetNumberOfBytes() == buffers[componentIndex].GetNumberOfBytes());
+      portal.SetPortal(componentIndex,
+                       vtkm::internal::ArrayPortalBasicRead<ComponentType>(
+                         reinterpret_cast<const ComponentType*>(
+                           buffers[componentIndex].ReadPointerDevice(device, token)),
+                         numValues));
    }
+    return portal;
  }

-  VTKM_CONT void ReleaseResources()
+  VTKM_CONT static WritePortalType CreateWritePortal(vtkm::cont::internal::Buffer* buffers,
+                                                     vtkm::cont::DeviceAdapterId device,
+                                                     vtkm::cont::Token& token)
  {
+    vtkm::Id numValues = GetNumberOfValues(buffers);
+    WritePortalType portal(numValues);
    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
    {
-      this->GetArray(componentIndex).ReleaseResources();
-    }
-  }
-};
-
-template <typename ValueType_, typename Device>
-class ArrayTransfer<ValueType_, vtkm::cont::StorageTagSOA, Device>
-{
-  VTKM_IS_DEVICE_ADAPTER_TAG(Device);
-
-  using StorageType = vtkm::cont::internal::Storage<ValueType_, vtkm::cont::StorageTagSOA>;
-
-  using Traits = ArrayHandleSOATraits<ValueType_>;
-  using BaseArrayType = typename Traits::BaseArrayType;
-  static constexpr vtkm::IdComponent NUM_COMPONENTS = Traits::NUM_COMPONENTS;
-
-  StorageType* Storage;
-
-public:
-  using ValueType = ValueType_;
-
-  using PortalControl = typename StorageType::PortalType;
-  using PortalConstControl = typename StorageType::PortalConstType;
-
-  using PortalExecution = typename Traits::template PortalExecution<Device>;
-  using PortalConstExecution = typename Traits::template PortalConstExecution<Device>;
-
-  VTKM_CONT ArrayTransfer(StorageType* storage)
-    : Storage(storage)
-  {
-  }
-
-  VTKM_CONT vtkm::Id GetNumberOfValues() const { return this->Storage->GetNumberOfValues(); }
-
-  VTKM_CONT PortalConstExecution PrepareForInput(bool vtkmNotUsed(updateData),
-                                                 vtkm::cont::Token& token) const
-  {
-    return detail::MakeSOAPortal<PortalConstExecution>(
-      this->Storage->GetArrays(), this->GetNumberOfValues(), [&token](const BaseArrayType& array) {
-        return array.PrepareForInput(Device{}, token);
-      });
-  }
-
-  VTKM_CONT PortalExecution PrepareForInPlace(bool vtkmNotUsed(updateData),
-                                              vtkm::cont::Token& token) const
-  {
-    return detail::MakeSOAPortal<PortalExecution>(
-      this->Storage->GetArrays(), this->GetNumberOfValues(), [&token](BaseArrayType& array) {
-        return array.PrepareForInPlace(Device{}, token);
-      });
-  }
-
-  VTKM_CONT PortalExecution PrepareForOutput(vtkm::Id numValues, vtkm::cont::Token& token) const
-  {
-    return detail::MakeSOAPortal<PortalExecution>(
-      this->Storage->GetArrays(), numValues, [numValues, &token](BaseArrayType& array) {
-        return array.PrepareForOutput(numValues, Device{}, token);
-      });
-  }
-
-  VTKM_CONT
-  void RetrieveOutputData(StorageType* vtkmNotUsed(storage)) const
-  {
-    // Implementation of this method should be unnecessary. The internal
-    // array handle should automatically retrieve the output data as
-    // necessary.
-  }
-
-  VTKM_CONT void Shrink(vtkm::Id numValues)
-  {
-    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
-    {
-      this->Storage->GetArray(componentIndex).Shrink(numValues);
-    }
-  }
-
-  VTKM_CONT void ReleaseResources()
-  {
-    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
-    {
-      this->Storage->GetArray(componentIndex).ReleaseResourcesExecution();
+      VTKM_ASSERT(buffers[0].GetNumberOfBytes() == buffers[componentIndex].GetNumberOfBytes());
+      portal.SetPortal(componentIndex,
+                       vtkm::internal::ArrayPortalBasicWrite<ComponentType>(
+                         reinterpret_cast<ComponentType*>(
+                           buffers[componentIndex].WritePointerDevice(device, token)),
+                         numValues));
    }
+    return portal;
  }
 };

 } // namespace internal

-/// \brief An \c ArrayHandle that for Vecs stores each component in a separate physical array.
+template <typename T>
+VTKM_ARRAY_HANDLE_NEW_STYLE(T, vtkm::cont::StorageTagSOA);
+
+/// \brief An `ArrayHandle` that for Vecs stores each component in a separate physical array.
 ///
-/// \c ArrayHandleSOA behaves like a regular \c ArrayHandle (with a basic storage) except that
-/// if you specify a \c ValueType of a \c Vec or a \c Vec-like, it will actually store each
+/// `ArrayHandleSOA` behaves like a regular `ArrayHandle` (with a basic storage) except that
+/// if you specify a `ValueType` of a `Vec` or a `Vec-like`, it will actually store each
 /// component in a separate physical array. When data are retrieved from the array, they are
-/// reconstructed into \c Vec objects as expected.
+/// reconstructed into `Vec` objects as expected.
 ///
-/// The intention of this array type is to help cover the most common ways data is lain out in
+/// The intention of this array type is to help cover the most common ways data is laid out in
 /// memory. Typically, arrays of data are either an "array of structures" like the basic storage
-/// where you have a single array of structures (like \c Vec) or a "structure of arrays" where
-/// you have an array of a basic type (like \c float) for each component of the data being
-/// represented. The\c ArrayHandleSOA makes it easy to cover this second case without creating
+/// where you have a single array of structures (like `Vec`) or a "structure of arrays" where
+/// you have an array of a basic type (like `float`) for each component of the data being
+/// represented. The `ArrayHandleSOA` makes it easy to cover this second case without creating
 /// special types.
 ///
-/// \c ArrayHandleSOA can be constructed from a collection of \c ArrayHandle with basic storage.
-/// This allows you to construct \c Vec arrays from components without deep copies.
+/// `ArrayHandleSOA` can be constructed from a collection of `ArrayHandle` with basic storage.
+/// This allows you to construct `Vec` arrays from components without deep copies.
 ///
-template <typename ValueType_>
-class ArrayHandleSOA : public ArrayHandle<ValueType_, vtkm::cont::StorageTagSOA>
+template <typename T>
+class ArrayHandleSOA : public ArrayHandle<T, vtkm::cont::StorageTagSOA>
 {
-  using Traits = vtkm::cont::internal::ArrayHandleSOATraits<ValueType_>;
-  using ComponentType = typename Traits::ComponentType;
-  using BaseArrayType = typename Traits::BaseArrayType;
+  using ComponentType = typename vtkm::VecTraits<T>::ComponentType;
+  static constexpr vtkm::IdComponent NUM_COMPONENTS = vtkm::VecTraits<T>::NUM_COMPONENTS;

-  using StorageType = vtkm::cont::internal::Storage<ValueType_, vtkm::cont::StorageTagSOA>;
+  using StorageType = vtkm::cont::internal::Storage<T, vtkm::cont::StorageTagSOA>;
+
+  using ComponentArrayType = vtkm::cont::ArrayHandle<ComponentType, vtkm::cont::StorageTagBasic>;

 public:
  VTKM_ARRAY_HANDLE_SUBCLASS(ArrayHandleSOA,
-                             (ArrayHandleSOA<ValueType_>),
-                             (ArrayHandle<ValueType_, vtkm::cont::StorageTagSOA>));
+                             (ArrayHandleSOA<T>),
+                             (ArrayHandle<T, vtkm::cont::StorageTagSOA>));

-  ArrayHandleSOA(std::array<BaseArrayType, Traits::NUM_COMPONENTS>&& componentArrays)
-    : Superclass(StorageType(std::move(componentArrays)))
+  ArrayHandleSOA(std::initializer_list<vtkm::cont::internal::Buffer>&& componentBuffers)
+    : Superclass(std::move(componentBuffers))
  {
  }

-  ArrayHandleSOA(std::initializer_list<BaseArrayType>&& componentArrays)
+  ArrayHandleSOA(const std::array<ComponentArrayType, NUM_COMPONENTS>& componentArrays)
  {
-    VTKM_ASSERT(componentArrays.size() == Traits::NUM_COMPONENTS);
-    std::copy(
-      componentArrays.begin(), componentArrays.end(), this->GetStorage().GetArrays().begin());
+    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
+    {
+      this->SetArray(componentIndex, componentArrays[componentIndex]);
+    }
+  }
+
+  ArrayHandleSOA(const std::vector<ComponentArrayType>& componentArrays)
+  {
+    VTKM_ASSERT(componentArrays.size() == NUM_COMPONENTS);
+    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
+    {
+      this->SetArray(componentIndex, componentArrays[componentIndex]);
+    }
+  }
+
+  ArrayHandleSOA(std::initializer_list<ComponentArrayType>&& componentArrays)
+  {
+    VTKM_ASSERT(componentArrays.size() == NUM_COMPONENTS);
+    vtkm::IdComponent componentIndex = 0;
+    for (auto&& array : componentArrays)
+    {
+      this->SetArray(componentIndex, array);
+      ++componentIndex;
+    }
  }

  ArrayHandleSOA(std::initializer_list<std::vector<ComponentType>>&& componentVectors)
  {
-    VTKM_ASSERT(componentVectors.size() == Traits::NUM_COMPONENTS);
+    VTKM_ASSERT(componentVectors.size() == NUM_COMPONENTS);
    vtkm::IdComponent componentIndex = 0;
-    for (auto&& vectorIter = componentVectors.begin(); vectorIter != componentVectors.end();
-         ++vectorIter)
+    for (auto&& vector : componentVectors)
    {
      // Note, std::vectors that come from std::initializer_list must be copied because the scope
      // of the objects in the initializer list disappears.
-      this->SetArray(componentIndex, vtkm::cont::make_ArrayHandle(*vectorIter, vtkm::CopyFlag::On));
+      this->SetArray(componentIndex, vtkm::cont::make_ArrayHandle(vector, vtkm::CopyFlag::On));
      ++componentIndex;
    }
  }

  // This only works if all the templated arguments are of type std::vector<ComponentType>.
-  template <typename... RemainingVectors>
+  template <typename Allocator, typename... RemainingVectors>
  ArrayHandleSOA(vtkm::CopyFlag copy,
-                 const std::vector<ComponentType>& vector0,
+                 const std::vector<ComponentType, Allocator>& vector0,
                 RemainingVectors&&... componentVectors)
-    : Superclass(StorageType(
-        vtkm::cont::make_ArrayHandle(vector0, copy),
-        vtkm::cont::make_ArrayHandle(std::forward<RemainingVectors>(componentVectors), copy)...))
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
+        vtkm::cont::make_ArrayHandle(vector0, copy).GetBuffers()[0],
+        vtkm::cont::make_ArrayHandle(std::forward<RemainingVectors>(componentVectors), copy)
+          .GetBuffers()[0]... })
  {
-    VTKM_STATIC_ASSERT(sizeof...(RemainingVectors) + 1 == Traits::NUM_COMPONENTS);
+    VTKM_STATIC_ASSERT(sizeof...(RemainingVectors) + 1 == NUM_COMPONENTS);
  }

  // This only works if all the templated arguments are of type std::vector<ComponentType>.
@ -476,11 +302,12 @@ public:
  ArrayHandleSOA(vtkm::CopyFlag copy,
                 std::vector<ComponentType>&& vector0,
                 RemainingVectors&&... componentVectors)
-    : Superclass(StorageType(
-        vtkm::cont::make_ArrayHandle(std::move(vector0), copy),
-        vtkm::cont::make_ArrayHandle(std::forward<RemainingVectors>(componentVectors), copy)...))
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
+        vtkm::cont::make_ArrayHandle(std::move(vector0), copy).GetBuffers()[0],
+        vtkm::cont::make_ArrayHandle(std::forward<RemainingVectors>(componentVectors), copy)
+          .GetBuffers()[0]... })
  {
-    VTKM_STATIC_ASSERT(sizeof...(RemainingVectors) + 1 == Traits::NUM_COMPONENTS);
+    VTKM_STATIC_ASSERT(sizeof...(RemainingVectors) + 1 == NUM_COMPONENTS);
  }

  // This only works if all the templated arguments are of type std::vector<ComponentType>.

  // This only works if all the templated arguments are of type std::vector<ComponentType>.
@ -492,18 +319,20 @@ public:
 #endif
    ArrayHandleSOA(const std::vector<ComponentType>& vector0,
                   const RemainingVectors&... componentVectors)
-    : Superclass(
-        StorageType(vtkm::cont::make_ArrayHandle(vector0, vtkm::CopyFlag::Off),
-                    vtkm::cont::make_ArrayHandle(componentVectors, vtkm::CopyFlag::Off)...))
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
+        vtkm::cont::make_ArrayHandle(vector0, vtkm::CopyFlag::Off).GetBuffers()[0],
+        // Pass the const references straight through; they are only wrapped, never moved from.
+        vtkm::cont::make_ArrayHandle(componentVectors, vtkm::CopyFlag::Off)
+          .GetBuffers()[0]... })
  {
-    VTKM_STATIC_ASSERT(sizeof...(RemainingVectors) + 1 == Traits::NUM_COMPONENTS);
+    VTKM_STATIC_ASSERT(sizeof...(RemainingVectors) + 1 == NUM_COMPONENTS);
  }

  ArrayHandleSOA(std::initializer_list<const ComponentType*> componentArrays,
                 vtkm::Id length,
                 vtkm::CopyFlag copy)
  {
-    VTKM_ASSERT(componentArrays.size() == Traits::NUM_COMPONENTS);
+    VTKM_ASSERT(componentArrays.size() == NUM_COMPONENTS);
    vtkm::IdComponent componentIndex = 0;
    for (auto&& vectorIter = componentArrays.begin(); vectorIter != componentArrays.end();
         ++vectorIter)
@ -516,7 +345,7 @@ public:
  VTKM_DEPRECATED(1.6, "Specify a vtkm::CopyFlag or use a move version of make_ArrayHandle.")
  ArrayHandleSOA(std::initializer_list<const ComponentType*> componentArrays, vtkm::Id length)
  {
-    VTKM_ASSERT(componentArrays.size() == Traits::NUM_COMPONENTS);
+    VTKM_ASSERT(componentArrays.size() == NUM_COMPONENTS);
    vtkm::IdComponent componentIndex = 0;
    for (auto&& vectorIter = componentArrays.begin(); vectorIter != componentArrays.end();
         ++vectorIter)
@ -533,10 +362,11 @@ public:
                 vtkm::CopyFlag copy,
                 const ComponentType* array0,
                 const RemainingArrays&... componentArrays)
-    : Superclass(StorageType(vtkm::cont::make_ArrayHandle(array0, length, copy),
-                             vtkm::cont::make_ArrayHandle(componentArrays, length, copy)...))
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
+        vtkm::cont::make_ArrayHandle(array0, length, copy).GetBuffers()[0],
+        vtkm::cont::make_ArrayHandle(componentArrays, length, copy).GetBuffers()[0]... })
  {
-    VTKM_STATIC_ASSERT(sizeof...(RemainingArrays) + 1 == Traits::NUM_COMPONENTS);
+    VTKM_STATIC_ASSERT(sizeof...(RemainingArrays) + 1 == NUM_COMPONENTS);
  }

  // This only works if all the templated arguments are of type std::vector<ComponentType>.
@ -549,26 +379,22 @@ public:
    ArrayHandleSOA(vtkm::Id length,
                   const ComponentType* array0,
                   const RemainingArrays&... componentArrays)
-    : Superclass(
-        StorageType(vtkm::cont::make_ArrayHandle(array0, length, vtkm::CopyFlag::Off),
-                    vtkm::cont::make_ArrayHandle(componentArrays, length, vtkm::CopyFlag::Off)...))
+    : Superclass(std::vector<vtkm::cont::internal::Buffer>{
+        vtkm::cont::make_ArrayHandle(array0, length, vtkm::CopyFlag::Off).GetBuffers()[0],
+        vtkm::cont::make_ArrayHandle(componentArrays, length, vtkm::CopyFlag::Off)
+          .GetBuffers()[0]... })
  {
-    VTKM_STATIC_ASSERT(sizeof...(RemainingArrays) + 1 == Traits::NUM_COMPONENTS);
+    VTKM_STATIC_ASSERT(sizeof...(RemainingArrays) + 1 == NUM_COMPONENTS);
  }

-  VTKM_CONT BaseArrayType& GetArray(vtkm::IdComponent index)
+  VTKM_CONT vtkm::cont::ArrayHandleBasic<ComponentType> GetArray(vtkm::IdComponent index) const
  {
-    return this->GetStorage().GetArray(index);
+    return ComponentArrayType(&this->GetBuffers()[index]);
  }

-  VTKM_CONT const BaseArrayType& GetArray(vtkm::IdComponent index) const
+  VTKM_CONT void SetArray(vtkm::IdComponent index, const ComponentArrayType& array)
  {
-    return this->GetStorage().GetArray(index);
-  }
-
-  VTKM_CONT void SetArray(vtkm::IdComponent index, const BaseArrayType& array)
-  {
-    this->GetStorage().SetArray(index, array);
+    this->SetBuffer(index, array.GetBuffers()[0]);
  }
 };

@ -735,25 +561,24 @@ template <typename ValueType>
 struct Serialization<vtkm::cont::ArrayHandleSOA<ValueType>>
 {
  using BaseType = vtkm::cont::ArrayHandle<ValueType, vtkm::cont::StorageTagSOA>;
-  using Traits = vtkm::cont::internal::ArrayHandleSOATraits<ValueType>;
-  static constexpr vtkm::IdComponent NUM_COMPONENTS = Traits::NUM_COMPONENTS;
+  static constexpr vtkm::IdComponent NUM_COMPONENTS = vtkm::VecTraits<ValueType>::NUM_COMPONENTS;

  static VTKM_CONT void save(BinaryBuffer& bb, const BaseType& obj)
  {
    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
    {
-      vtkmdiy::save(bb, obj.GetStorage().GetArray(componentIndex));
+      vtkmdiy::save(bb, obj.GetBuffers()[componentIndex]);
    }
  }

  static VTKM_CONT void load(BinaryBuffer& bb, BaseType& obj)
  {
-    for (vtkm::IdComponent componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
+    std::vector<vtkm::cont::internal::Buffer> buffers(NUM_COMPONENTS);
+    for (std::size_t componentIndex = 0; componentIndex < NUM_COMPONENTS; ++componentIndex)
    {
-      typename Traits::BaseArrayType componentArray;
-      vtkmdiy::load(bb, componentArray);
-      obj.GetStorage().SetArray(componentIndex, componentArray);
+      vtkmdiy::load(bb, buffers[componentIndex]);
    }
+    obj = BaseType(buffers);
  }
 };

@ -776,10 +601,10 @@ namespace vtkm
 namespace cont
 {

-#define VTKM_ARRAYHANDLE_SOA_EXPORT(Type)                                                          \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<Type, StorageTagSOA>;                \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<vtkm::Vec<Type, 2>, StorageTagSOA>;  \
-  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<vtkm::Vec<Type, 3>, StorageTagSOA>;  \
+#define VTKM_ARRAYHANDLE_SOA_EXPORT(Type)                                                         \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<Type, StorageTagSOA>;               \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<vtkm::Vec<Type, 2>, StorageTagSOA>; \
+  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<vtkm::Vec<Type, 3>, StorageTagSOA>; \
  extern template class VTKM_CONT_TEMPLATE_EXPORT ArrayHandle<vtkm::Vec<Type, 4>, StorageTagSOA>;

 VTKM_ARRAYHANDLE_SOA_EXPORT(char)
--- a/vtkm/cont/ArrayHandleVirtual.cxx
+++ b/vtkm/cont/ArrayHandleVirtual.cxx
@ -15,14 +15,14 @@ namespace vtkm
 namespace cont
 {

-#define VTK_M_ARRAY_HANDLE_VIRTUAL_INSTANTIATE(T)                                                  \
-  template class VTKM_CONT_EXPORT ArrayHandle<T, StorageTagVirtual>;                               \
-  template class VTKM_CONT_EXPORT ArrayHandleVirtual<T>;                                           \
-  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<T, 2>, StorageTagVirtual>;                 \
-  template class VTKM_CONT_EXPORT ArrayHandleVirtual<vtkm::Vec<T, 2>>;                             \
-  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<T, 3>, StorageTagVirtual>;                 \
-  template class VTKM_CONT_EXPORT ArrayHandleVirtual<vtkm::Vec<T, 3>>;                             \
-  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<T, 4>, StorageTagVirtual>;                 \
+#define VTK_M_ARRAY_HANDLE_VIRTUAL_INSTANTIATE(T)                                  \
+  template class VTKM_CONT_EXPORT ArrayHandle<T, StorageTagVirtual>;               \
+  template class VTKM_CONT_EXPORT ArrayHandleVirtual<T>;                           \
+  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<T, 2>, StorageTagVirtual>; \
+  template class VTKM_CONT_EXPORT ArrayHandleVirtual<vtkm::Vec<T, 2>>;             \
+  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<T, 3>, StorageTagVirtual>; \
+  template class VTKM_CONT_EXPORT ArrayHandleVirtual<vtkm::Vec<T, 3>>;             \
+  template class VTKM_CONT_EXPORT ArrayHandle<vtkm::Vec<T, 4>, StorageTagVirtual>; \
  template class VTKM_CONT_EXPORT ArrayHandleVirtual<vtkm::Vec<T, 4>>

 VTK_M_ARRAY_HANDLE_VIRTUAL_INSTANTIATE(char);
--- a/vtkm/cont/ArrayHandleZip.h
+++ b/vtkm/cont/ArrayHandleZip.h
@ -68,9 +68,11 @@ public:
  {
  }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  VTKM_EXEC_CONT
  vtkm::Id GetNumberOfValues() const { return this->PortalFirst.GetNumberOfValues(); }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  template <typename Readable_ = Readable,
            typename = typename std::enable_if<Readable_::value>::type>
  VTKM_EXEC_CONT ValueType Get(vtkm::Id index) const noexcept
@ -78,6 +80,7 @@ public:
    return vtkm::make_Pair(this->PortalFirst.Get(index), this->PortalSecond.Get(index));
  }

+  VTKM_SUPPRESS_EXEC_WARNINGS
  template <typename Writable_ = Writable,
            typename = typename std::enable_if<Writable_::value>::type>
  VTKM_EXEC_CONT void Set(vtkm::Id index, const ValueType& value) const noexcept
--- a/vtkm/cont/ArrayRangeCompute.cxx
+++ b/vtkm/cont/ArrayRangeCompute.cxx
@ -20,22 +20,22 @@ void ThrowArrayRangeComputeFailed()
  throw vtkm::cont::ErrorExecution("Failed to run ArrayRangeComputation on any device.");
 }

-#define VTKM_ARRAY_RANGE_COMPUTE_IMPL_T(T, Storage)                                                \
-  VTKM_CONT                                                                                        \
-  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(                                          \
-    const vtkm::cont::ArrayHandle<T, Storage>& input, vtkm::cont::DeviceAdapterId device)          \
-  {                                                                                                \
-    return detail::ArrayRangeComputeImpl(input, device);                                           \
-  }                                                                                                \
+#define VTKM_ARRAY_RANGE_COMPUTE_IMPL_T(T, Storage)                                       \
+  VTKM_CONT                                                                               \
+  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(                                 \
+    const vtkm::cont::ArrayHandle<T, Storage>& input, vtkm::cont::DeviceAdapterId device) \
+  {                                                                                       \
+    return detail::ArrayRangeComputeImpl(input, device);                                  \
+  }                                                                                       \
  struct SwallowSemicolon
-#define VTKM_ARRAY_RANGE_COMPUTE_IMPL_VEC(T, N, Storage)                                           \
-  VTKM_CONT                                                                                        \
-  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(                                          \
-    const vtkm::cont::ArrayHandle<vtkm::Vec<T, N>, Storage>& input,                                \
-    vtkm::cont::DeviceAdapterId device)                                                            \
-  {                                                                                                \
-    return detail::ArrayRangeComputeImpl(input, device);                                           \
-  }                                                                                                \
+#define VTKM_ARRAY_RANGE_COMPUTE_IMPL_VEC(T, N, Storage)            \
+  VTKM_CONT                                                         \
+  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(           \
+    const vtkm::cont::ArrayHandle<vtkm::Vec<T, N>, Storage>& input, \
+    vtkm::cont::DeviceAdapterId device)                             \
+  {                                                                 \
+    return detail::ArrayRangeComputeImpl(input, device);            \
+  }                                                                 \
  struct SwallowSemicolon

 VTKM_ARRAY_RANGE_COMPUTE_IMPL_T(char, vtkm::cont::StorageTagBasic);
--- a/vtkm/cont/ArrayRangeCompute.h
+++ b/vtkm/cont/ArrayRangeCompute.h
@ -43,17 +43,17 @@ VTKM_CONT vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(
  vtkm::cont::DeviceAdapterId device = vtkm::cont::DeviceAdapterTagAny());

 // Precompiled versions of ArrayRangeCompute
-#define VTK_M_ARRAY_RANGE_COMPUTE_EXPORT_T(T, Storage)                                             \
-  VTKM_CONT_EXPORT                                                                                 \
-  VTKM_CONT                                                                                        \
-  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(                                          \
-    const vtkm::cont::ArrayHandle<T, Storage>& input,                                              \
+#define VTK_M_ARRAY_RANGE_COMPUTE_EXPORT_T(T, Storage)    \
+  VTKM_CONT_EXPORT                                        \
+  VTKM_CONT                                               \
+  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute( \
+    const vtkm::cont::ArrayHandle<T, Storage>& input,     \
    vtkm::cont::DeviceAdapterId device = vtkm::cont::DeviceAdapterTagAny())
-#define VTK_M_ARRAY_RANGE_COMPUTE_EXPORT_VEC(T, N, Storage)                                        \
-  VTKM_CONT_EXPORT                                                                                 \
-  VTKM_CONT                                                                                        \
-  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(                                          \
-    const vtkm::cont::ArrayHandle<vtkm::Vec<T, N>, Storage>& input,                                \
+#define VTK_M_ARRAY_RANGE_COMPUTE_EXPORT_VEC(T, N, Storage)         \
+  VTKM_CONT_EXPORT                                                  \
+  VTKM_CONT                                                         \
+  vtkm::cont::ArrayHandle<vtkm::Range> ArrayRangeCompute(           \
+    const vtkm::cont::ArrayHandle<vtkm::Vec<T, N>, Storage>& input, \
    vtkm::cont::DeviceAdapterId device = vtkm::cont::DeviceAdapterTagAny())

 VTK_M_ARRAY_RANGE_COMPUTE_EXPORT_T(char, vtkm::cont::StorageTagBasic);
--- a/vtkm/cont/AssignerPartitionedDataSet.cxx
+++ b/vtkm/cont/AssignerPartitionedDataSet.cxx
@ -50,9 +50,7 @@ AssignerPartitionedDataSet::AssignerPartitionedDataSet(vtkm::Id num_partitions)
 }

 VTKM_CONT
-AssignerPartitionedDataSet::~AssignerPartitionedDataSet()
-{
-}
+AssignerPartitionedDataSet::~AssignerPartitionedDataSet() {}

 VTKM_CONT
 void AssignerPartitionedDataSet::local_gids(int my_rank, std::vector<int>& gids) const
--- a/vtkm/cont/BitField.cxx
+++ b/vtkm/cont/BitField.cxx
@ -0,0 +1,187 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+
+#include <vtkm/cont/BitField.h>
+#include <vtkm/cont/Logging.h>
+
+namespace
+{
+
+// Functor for vtkm::ListForEach that records the first visited device on
+// which the given buffer reports being allocated. FoundDevice stays at
+// DeviceAdapterTagUndefined when no visited device matches.
+struct DeviceCheckFunctor
+{
+  vtkm::cont::DeviceAdapterId FoundDevice = vtkm::cont::DeviceAdapterTagUndefined{};
+
+  VTKM_CONT void operator()(vtkm::cont::DeviceAdapterId device,
+                            const vtkm::cont::internal::Buffer& buffer)
+  {
+    // Once a device has been recorded, later devices are not even queried.
+    const bool stillSearching = (this->FoundDevice == vtkm::cont::DeviceAdapterTagUndefined{});
+    if (stillSearching && buffer.IsAllocatedOnDevice(device))
+    {
+      this->FoundDevice = device;
+    }
+  }
+};
+
+} // anonymous namespace
+
+namespace vtkm
+{
+namespace cont
+{
+
+namespace detail
+{
+
+// Returns the BitFieldMetaData attached to `buffer`. If the buffer has no
+// metadata, or has metadata of a different type, a default-constructed
+// BitFieldMetaData is installed (with a log message) and returned instead.
+// Note that the repair path modifies `buffer` through a const_cast.
+vtkm::cont::internal::BitFieldMetaData* GetBitFieldMetaData(
+  const vtkm::cont::internal::Buffer& buffer)
+{
+  using MetaDataType = vtkm::cont::internal::BitFieldMetaData;
+  using BufferType = vtkm::cont::internal::Buffer;
+
+  vtkm::cont::internal::BufferMetaData* genericMetaData = buffer.GetMetaData();
+  if (!genericMetaData)
+  {
+    VTKM_LOG_F(vtkm::cont::LogLevel::Warn, "BitField has buffer with no metadata. Setting.");
+    const_cast<BufferType&>(buffer).SetMetaData(MetaDataType{});
+    genericMetaData = buffer.GetMetaData();
+    VTKM_ASSERT(genericMetaData != nullptr);
+  }
+
+  auto* bitMetaData = dynamic_cast<MetaDataType*>(genericMetaData);
+  if (!bitMetaData)
+  {
+    VTKM_LOG_F(vtkm::cont::LogLevel::Error,
+               "BitField has a buffer with metadata of the wrong type. "
+               "Replacing, but this will likely cause problems.");
+    const_cast<BufferType&>(buffer).SetMetaData(MetaDataType{});
+    genericMetaData = buffer.GetMetaData();
+    bitMetaData = dynamic_cast<MetaDataType*>(genericMetaData);
+    VTKM_ASSERT(bitMetaData != nullptr);
+  }
+
+  // The buffer must be at least large enough to hold the recorded number of bits.
+  VTKM_ASSERT(buffer.GetNumberOfBytes() * CHAR_BIT >= bitMetaData->NumberOfBits);
+
+  return bitMetaData;
+}
+
+} // namespace detail
+
+namespace internal
+{
+
+// Defined out of line in this translation unit; there is nothing to clean up.
+BitFieldMetaData::~BitFieldMetaData() = default;
+
+// Returns a newly allocated copy of this metadata (copy-constructed from
+// *this), owned by the returned pointer to the BufferMetaData base.
+std::unique_ptr<vtkm::cont::internal::BufferMetaData> BitFieldMetaData::DeepCopy() const
+{
+  std::unique_ptr<vtkm::cont::internal::BufferMetaData> duplicate(new BitFieldMetaData(*this));
+  return duplicate;
+}
+
+} // namespace internal
+
+// Constructs a BitField whose buffer carries a default-constructed
+// BitFieldMetaData (NumberOfBits defaults to 0 per its declaration).
+BitField::BitField()
+{
+  this->Buffer.SetMetaData(internal::BitFieldMetaData{});
+}
+
+// Returns the bit count recorded in the buffer's BitFieldMetaData.
+vtkm::Id BitField::GetNumberOfBits() const
+{
+  return detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits;
+}
+
+// Resizes the underlying buffer so it can hold `numberOfBits` bits and records
+// the new bit count in the buffer's BitFieldMetaData.
+//
+// The byte size is the bit count rounded up to whole bytes and then up to a
+// multiple of BlockSize. `preserve` and `token` are forwarded unchanged to
+// Buffer::SetNumberOfBytes.
+void BitField::Allocate(vtkm::Id numberOfBits,
+                        vtkm::CopyFlag preserve,
+                        vtkm::cont::Token& token) const
+{
+  const vtkm::BufferSizeType bytesNeeded = (numberOfBits + CHAR_BIT - 1) / CHAR_BIT;
+  const vtkm::BufferSizeType blocksNeeded = (bytesNeeded + BlockSize - 1) / BlockSize;
+  const vtkm::BufferSizeType numBytes = blocksNeeded * BlockSize;
+
+  VTKM_LOG_F(vtkm::cont::LogLevel::MemCont,
+             "BitField Allocation: %llu bits, blocked up to %s bytes.",
+             static_cast<unsigned long long>(numberOfBits),
+             vtkm::cont::GetSizeString(static_cast<vtkm::UInt64>(numBytes)).c_str());
+
+  // GetBitFieldMetaData asserts that the buffer is large enough for the
+  // recorded bit count. When shrinking, lower the recorded count BEFORE the
+  // buffer gets smaller; otherwise the lookup after the resize would compare
+  // the new, smaller buffer against the stale, larger count and assert.
+  auto metadata = detail::GetBitFieldMetaData(this->Buffer);
+  if (numberOfBits < metadata->NumberOfBits)
+  {
+    metadata->NumberOfBits = numberOfBits;
+  }
+
+  this->Buffer.SetNumberOfBytes(numBytes, preserve, token);
+
+  // Look the metadata up again rather than holding the pointer across the
+  // resize, then record the final count (this is what raises it when growing).
+  detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits = numberOfBits;
+}
+
+// Forwards to Buffer::ReleaseDeviceResources on the underlying buffer. The
+// recorded bit count is not touched here.
+void BitField::ReleaseResourcesExecution()
+{
+  this->Buffer.ReleaseDeviceResources();
+}
+
+// Resets this BitField to empty: the recorded bit count becomes 0 and the
+// underlying buffer is resized to 0 bytes without preserving its contents.
+void BitField::ReleaseResources()
+{
+  // Zero the recorded bit count first. GetBitFieldMetaData asserts that the
+  // buffer is large enough for the recorded count, so looking the metadata up
+  // after shrinking the buffer to 0 bytes would assert on the stale count.
+  detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits = 0;
+
+  vtkm::cont::Token token;
+  this->Buffer.SetNumberOfBytes(0, vtkm::CopyFlag::Off, token);
+}
+
+// Requests a host read pointer from the buffer and discards it.
+// NOTE(review): presumably ReadPointerHost brings the host copy up to date as
+// a side effect, which is the only reason for the call -- confirm in Buffer.
+void BitField::SyncControlArray() const
+{
+  vtkm::cont::Token syncToken;
+  (void)this->Buffer.ReadPointerHost(syncToken);
+}
+
+// Reports whether the underlying buffer is allocated on `device`, as answered
+// by Buffer::IsAllocatedOnDevice.
+bool BitField::IsOnDevice(vtkm::cont::DeviceAdapterId device) const
+{
+  const bool allocatedThere = this->Buffer.IsAllocatedOnDevice(device);
+  return allocatedThere;
+}
+
+// Walks VTKM_DEFAULT_DEVICE_ADAPTER_LIST and returns the first device on which
+// the buffer reports being allocated, or DeviceAdapterTagUndefined if none does.
+vtkm::cont::DeviceAdapterId BitField::GetDeviceAdapterId() const
+{
+  DeviceCheckFunctor deviceSearch;
+  vtkm::ListForEach(deviceSearch, VTKM_DEFAULT_DEVICE_ADAPTER_LIST{}, this->Buffer);
+  return deviceSearch.FoundDevice;
+}
+
+// Builds a writable portal over the buffer's host write pointer and the
+// recorded bit count.
+// NOTE(review): the Token is local to this function, so whatever it holds on
+// the buffer presumably ends on return -- confirm callers do not rely on the
+// portal keeping the buffer locked.
+BitField::WritePortalType BitField::WritePortal() const
+{
+  vtkm::cont::Token token;
+  auto hostData = this->Buffer.WritePointerHost(token);
+  const vtkm::Id bitCount = detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits;
+  return WritePortalType(hostData, bitCount);
+}
+
+// Builds a read-only portal over the buffer's host read pointer and the
+// recorded bit count.
+// NOTE(review): the Token is local to this function, so whatever it holds on
+// the buffer presumably ends on return -- confirm callers do not rely on the
+// portal keeping the buffer locked.
+BitField::ReadPortalType BitField::ReadPortal() const
+{
+  vtkm::cont::Token token;
+  auto hostData = this->Buffer.ReadPointerHost(token);
+  const vtkm::Id bitCount = detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits;
+  return ReadPortalType(hostData, bitCount);
+}
+
+// Builds a read-only portal over the buffer's read pointer for `device`, using
+// the caller's `token`, together with the recorded bit count.
+BitField::ReadPortalType BitField::PrepareForInput(vtkm::cont::DeviceAdapterId device,
+                                                   vtkm::cont::Token& token) const
+{
+  auto deviceData = this->Buffer.ReadPointerDevice(device, token);
+  const vtkm::Id bitCount = detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits;
+  return ReadPortalType(deviceData, bitCount);
+}
+
+// Allocates `numBits` bits without preserving existing contents, then builds a
+// writable portal over the buffer's write pointer for `device`.
+BitField::WritePortalType BitField::PrepareForOutput(vtkm::Id numBits,
+                                                     vtkm::cont::DeviceAdapterId device,
+                                                     vtkm::cont::Token& token) const
+{
+  this->Allocate(numBits, vtkm::CopyFlag::Off, token);
+
+  auto deviceData = this->Buffer.WritePointerDevice(device, token);
+  const vtkm::Id bitCount = detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits;
+  return WritePortalType(deviceData, bitCount);
+}
+
+// Builds a writable portal over the buffer's write pointer for `device`
+// without resizing; the bit count is whatever the metadata currently records.
+BitField::WritePortalType BitField::PrepareForInPlace(vtkm::cont::DeviceAdapterId device,
+                                                      vtkm::cont::Token& token) const
+{
+  auto deviceData = this->Buffer.WritePointerDevice(device, token);
+  const vtkm::Id bitCount = detail::GetBitFieldMetaData(this->Buffer)->NumberOfBits;
+  return WritePortalType(deviceData, bitCount);
+}
+
+}
+} // namespace vtkm::cont
--- a/vtkm/cont/BitField.h
+++ b/vtkm/cont/BitField.h
@ -11,12 +11,10 @@
 #ifndef vtk_m_cont_BitField_h
 #define vtk_m_cont_BitField_h

-#include <vtkm/cont/internal/AtomicInterfaceControl.h>
-#include <vtkm/cont/internal/AtomicInterfaceExecution.h>
-
 #include <vtkm/cont/ArrayHandle.h>
-#include <vtkm/cont/Logging.h>
+#include <vtkm/cont/vtkm_cont_export.h>

+#include <vtkm/Atomic.h>
 #include <vtkm/Deprecated.h>
 #include <vtkm/List.h>
 #include <vtkm/Types.h>
@ -33,6 +31,22 @@ namespace cont

 class BitField;

+namespace internal
+{
+
+struct StorageTagBitField;
+
+struct VTKM_CONT_EXPORT BitFieldMetaData : vtkm::cont::internal::BufferMetaData
+{
+  vtkm::Id NumberOfBits = 0;
+
+  VTKM_CONT ~BitFieldMetaData() override;
+
+  VTKM_CONT std::unique_ptr<vtkm::cont::internal::BufferMetaData> DeepCopy() const override;
+};
+
+}
+
 namespace detail
 {

@ -61,7 +75,7 @@ struct BitFieldTraits

  /// Require an unsigned integral type that is <= BlockSize bytes, and is
  /// is supported by the specified AtomicInterface.
-  template <typename WordType, typename AtomicInterface>
+  template <typename WordType>
  using IsValidWordTypeAtomic =
    std::integral_constant<bool,
                           /* is unsigned */
@ -71,7 +85,7 @@ struct BitFieldTraits
                             /* BlockSize is a multiple of WordType */
                             static_cast<size_t>(BlockSize) % sizeof(WordType) == 0 &&
                             /* Supported by atomic interface */
-                             vtkm::ListHas<typename AtomicInterface::WordTypes, WordType>::value>;
+                             vtkm::ListHas<vtkm::AtomicTypesSupported, WordType>::value>;
 };

 /// Identifies a bit in a BitField by Word and BitOffset. Note that these
@ -85,10 +99,13 @@ struct BitCoordinate
  vtkm::Int32 BitOffset; // [0, bitsInWord)
 };

+VTKM_CONT_EXPORT vtkm::cont::internal::BitFieldMetaData* GetBitFieldMetaData(
+  const vtkm::cont::internal::Buffer& buffer);
+
 /// Portal for performing bit or word operations on a BitField.
 ///
 /// This is the implementation used by BitPortal and BitPortalConst.
-template <typename AtomicInterface_, bool IsConst>
+template <bool IsConst>
 class BitPortalBase
 {
  // Checks if PortalType has a GetIteratorBegin() method that returns a
@ -105,12 +122,8 @@ class BitPortalBase
  using BufferType = MaybeConstPointer<void>; // void* or void const*, as appropriate

 public:
-  /// The atomic interface used to carry out atomic operations. See
-  /// AtomicInterfaceExecution<Device> and AtomicInterfaceControl
-  using AtomicInterface = AtomicInterface_;
-
  /// The fastest word type for performing bitwise operations through AtomicInterface.
-  using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
+  using WordTypePreferred = vtkm::AtomicTypePreferred;

  /// MPL check for whether a WordType may be used for non-atomic operations.
  template <typename WordType>
@ -118,7 +131,7 @@ public:

  /// MPL check for whether a WordType may be used for atomic operations.
  template <typename WordType>
-  using IsValidWordTypeAtomic = BitFieldTraits::IsValidWordTypeAtomic<WordType, AtomicInterface>;
+  using IsValidWordTypeAtomic = BitFieldTraits::IsValidWordTypeAtomic<WordType>;

  VTKM_STATIC_ASSERT_MSG(IsValidWordType<WordTypeDefault>::value,
                         "Internal error: Default word type is invalid.");
@ -132,16 +145,13 @@ public:

 protected:
  friend class vtkm::cont::BitField;
+  friend class vtkm::cont::internal::Storage<bool, vtkm::cont::internal::StorageTagBitField>;

-  /// Construct a BitPortal from an ArrayHandle with basic storage's portal.
-  template <typename PortalType>
-  VTKM_EXEC_CONT BitPortalBase(const PortalType& portal, vtkm::Id numberOfBits)
-    : Data{ portal.GetIteratorBegin() }
+  /// Construct a BitPortal from a raw array.
+  VTKM_CONT BitPortalBase(BufferType rawArray, vtkm::Id numberOfBits)
+    : Data{ rawArray }
    , NumberOfBits{ numberOfBits }
  {
-    VTKM_STATIC_ASSERT_MSG(HasPointerAccess<PortalType>::value,
-                           "Source portal must return a pointer from "
-                           "GetIteratorBegin().");
  }

 public:
@ -281,7 +291,7 @@ public:
    VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
-    AtomicInterface::Store(this->GetWordAddress<WordType>(wordIdx), word);
+    vtkm::AtomicStore(this->GetWordAddress<WordType>(wordIdx), word);
  }

  /// Get the word (of type @a WordType) at @a wordIdx using non-atomic
@ -300,7 +310,7 @@ public:
    VTKM_STATIC_ASSERT_MSG(IsValidWordTypeAtomic<WordType>::value,
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
-    return AtomicInterface::Load(this->GetWordAddress<WordType>(wordIdx));
+    return vtkm::AtomicLoad(this->GetWordAddress<WordType>(wordIdx));
  }

  /// Toggle the bit at @a bitIdx, returning the original value. This method
@ -326,7 +336,7 @@ public:
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
    WordType* addr = this->GetWordAddress<WordType>(wordIdx);
-    return AtomicInterface::Not(addr);
+    return vtkm::AtomicNot(addr);
  }

  /// Perform an "and" operation between the bit at @a bitIdx and @a val,
@ -356,7 +366,7 @@ public:
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
    WordType* addr = this->GetWordAddress<WordType>(wordIdx);
-    return AtomicInterface::And(addr, wordmask);
+    return vtkm::AtomicAnd(addr, wordmask);
  }

  /// Perform an "of" operation between the bit at @a bitIdx and @a val,
@ -386,7 +396,7 @@ public:
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
    WordType* addr = this->GetWordAddress<WordType>(wordIdx);
-    return AtomicInterface::Or(addr, wordmask);
+    return vtkm::AtomicOr(addr, wordmask);
  }

  /// Perform an "xor" operation between the bit at @a bitIdx and @a val,
@ -416,7 +426,7 @@ public:
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
    WordType* addr = this->GetWordAddress<WordType>(wordIdx);
-    return AtomicInterface::Xor(addr, wordmask);
+    return vtkm::AtomicXor(addr, wordmask);
  }

  /// Perform an atomic compare-and-swap operation on the bit at @a bitIdx.
@ -469,7 +479,7 @@ public:
                           "Requested WordType does not support atomic"
                           " operations on target execution platform.");
    WordType* addr = this->GetWordAddress<WordType>(wordIdx);
-    return AtomicInterface::CompareAndSwap(addr, newWord, expected);
+    return vtkm::AtomicCompareAndSwap(addr, newWord, expected);
  }

 private:
@ -484,70 +494,75 @@ private:
  vtkm::Id NumberOfBits{ 0 };
 };

-template <typename AtomicOps>
-using BitPortal = BitPortalBase<AtomicOps, false>;
+using BitPortal = BitPortalBase<false>;

-template <typename AtomicOps>
-using BitPortalConst = BitPortalBase<AtomicOps, true>;
+using BitPortalConst = BitPortalBase<true>;
+
+template <typename WordType, typename Device>
+struct IsValidWordTypeDeprecated
+{
+  using type VTKM_DEPRECATED(
+    1.6,
+    "BitField::IsValidWordTypeAtomic no longer takes a second Device parameter.") =
+    detail::BitFieldTraits::IsValidWordTypeAtomic<WordType>;
+};
+
+template <typename WordType>
+struct IsValidWordTypeDeprecated<WordType, void>
+{
+  using type = detail::BitFieldTraits::IsValidWordTypeAtomic<WordType>;
+};

 } // end namespace detail

-class BitField
+class VTKM_CONT_EXPORT BitField
 {
  static constexpr vtkm::Id BlockSize = detail::BitFieldTraits::BlockSize;

 public:
  /// The type array handle used to store the bit data internally:
-  using ArrayHandleType = ArrayHandle<WordTypeDefault, StorageTagBasic>;
+  using ArrayHandleType VTKM_DEPRECATED(1.6, "BitField now uses a Buffer to store data.") =
+    ArrayHandle<vtkm::WordTypeDefault, StorageTagBasic>;

  /// The BitPortal used in the control environment.
-  using WritePortalType = detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;
+  using WritePortalType = detail::BitPortal;

  /// A read-only BitPortal used in the control environment.
-  using ReadPortalType = detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;
+  using ReadPortalType = detail::BitPortalConst;

-  using PortalControl VTKM_DEPRECATED(1.6, "Use BitField::WritePortalType instead.") =
-    detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>;
+  using PortalControl VTKM_DEPRECATED(1.6,
+                                      "Use BitField::WritePortalType instead.") = detail::BitPortal;
  using PortalConstControl VTKM_DEPRECATED(1.6, "Use ArrayBitField::ReadPortalType instead.") =
-    detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>;
+    detail::BitPortalConst;

  template <typename Device>
  struct ExecutionTypes
  {
-    /// The AtomicInterfaceExecution implementation used by the specified device.
-    using AtomicInterface = vtkm::cont::internal::AtomicInterfaceExecution<Device>;
-
    /// The preferred word type used by the specified device.
-    using WordTypePreferred = typename AtomicInterface::WordTypePreferred;
+    using WordTypePreferred = vtkm::AtomicTypePreferred;

    /// A BitPortal that is usable on the specified device.
-    using Portal = detail::BitPortal<AtomicInterface>;
+    using Portal = detail::BitPortal;

    /// A read-only BitPortal that is usable on the specified device.
-    using PortalConst = detail::BitPortalConst<AtomicInterface>;
+    using PortalConst = detail::BitPortalConst;
  };

  /// Check whether a word type is valid for non-atomic operations.
  template <typename WordType>
  using IsValidWordType = detail::BitFieldTraits::IsValidWordType<WordType>;

-  /// Check whether a word type is valid for atomic operations on a specific
-  /// device.
-  template <typename WordType, typename Device>
-  using IsValidWordTypeAtomic = detail::BitFieldTraits::
-    IsValidWordTypeAtomic<WordType, vtkm::cont::internal::AtomicInterfaceExecution<Device>>;
+  /// Check whether a word type is valid for atomic operations.
+  template <typename WordType, typename Device = void>
+  using IsValidWordTypeAtomic = detail::BitFieldTraits::IsValidWordTypeAtomic<WordType>;

  /// Check whether a word type is valid for atomic operations from the control
  /// environment.
  template <typename WordType>
-  using IsValidWordTypeAtomicControl =
-    detail::BitFieldTraits::IsValidWordTypeAtomic<WordType,
-                                                  vtkm::cont::internal::AtomicInterfaceControl>;
+  using IsValidWordTypeAtomicControl VTKM_DEPRECATED(1.6, "Use IsValidWordTypeAtomic instead.") =
+    detail::BitFieldTraits::IsValidWordTypeAtomic<WordType>;

-  VTKM_CONT BitField()
-    : Internals{ std::make_shared<InternalStruct>() }
-  {
-  }
+  VTKM_CONT BitField();
  VTKM_CONT BitField(const BitField&) = default;
  VTKM_CONT BitField(BitField&&) noexcept = default;
  VTKM_CONT ~BitField() = default;
@ -555,22 +570,23 @@ public:
  VTKM_CONT BitField& operator=(BitField&&) noexcept = default;

  VTKM_CONT
-  bool operator==(const BitField& rhs) const { return this->Internals == rhs.Internals; }
+  bool operator==(const BitField& rhs) const { return this->Buffer == rhs.Buffer; }

  VTKM_CONT
-  bool operator!=(const BitField& rhs) const { return this->Internals != rhs.Internals; }
+  bool operator!=(const BitField& rhs) const { return this->Buffer != rhs.Buffer; }
+
+  /// Return the internal `Buffer` used to store the `BitField`.
+  VTKM_CONT vtkm::cont::internal::Buffer GetBuffer() const { return this->Buffer; }

  /// Return the internal ArrayHandle used to store the BitField.
-  VTKM_CONT
-  ArrayHandleType& GetData() { return this->Internals->Data; }
-
-  /// Return the internal ArrayHandle used to store the BitField.
-  VTKM_CONT
-  const ArrayHandleType& GetData() const { return this->Internals->Data; }
+  VTKM_CONT VTKM_DEPRECATED(1.6, "BitField now uses a Buffer to store data.")
+    ArrayHandle<vtkm::WordTypeDefault, StorageTagBasic> GetData() const
+  {
+    return vtkm::cont::ArrayHandle<vtkm::WordTypeDefault, StorageTagBasic>(&this->Buffer);
+  }

  /// Return the number of bits stored by this BitField.
-  VTKM_CONT
-  vtkm::Id GetNumberOfBits() const { return this->Internals->NumberOfBits; }
+  VTKM_CONT vtkm::Id GetNumberOfBits() const;

  /// Return the number of words (of @a WordType) stored in this bit fields.
  ///
@ -579,85 +595,69 @@ public:
  {
    VTKM_STATIC_ASSERT(IsValidWordType<WordType>::value);
    static constexpr vtkm::Id WordBits = static_cast<vtkm::Id>(sizeof(WordType) * CHAR_BIT);
-    return (this->Internals->NumberOfBits + WordBits - 1) / WordBits;
+    return (this->GetNumberOfBits() + WordBits - 1) / WordBits;
  }

  /// Allocate the requested number of bits.
-  VTKM_CONT
-  void Allocate(vtkm::Id numberOfBits)
+  VTKM_CONT void Allocate(vtkm::Id numberOfBits,
+                          vtkm::CopyFlag preserve,
+                          vtkm::cont::Token& token) const;
+
+  /// Allocate the requested number of bits.
+  VTKM_CONT void Allocate(vtkm::Id numberOfBits,
+                          vtkm::CopyFlag preserve = vtkm::CopyFlag::Off) const
  {
-    const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numberOfBits);
-
-    VTKM_LOG_F(vtkm::cont::LogLevel::MemCont,
-               "BitField Allocation: %llu bits, blocked up to %s.",
-               static_cast<unsigned long long>(numberOfBits),
-               vtkm::cont::GetSizeString(
-                 static_cast<vtkm::UInt64>(static_cast<size_t>(numWords) * sizeof(WordTypeDefault)))
-                 .c_str());
-
-    this->Internals->Data.Allocate(numWords);
-    this->Internals->NumberOfBits = numberOfBits;
+    vtkm::cont::Token token;
+    this->Allocate(numberOfBits, preserve, token);
  }

  /// Shrink the bit field to the requested number of bits.
-  VTKM_CONT
-  void Shrink(vtkm::Id numberOfBits)
+  VTKM_CONT VTKM_DEPRECATED(1.6,
+                            "Use Allocate with preserve = On.") void Shrink(vtkm::Id numberOfBits)
  {
-    const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numberOfBits);
-    this->Internals->Data.Shrink(numWords);
-    this->Internals->NumberOfBits = numberOfBits;
+    this->Allocate(numberOfBits, vtkm::CopyFlag::On);
  }

  /// Release all execution-side resources held by this BitField.
-  VTKM_CONT
-  void ReleaseResourcesExecution() { this->Internals->Data.ReleaseResourcesExecution(); }
+  VTKM_CONT void ReleaseResourcesExecution();

  /// Release all resources held by this BitField and reset to empty.
-  VTKM_CONT
-  void ReleaseResources()
-  {
-    this->Internals->Data.ReleaseResources();
-    this->Internals->NumberOfBits = 0;
-  }
+  VTKM_CONT void ReleaseResources();

  /// Force the control array to sync with the last-used device.
-  VTKM_CONT
-  void SyncControlArray() const { this->Internals->Data.SyncControlArray(); }
+  VTKM_CONT void SyncControlArray() const;

-  /// The id of the device where the most up-to-date copy of the data is
-  /// currently resident. If the data is on the host, DeviceAdapterTagUndefined
-  /// is returned.
-  VTKM_CONT
-  DeviceAdapterId GetDeviceAdapterId() const { return this->Internals->Data.GetDeviceAdapterId(); }
+  /// Returns true if the `BitField`'s data is on the given device. If the data are on the given
+  /// device, then preparing for that device should not require any data movement.
+  ///
+  VTKM_CONT bool IsOnDevice(vtkm::cont::DeviceAdapterId device) const;
+
+  /// Returns true if the `BitField`'s data is on the host. If the data are on the host,
+  /// then calling `ReadPortal` or `WritePortal` should not require any data movement.
+  ///
+  VTKM_CONT bool IsOnHost() const
+  {
+    return this->IsOnDevice(vtkm::cont::DeviceAdapterTagUndefined{});
+  }
+
+  VTKM_CONT VTKM_DEPRECATED(1.6, "Data can be on multiple devices. Use IsOnDevice.")
+    vtkm::cont::DeviceAdapterId GetDeviceAdapterId() const;

  /// \brief Get a portal to the data that is usable from the control environment.
  ///
  /// As long as this portal is in scope, no one else will be able to read or write the BitField.
-  VTKM_CONT WritePortalType WritePortal() const
-  {
-    auto dataPortal = this->Internals->Data.WritePortal();
-    return WritePortalType{ dataPortal, this->Internals->NumberOfBits };
-  }
+  VTKM_CONT WritePortalType WritePortal() const;

  /// \brief Get a read-only portal to the data that is usable from the control environment.
  ///
  /// As long as this portal is in scope, no one else will be able to write in the BitField.
-  VTKM_CONT ReadPortalType ReadPortal() const
-  {
-    auto dataPortal = this->Internals->Data.ReadPortal();
-    return ReadPortalType{ dataPortal, this->Internals->NumberOfBits };
-  }
+  VTKM_CONT ReadPortalType ReadPortal() const;

  VTKM_CONT
  VTKM_DEPRECATED(1.6,
                  "Use BitField::WritePortal() instead. "
                  "Note that the returned portal will lock the array while it is in scope.")
-  detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl> GetPortalControl()
-  {
-    return detail::BitPortal<vtkm::cont::internal::AtomicInterfaceControl>{
-      this->Internals->Data.WritePortal(), this->Internals->NumberOfBits
-    };
-  }
+  detail::BitPortal GetPortalControl() { return this->WritePortal(); }

  /// Get a read-only portal to the data that is usable from the control
  /// environment.
@ -665,27 +665,15 @@ public:
  VTKM_DEPRECATED(1.6,
                  "Use BitField::ReadPortal() instead. "
                  "Note that the returned portal will lock the array while it is in scope.")
-  detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl> GetPortalConstControl() const
-  {
-    return detail::BitPortalConst<vtkm::cont::internal::AtomicInterfaceControl>{
-      this->Internals->Data.ReadPortal(), this->Internals->NumberOfBits
-    };
-  }
+  detail::BitPortalConst GetPortalConstControl() const { return this->ReadPortal(); }

  /// Prepares this BitField to be used as an input to an operation in the
  /// execution environment. If necessary, copies data to the execution
  /// environment. Can throw an exception if this BitField does not yet contain
  /// any data. Returns a portal that can be used in code running in the
  /// execution environment.
-  template <typename DeviceAdapterTag>
-  VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::PortalConst PrepareForInput(
-    DeviceAdapterTag device,
-    vtkm::cont::Token& token) const
-  {
-    using PortalType = typename ExecutionTypes<DeviceAdapterTag>::PortalConst;
-    return PortalType{ this->Internals->Data.PrepareForInput(device, token),
-                       this->Internals->NumberOfBits };
-  }
+  VTKM_CONT ReadPortalType PrepareForInput(vtkm::cont::DeviceAdapterId device,
+                                           vtkm::cont::Token& token) const;

  template <typename DeviceAdapterTag>
  VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInput now requires a vtkm::cont::Token object.")
@ -702,24 +690,9 @@ public:
  /// that the array will be filled soon (i.e. before any other methods of this
  /// object are called). Returns a portal that can be used in code running in
  /// the execution environment.
-  template <typename DeviceAdapterTag>
-  VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal
-  PrepareForOutput(vtkm::Id numBits, DeviceAdapterTag device, vtkm::cont::Token& token) const
-  {
-    using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
-    const vtkm::Id numWords = this->BitsToAllocatedStorageWords(numBits);
-
-    VTKM_LOG_F(vtkm::cont::LogLevel::MemExec,
-               "BitField Allocation: %llu bits, blocked up to %s.",
-               static_cast<unsigned long long>(numBits),
-               vtkm::cont::GetSizeString(
-                 static_cast<vtkm::UInt64>(static_cast<size_t>(numWords) * sizeof(WordTypeDefault)))
-                 .c_str());
-
-    auto portal = this->Internals->Data.PrepareForOutput(numWords, device, token);
-    this->Internals->NumberOfBits = numBits;
-    return PortalType{ portal, numBits };
-  }
+  VTKM_CONT WritePortalType PrepareForOutput(vtkm::Id numBits,
+                                             vtkm::cont::DeviceAdapterId device,
+                                             vtkm::cont::Token& token) const;

  template <typename DeviceAdapterTag>
  VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForOutput now requires a vtkm::cont::Token object.")
@ -735,15 +708,8 @@ public:
  /// the execution environment. Can throw an exception if this BitField does
  /// not yet contain any data. Returns a portal that can be used in code
  /// running in the execution environment.
-  template <typename DeviceAdapterTag>
-  VTKM_CONT typename ExecutionTypes<DeviceAdapterTag>::Portal PrepareForInPlace(
-    DeviceAdapterTag device,
-    vtkm::cont::Token& token) const
-  {
-    using PortalType = typename ExecutionTypes<DeviceAdapterTag>::Portal;
-    return PortalType{ this->Internals->Data.PrepareForInPlace(device, token),
-                       this->Internals->NumberOfBits };
-  }
+  VTKM_CONT WritePortalType PrepareForInPlace(vtkm::cont::DeviceAdapterId device,
+                                              vtkm::cont::Token& token) const;

  template <typename DeviceAdapterTag>
  VTKM_CONT VTKM_DEPRECATED(1.6, "PrepareForInPlace now requires a vtkm::cont::Token object.")
@ -755,27 +721,7 @@ public:
  }

 private:
-  /// Returns the number of words, padded out to respect BlockSize.
-  VTKM_CONT
-  static vtkm::Id BitsToAllocatedStorageWords(vtkm::Id numBits)
-  {
-    static constexpr vtkm::Id InternalWordSize = static_cast<vtkm::Id>(sizeof(WordTypeDefault));
-
-    // Round up to BlockSize bytes:
-    const vtkm::Id bytesNeeded = (numBits + CHAR_BIT - 1) / CHAR_BIT;
-    const vtkm::Id blocksNeeded = (bytesNeeded + BlockSize - 1) / BlockSize;
-    const vtkm::Id numBytes = blocksNeeded * BlockSize;
-    const vtkm::Id numWords = numBytes / InternalWordSize;
-    return numWords;
-  }
-
-  struct VTKM_ALWAYS_EXPORT InternalStruct
-  {
-    ArrayHandleType Data;
-    vtkm::Id NumberOfBits;
-  };
-
-  std::shared_ptr<InternalStruct> Internals;
+  mutable vtkm::cont::internal::Buffer Buffer;
 };
 }
 } // end namespace vtkm::cont
--- a/vtkm/cont/CMakeLists.txt
+++ b/vtkm/cont/CMakeLists.txt
@ -139,6 +139,7 @@ set(sources
  ArrayHandle.cxx
  ArrayHandleBasic.cxx
  ArrayHandleSOA.cxx
+  BitField.cxx
  ColorTablePresets.cxx
  DeviceAdapterTag.cxx
  EnvironmentTracker.cxx
@ -239,6 +240,7 @@ add_subdirectory(serial)
 add_subdirectory(tbb)
 add_subdirectory(openmp)
 add_subdirectory(cuda)
+add_subdirectory(kokkos)

 set(backends )
 if(TARGET vtkm::tbb)
@ -250,6 +252,9 @@ endif()
 if(TARGET vtkm::openmp)
  list(APPEND backends vtkm::openmp)
 endif()
+if(TARGET vtkm::kokkos)
+  list(APPEND backends vtkm::kokkos)
+endif()

 target_link_libraries(vtkm_cont PUBLIC vtkm_compiler_flags ${backends})
 target_link_libraries(vtkm_cont PUBLIC Threads::Threads)
--- a/vtkm/cont/CellLocatorUniformBins.cxx
+++ b/vtkm/cont/CellLocatorUniformBins.cxx
@ -70,9 +70,10 @@ VTKM_EXEC static BinsBBox ComputeIntersectingBins(const Bounds cellBounds, const

 VTKM_EXEC static vtkm::Id GetNumberOfBins(const BinsBBox& binsBBox)
 {
-  return binsBBox.Empty() ? 0 : ((binsBBox.Max[0] - binsBBox.Min[0] + 1) *
-                                 (binsBBox.Max[1] - binsBBox.Min[1] + 1) *
-                                 (binsBBox.Max[2] - binsBBox.Min[2] + 1));
+  return binsBBox.Empty()
+    ? 0
+    : ((binsBBox.Max[0] - binsBBox.Min[0] + 1) * (binsBBox.Max[1] - binsBBox.Min[1] + 1) *
+       (binsBBox.Max[2] - binsBBox.Min[2] + 1));
 }

 class BBoxIterator
--- a/vtkm/cont/CellSet.cxx
+++ b/vtkm/cont/CellSet.cxx
@ -15,8 +15,6 @@ namespace vtkm
 namespace cont
 {

-CellSet::~CellSet()
-{
-}
+CellSet::~CellSet() {}
 }
 } // namespace vtkm::cont
--- a/vtkm/cont/CellSetExplicit.h
+++ b/vtkm/cont/CellSetExplicit.h
@ -46,6 +46,7 @@ struct NumIndicesDecorator
  {
    OffsetsPortal Offsets;

+    VTKM_SUPPRESS_EXEC_WARNINGS
    VTKM_EXEC_CONT
    vtkm::IdComponent operator()(vtkm::Id cellId) const
    {
@ -130,9 +131,8 @@ class VTKM_ALWAYS_EXPORT CellSetExplicit : public CellSet
  struct ConnectivityChooser
  {
  private:
-    using Chooser = typename detail::CellSetExplicitConnectivityChooser<Thisclass,
-                                                                        VisitTopology,
-                                                                        IncidentTopology>;
+    using Chooser = typename detail::
+      CellSetExplicitConnectivityChooser<Thisclass, VisitTopology, IncidentTopology>;

  public:
    using ConnectivityType = typename Chooser::ConnectivityType;
--- a/vtkm/cont/CellSetExtrude.cxx
+++ b/vtkm/cont/CellSetExtrude.cxx
@ -64,18 +64,18 @@ CellSetExtrude::CellSetExtrude(const CellSetExtrude& src)
 }

 CellSetExtrude::CellSetExtrude(CellSetExtrude&& src) noexcept
-  : CellSet(std::forward<CellSet>(src)),
-    IsPeriodic(src.IsPeriodic),
-    NumberOfPointsPerPlane(src.NumberOfPointsPerPlane),
-    NumberOfCellsPerPlane(src.NumberOfCellsPerPlane),
-    NumberOfPlanes(src.NumberOfPlanes),
-    Connectivity(std::move(src.Connectivity)),
-    NextNode(std::move(src.NextNode)),
-    ReverseConnectivityBuilt(src.ReverseConnectivityBuilt),
-    RConnectivity(std::move(src.RConnectivity)),
-    ROffsets(std::move(src.ROffsets)),
-    RCounts(std::move(src.RCounts)),
-    PrevNode(std::move(src.PrevNode))
+  : CellSet(std::forward<CellSet>(src))
+  , IsPeriodic(src.IsPeriodic)
+  , NumberOfPointsPerPlane(src.NumberOfPointsPerPlane)
+  , NumberOfCellsPerPlane(src.NumberOfCellsPerPlane)
+  , NumberOfPlanes(src.NumberOfPlanes)
+  , Connectivity(std::move(src.Connectivity))
+  , NextNode(std::move(src.NextNode))
+  , ReverseConnectivityBuilt(src.ReverseConnectivityBuilt)
+  , RConnectivity(std::move(src.RConnectivity))
+  , ROffsets(std::move(src.ROffsets))
+  , RCounts(std::move(src.RCounts))
+  , PrevNode(std::move(src.PrevNode))
 {
 }

@ -117,9 +117,7 @@ CellSetExtrude& CellSetExtrude::operator=(CellSetExtrude&& src) noexcept
  return *this;
 }

-CellSetExtrude::~CellSetExtrude()
-{
-}
+CellSetExtrude::~CellSetExtrude() {}

 vtkm::Int32 CellSetExtrude::GetNumberOfPlanes() const
 {
--- a/vtkm/cont/CellSetListTag.h
+++ b/vtkm/cont/CellSetListTag.h
@ -20,13 +20,13 @@

 #include <vtkm/cont/CellSetList.h>

-#define VTK_M_OLD_CELL_LIST_DEFINITION(name)                                                       \
-  struct VTKM_ALWAYS_EXPORT VTKM_DEPRECATED(                                                       \
-    1.6,                                                                                           \
-    "CellSetListTag" #name " replaced by CellSetList" #name ". "                                   \
-    "Note that the new CellSetList" #name " cannot be subclassed.") CellSetListTag##name           \
-    : vtkm::internal::ListAsListTag<CellSetList##name>                                             \
-  {                                                                                                \
+#define VTK_M_OLD_CELL_LIST_DEFINITION(name)                                             \
+  struct VTKM_ALWAYS_EXPORT VTKM_DEPRECATED(                                             \
+    1.6,                                                                                 \
+    "CellSetListTag" #name " replaced by CellSetList" #name ". "                         \
+    "Note that the new CellSetList" #name " cannot be subclassed.") CellSetListTag##name \
+    : vtkm::internal::ListAsListTag<CellSetList##name>                                   \
+  {                                                                                      \
  }

 namespace vtkm
--- a/vtkm/cont/CellSetPermutation.h
+++ b/vtkm/cont/CellSetPermutation.h
@ -172,9 +172,8 @@ private:


 public:
-  using ConnectivityArrays = vtkm::cont::internal::RConnBuilderInputData<ConnectivityStorageTag,
-                                                                         OffsetsStorageTag,
-                                                                         NumIndicesStorageTag>;
+  using ConnectivityArrays = vtkm::cont::internal::
+    RConnBuilderInputData<ConnectivityStorageTag, OffsetsStorageTag, NumIndicesStorageTag>;

  template <typename Device>
  static ConnectivityArrays Get(const CellSetPermutationType& cellset, Device)
--- a/vtkm/cont/CellSetSingleType.h
+++ b/vtkm/cont/CellSetSingleType.h
@ -61,10 +61,11 @@ public:
  }

  VTKM_CONT
-  CellSetSingleType(Thisclass&& src) noexcept : Superclass(std::forward<Superclass>(src)),
-                                                ExpectedNumberOfCellsAdded(-1),
-                                                CellShapeAsId(src.CellShapeAsId),
-                                                NumberOfPointsPerCell(src.NumberOfPointsPerCell)
+  CellSetSingleType(Thisclass&& src) noexcept
+    : Superclass(std::forward<Superclass>(src))
+    , ExpectedNumberOfCellsAdded(-1)
+    , CellShapeAsId(src.CellShapeAsId)
+    , NumberOfPointsPerCell(src.NumberOfPointsPerCell)
  {
  }

--- a/vtkm/cont/CellSetStructured.h
+++ b/vtkm/cont/CellSetStructured.h
@ -150,8 +150,9 @@ public:
    return *this;
  }

-  CellSetStructured(CellSetStructured&& src) noexcept : CellSet(),
-                                                        Structure(std::move(src.Structure))
+  CellSetStructured(CellSetStructured&& src) noexcept
+    : CellSet()
+    , Structure(std::move(src.Structure))
  {
  }

--- a/vtkm/cont/CellSetStructured.hxx
+++ b/vtkm/cont/CellSetStructured.hxx
@ -26,13 +26,12 @@ typename CellSetStructured<DIMENSION>::SchedulingRangeType

 template <vtkm::IdComponent DIMENSION>
 template <typename DeviceAdapter, typename VisitTopology, typename IncidentTopology>
-typename CellSetStructured<DIMENSION>::template ExecutionTypes<DeviceAdapter,
-                                                               VisitTopology,
-                                                               IncidentTopology>::ExecObjectType
-CellSetStructured<DIMENSION>::PrepareForInput(DeviceAdapter,
-                                              VisitTopology,
-                                              IncidentTopology,
-                                              vtkm::cont::Token&) const
+typename CellSetStructured<DIMENSION>::
+  template ExecutionTypes<DeviceAdapter, VisitTopology, IncidentTopology>::ExecObjectType
+  CellSetStructured<DIMENSION>::PrepareForInput(DeviceAdapter,
+                                                VisitTopology,
+                                                IncidentTopology,
+                                                vtkm::cont::Token&) const
 {
  using ConnectivityType =
    typename ExecutionTypes<DeviceAdapter, VisitTopology, IncidentTopology>::ExecObjectType;
--- a/vtkm/cont/ColorTable.cxx
+++ b/vtkm/cont/ColorTable.cxx
@ -110,9 +110,7 @@ ColorTable::ColorTable(const std::string& name,
 }

 //----------------------------------------------------------------------------
-ColorTable::~ColorTable()
-{
-}
+ColorTable::~ColorTable() {}

 //----------------------------------------------------------------------------
 const std::string& ColorTable::GetName() const
--- a/vtkm/cont/DataSetBuilderExplicit.cxx
+++ b/vtkm/cont/DataSetBuilderExplicit.cxx
@ -16,9 +16,7 @@ namespace cont
 {

 VTKM_CONT
-DataSetBuilderExplicitIterative::DataSetBuilderExplicitIterative()
-{
-}
+DataSetBuilderExplicitIterative::DataSetBuilderExplicitIterative() {}


 VTKM_CONT
--- a/Show More
+++ b/Show More