Merge remote-tracking branch 'vtkm/master'

2024-09-20 02:55:47 +00:00 · 2019-06-26 21:58:55 -07:00 · 2019-06-26 21:58:55 -07:00 · 10da5e13e5
commit 10da5e13e5
parent 7001563eda 903c2604df
49 changed files with 3164 additions and 1298 deletions
--- a/CMake/FindMPI.cmake
+++ b/CMake/FindMPI.cmake
--- a/CMake/VTKmMPI.cmake
+++ b/CMake/VTKmMPI.cmake
@ -0,0 +1,24 @@
+##============================================================================
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##============================================================================
+
+# Locate MPI when VTK-m is configured with MPI support, unless the
+# MPI::MPI_CXX imported target has already been provided (for example by
+# a parent project that ran find_package(MPI) itself).
+if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
+  if(CMAKE_VERSION VERSION_LESS 3.15)
+    # CMake 3.10 introduced the new MPI module, but the fixes related to
+    # MPI+CUDA that VTK-m needs are only found in CMake 3.15+.
+    # CMAKE_MODULE_PATH is left untouched here, so any FindMPI.cmake on
+    # that path is searched before the module shipped with CMake.
+    find_package(MPI REQUIRED MODULE)
+  else()
+    # Clunky, but we need to make sure we use the upstream module if it
+    # exists: temporarily clear CMAKE_MODULE_PATH so that only the
+    # FindMPI.cmake shipped with CMake can be found, then restore it.
+    set(orig_CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH}")
+    set(CMAKE_MODULE_PATH "")
+    # REQUIRED mirrors the pre-3.15 branch: MPI was explicitly requested,
+    # so a missing MPI must stop configuration here instead of surfacing
+    # later as an unknown MPI::MPI_CXX target.
+    find_package(MPI REQUIRED MODULE)
+    set(CMAKE_MODULE_PATH "${orig_CMAKE_MODULE_PATH}")
+    unset(orig_CMAKE_MODULE_PATH)
+  endif()
+endif()
--- a/CMake/VTKmWrappers.cmake
+++ b/CMake/VTKmWrappers.cmake
@ -12,6 +12,7 @@ include(CMakeParseArguments)

 include(VTKmDeviceAdapters)
 include(VTKmCPUVectorization)
+include(VTKmMPI)

 #-----------------------------------------------------------------------------
 # Utility to build a kit name from the current directory.
--- a/CMake/testing/VTKmTestInstall.cmake
+++ b/CMake/testing/VTKmTestInstall.cmake
@ -62,13 +62,13 @@ file(GENERATE
  OUTPUT "${${file_loc_var}}"
  CONTENT
 "
-set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING \"\")
-set(CMAKE_PREFIX_PATH ${install_prefix} CACHE STRING \"\")
-set(CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER} CACHE FILEPATH \"\")
-set(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} CACHE STRING \"\")
-set(CMAKE_CUDA_COMPILER ${CMAKE_CUDA_COMPILER} CACHE FILEPATH \"\")
-set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} CACHE STRING \"\")
-set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CUDA_HOST_COMPILER} CACHE FILEPATH \"\")
+set(CMAKE_MAKE_PROGRAM \"${CMAKE_MAKE_PROGRAM}\" CACHE FILEPATH \"\")
+set(CMAKE_PREFIX_PATH \"${CMAKE_PREFIX_PATH};${install_prefix}/\" CACHE STRING \"\")
+set(CMAKE_CXX_COMPILER \"${CMAKE_CXX_COMPILER}\" CACHE FILEPATH \"\")
+set(CMAKE_CXX_FLAGS \"$CACHE{CMAKE_CXX_FLAGS}\" CACHE STRING \"\")
+set(CMAKE_CUDA_COMPILER \"${CMAKE_CUDA_COMPILER}\" CACHE FILEPATH \"\")
+set(CMAKE_CUDA_FLAGS \"$CACHE{CMAKE_CUDA_FLAGS}\" CACHE STRING \"\")
+set(CMAKE_CUDA_HOST_COMPILER \"${CMAKE_CUDA_HOST_COMPILER}\" CACHE FILEPATH \"\")
 "
 )

@ -81,8 +81,34 @@ function(vtkm_test_against_install dir)
  set(src_dir "${CMAKE_CURRENT_SOURCE_DIR}/${name}/")
  set(build_dir "${VTKm_BINARY_DIR}/CMakeFiles/_tmp_build/test_${name}/")

-  set(build_config "${build_dir}/build_options.cmake")
-  vtkm_generate_install_build_options(build_config)
+  set(args )
+  if(CMAKE_VERSION VERSION_LESS 3.13)
+    #Before 3.13 the config file passing to cmake via ctest --build-options
+    #was broken
+    set(args
+      -DCMAKE_MAKE_PROGRAM:FILEPATH=${CMAKE_MAKE_PROGRAM}
+      -DCMAKE_PREFIX_PATH:STRING=${install_prefix}
+      -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER}
+      -DCMAKE_CUDA_COMPILER:FILEPATH=${CMAKE_CUDA_COMPILER}
+      -DCMAKE_CUDA_HOST_COMPILER:FILEPATH=${CMAKE_CUDA_HOST_COMPILER}
+      -DCMAKE_CXX_FLAGS:STRING=$CACHE{CMAKE_CXX_FLAGS}
+      -DCMAKE_CUDA_FLAGS:STRING=$CACHE{CMAKE_CUDA_FLAGS}
+    )
+  else()
+    set(build_config "${build_dir}build_options.cmake")
+    vtkm_generate_install_build_options(build_config)
+    set(args -C ${build_config})
+  endif()
+
+  if(WIN32 AND TARGET vtkm::tbb)
+    #on windows we need to specify these as FindTBB won't
+    #find the installed version just with the prefix path
+    list(APPEND args
+      -DTBB_LIBRARY_DEBUG:FILEPATH=${TBB_LIBRARY_DEBUG}
+      -DTBB_LIBRARY_RELEASE:FILEPATH=${TBB_LIBRARY_RELEASE}
+      -DTBB_INCLUDE_DIR:PATH=${TBB_INCLUDE_DIR}
+    )
+  endif()

  #determine if the test is expected to compile or fail to build. We use
  #this information to built the test name to make it clear to the user
@ -93,10 +119,13 @@ function(vtkm_test_against_install dir)

  add_test(NAME ${build_name}
           COMMAND ${CMAKE_CTEST_COMMAND}
+           -C $<CONFIG>
           --build-and-test ${src_dir} ${build_dir}
           --build-generator ${CMAKE_GENERATOR}
           --build-makeprogram ${CMAKE_MAKE_PROGRAM}
-           --build-options -C "${build_config}"
+           --build-options
+            ${args}
+            --no-warn-unused-cli
           )

  set_tests_properties(${build_name} PROPERTIES LABELS ${test_label} )
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -182,14 +182,6 @@ check_type_size("long long" VTKm_SIZE_LONG_LONG BUILTIN_TYPES_ONLY)

 #-----------------------------------------------------------------------------
 # Add subdirectories
-if(VTKm_ENABLE_MPI)
-  # This `if` is temporary and will be removed once `diy` supports building
-  # without MPI.
-  if (NOT MPI_C_FOUND)
-    find_package(MPI ${VTKm_FIND_PACKAGE_QUIETLY})
-  endif()
-endif()
-
 add_subdirectory(vtkm)

 #-----------------------------------------------------------------------------
@ -241,6 +233,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
  install(
    FILES
      ${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
+      ${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
      ${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
      ${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
    DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -253,6 +246,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
      ${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
      ${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
+      ${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
      ${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
    DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -315,6 +309,4 @@ endif()

 #-----------------------------------------------------------------------------
 # Build examples
-if(VTKm_ENABLE_EXAMPLES)
-  add_subdirectory(examples)
-endif(VTKm_ENABLE_EXAMPLES)
+add_subdirectory(examples)
--- a/LICENSE.txt
+++ b/LICENSE.txt
@ -49,7 +49,6 @@ contents of these for details on the specifics of their respective
 licenses.
 - - - - - - - - - - - - - - - - - - - - - - - - do not remove this line
 CMake/FindTBB.cmake
-CMake/FindGLEW.cmake
 Utilities
 vtkm/cont/tbb/internal/parallel_sort.h
 vtkm/cont/tbb/internal/parallel_radix_sort_tbb.h
--- a/docs/changelog/1.4/release-notes.md
+++ b/docs/changelog/1.4/release-notes.md
--- a/docs/changelog/StorageBase-StealArray-returns-delete-function.md
+++ b/docs/changelog/StorageBase-StealArray-returns-delete-function.md
@ -1,24 +0,0 @@
-## `StorageBasic` StealArray() now provides delete function to new owner
-
-Memory that is stolen from VTK-m has to be freed correctly. This is required
-as the memory could have been allocated with `new`, `malloc` or even `cudaMallocManaged`.
-
-Previously it was very easy to transfer ownership of memory out of VTK-m and
-either fail to capture the free function, or ask for it after the transfer
-operation which would return a nullptr. Now stealing an array also
-provides the free function reducing one source of memory leaks.
-
-To properly steal memory from VTK-m you do the following:
-```cpp
-  vtkm::cont::ArrayHandle<T> arrayHandle;
-  
-  ...
-  
-  auto* stolen = arrayHandle.StealArray();
-  T* ptr = stolen.first;
-  auto free_function = stolen.second;
-  
-  ...
-
-  free_function(ptr);
-```
--- a/docs/changelog/Variant_AsVirtual_force_cast.md
+++ b/docs/changelog/Variant_AsVirtual_force_cast.md
@ -1,6 +0,0 @@
-# VariantArrayHandle::AsVirtual<T>() performs casting
-
-The AsVirtual<T> method of VariantArrayHandle now works for any arithmetic type,
-not just the actual type of the underlying array. This works by inserting an
-ArrayHandleCast between the underlying concrete array and the new
-ArrayHandleVirtual when needed.
--- a/docs/changelog/add-cuda-kernel-details-to-logging.md
+++ b/docs/changelog/add-cuda-kernel-details-to-logging.md
@ -1,12 +0,0 @@
-# VTK-m logs details about each CUDA kernel launch
-
-The VTK-m logging infrastructure has been extended with a new log level
-`KernelLaunches` which exists between `MemTransfer` and `Cast`.
-
-This log level reports the number of blocks, threads per block, and the
-PTX version of each CUDA kernel launched.
-
-This logging level was primarily introduced to help developers that are
-tracking down issues that occur when VTK-m components have been built with
-different `sm_XX` flags and help people looking to do kernel performance
-tuning.
--- a/docs/changelog/add-vtkm_filter-target.md
+++ b/docs/changelog/add-vtkm_filter-target.md
@ -1,4 +0,0 @@
-# VTK-m provides a vtkm_filter target
-
-VTK-m now provides a `vtkm_filter` that contains pre-built components
-of filters for consuming projects.
--- a/docs/changelog/array-virtual-not-special.md
+++ b/docs/changelog/array-virtual-not-special.md
@ -1,11 +0,0 @@
-# Make ArrayHandleVirtual conform with other ArrayHandle structure
-
-Previously, ArrayHandleVirtual was defined as a specialization of
-ArrayHandle with the virtual storage tag. This was because the storage
-object was polymorphic and needed to be handled special. These changes
-moved the existing storage definition to an internal class, and then
-managed the pointer to that implementation class in a Storage object that
-can be managed like any other storage object.
-    
-Also moved the implementation of StorageAny into the implementation of the
-internal storage object.
--- a/docs/changelog/arrayhandlevirtual.md
+++ b/docs/changelog/arrayhandlevirtual.md
@ -1,45 +0,0 @@
-# Add vtkm::cont::ArrayHandleVirtual 
-
-
-Added a new class named `ArrayHandleVirtual` that allows you to type erase an
-ArrayHandle storage type by using virtual calls. This simplification makes
-storing `Fields` and `Coordinates` significantly easier as VTK-m doesn't
-need to deduce both the storage and value type when executing worklets.
-
-To construct an `ArrayHandleVirtual` one can do one of the following:
-
-```cpp
-vtkm::cont::ArrayHandle<vtkm::Float32> pressure;
-vtkm::cont::ArrayHandleConstant<vtkm::Float32> constant(42.0f);
-
-
-// construct from an array handle
-vtkm::cont::ArrayHandleVirtual<vtkm::Float32> v(pressure);
-
-// or assign from an array handle
-v = constant;
-
-```
-
-To help maintain performance `ArrayHandleVirtual` provides a collection of helper
-functions/methods to query and cast back to the concrete storage and value type:
-```cpp
-vtkm::cont::ArrayHandleConstant<vtkm::Float32> constant(42.0f);
-vtkm::cont::ArrayHandleVirtual<vtkm::Float32> v = constant;
-
-bool isConstant = vtkm::cont::IsType< decltype(constant) >(v);
-if(isConstant)
-  vtkm::cont::ArrayHandleConstant<vtkm::Float32> t = vtkm::cont::Cast< decltype(constant) >(v);
-
-```
-
-Lastly, a common operation of calling code using `ArrayHandleVirtual` is a desire to construct a new instance
-of an existing virtual handle with the same storage type. This can be done by using the `NewInstance` method
-as seen below
-```cpp
-vtkm::cont::ArrayHandle<vtkm::Float32> pressure;
-vtkm::cont::ArrayHandleVirtual<vtkm::Float32> v = pressure;
-
-vtkm::cont::ArrayHandleVirtual<vtkm::Float32> newArray = v->NewInstance();
-bool isConstant = vtkm::cont::IsType< vtkm::cont::ArrayHandle<vtkm::Float32> >(newArray); //will be true
-```
--- a/docs/changelog/arrayhandlezip-handles-writing-to-implicit-handles.md
+++ b/docs/changelog/arrayhandlezip-handles-writing-to-implicit-handles.md
@ -1,9 +0,0 @@
-# vtkm::cont::ArrayHandleZip provides a consistent API even with non-writable handles
-
-Previously ArrayHandleZip could not wrap an implicit handle and provide a consistent experience.
-The primary issue was that if you tried to use the PortalType returned by GetPortalControl() you
-would get a compile failure. This would occur as the PortalType returned would try to call `Set`
-on an ImplicitPortal which doesn't have a set method. 
-
-Now with this change, the `ZipPortal` uses SFINAE to determine if `Set` and `Get` should call the
-underlying zipped portals.
--- a/docs/changelog/asynchronize-device-independent-timer.md
+++ b/docs/changelog/asynchronize-device-independent-timer.md
@ -1,65 +0,0 @@
-# Introduce asynchronous and device independent timer
-
-The timer class is now asynchronous and device independent. It uses an API
-similar to vtkOpenGLRenderTimer, with Start(), Stop(), Reset(), Ready(),
-and GetElapsedTime() functions. For convenience and backward compatibility, each
-Start() function call will call Reset() internally. The GetElapsedTime() function
-can be used multiple times to time sequential operations, and the Stop() function
-can be helpful when you want to get the elapsed time later.
-
-Basically, it can be used in two modes:
-
-* Create a Timer without any device info.
-  * It would enable the timer for all enabled devices on the machine. Users can get a
-specific elapsed time by passing a device id into the GetElapsedTime function.
-If no device is provided, it would pick the maximum of all timer results - the
-logic behind this decision is that if cuda is disabled, openmp, serial and tbb
-roughly give the same results; if cuda is enabled it's safe to return the
-maximum elapsed time since users are more interested in the device execution
-time rather than the kernel launch time. The Ready function can be handy here
-to query the status of the timer.
-
-``` Construct a generic timer
-// Assume CUDA is enabled on the machine
-vtkm::cont::Timer timer;
-timer.Start();
-// Run the algorithm
-
-auto timeHost = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagSerial());
-// To avoid the expensive device synchronization, we query is ready here.
-if (timer.IsReady())
-{
-  auto timeDevice = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagCuda());
-}
-// Force the synchronization. Ideally device execution time would be returned,
-// which takes longer than the kernel call
-auto timeGeneral = timer.GetElapsedTime();
-```
-
-* Create a Timer with a specific device.
-  * It works as the old timer that times for a specific device id.
-``` Construct a device specific timer
-// Assume TBB is enabled on the machine
-vtkm::cont::Timer timer{vtkm::cont::DeviceAdapterTagTBB()};
-timer.Start(); // t0
-// Run the algorithm
-
-// Timer would just return 0 and warn the user in the logger that an invalid
-// device is used to query elapsed time
-auto timeInvalid = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagSerial());
-if (timer.IsReady())
-{
-  // Either will work and mark t1, return t1-t0
-  auto time1TBB = timer.GetElapsedTime(vtkm::cont::DeviceAdapterTagTBB());
-  auto time1General = timer.GetElapsedTime();
-}
-
-// Do something
-auto time2 = timer.GetElapsedTime(); // t2 will be marked and t2-t0 will be returned
-
-// Do something
-timer.Stop(); // t3 marked
-
-// Do something then summarize later
-auto timeFinal = timer.GetElapsedTime(); // t3-t0
-```
--- a/docs/changelog/bitfields.md
+++ b/docs/changelog/bitfields.md
@ -1,51 +0,0 @@
-# Add support for BitFields.
-
-BitFields are:
- Stored in memory using a contiguous buffer of bits.
- Accessible via portals, a la ArrayHandle.
- Portals operate on individual bits or words.
- Operations may be atomic for safe use from concurrent kernels.
-
-The new BitFieldToUnorderedSet device algorithm produces an
-ArrayHandle containing the indices of all set bits, in no particular
-order.
-
-The new AtomicInterface classes provide an abstraction into bitwise
-atomic operations across control and execution environments and are
-used to implement the BitPortals.
-
-BitFields may be used as boolean-typed ArrayHandles using the
-ArrayHandleBitField adapter. ArrayHandleBitField uses atomic operations to read
-and write bits in the BitField, and is safe to use in concurrent code.
-
-For example, a simple worklet that merges two arrays based on a boolean
-condition is tested in TestingBitField:
-
-```
-class ConditionalMergeWorklet : public vtkm::worklet::WorkletMapField
-{
-public:
-using ControlSignature = void(FieldIn cond,
-                              FieldIn trueVals,
-                              FieldIn falseVals,
-                              FieldOut result);
-using ExecutionSignature = _4(_1, _2, _3);
-
-template <typename T>
-VTKM_EXEC T operator()(bool cond, const T& trueVal, const T& falseVal) const
-{
-  return cond ? trueVal : falseVal;
-}
-
-};
-
-BitField bits = ...;
-auto condArray = vtkm::cont::make_ArrayHandleBitField(bits);
-auto trueArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(20, 2, NUM_BITS);
-auto falseArray = vtkm::cont::make_ArrayHandleCounting<vtkm::Id>(13, 2, NUM_BITS);
-vtkm::cont::ArrayHandle<vtkm::Id> output;
-
-vtkm::worklet::DispatcherMapField<ConditionalMergeWorklet> dispatcher;
-dispatcher.Invoke(condArray, trueArray, falseArray, output);
-
-```
--- a/docs/changelog/bounding-interval-hierarchy-in-vtkm-cont.md
+++ b/docs/changelog/bounding-interval-hierarchy-in-vtkm-cont.md
@ -1,10 +0,0 @@
-# Put CellLocatorBoundingIntervalHierarchy in vtkm_cont library
-
-All of the methods in CellLocatorBoundingIntervalHierarchy were listed in
-header files. This is sometimes problematic with virtual methods. Since
-everything implemented in it can just be embedded in a library, move the
-code into the vtkm_cont library.
-
-These changes caused some warnings in clang to show up based on virtual
-methods in other cell locators. Hence, the rest of the cell locators
-have also had some of their code moved to vtkm_cont.
--- a/docs/changelog/case-insensitive-device-from-string.md
+++ b/docs/changelog/case-insensitive-device-from-string.md
@ -1,14 +0,0 @@
-# VTK-m `vtkm::cont::DeviceAdapterId` construction from a string is now case-insensitive
-
-You can now construct a `vtkm::cont::DeviceAdapterId` from a string no matter
-the case of it. The following all will construct the same `vtkm::cont::DeviceAdapterId`.
-
-```cpp
-vtkm::cont::DeviceAdapterId id1 = vtkm::cont::make_DeviceAdapterId("cuda");
-vtkm::cont::DeviceAdapterId id2 = vtkm::cont::make_DeviceAdapterId("CUDA");
-vtkm::cont::DeviceAdapterId id3 = vtkm::cont::make_DeviceAdapterId("Cuda");
-
-auto& tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
-vtkm::cont::DeviceAdapterId id4 = tracker.GetDeviceAdapterId("cuda");
-vtkm::cont::DeviceAdapterId id5 = tracker.GetDeviceAdapterId("CUDA");
-vtkm::cont::DeviceAdapterId id6 = tracker.GetDeviceAdapterId("Cuda");
--- a/docs/changelog/cast-variant-to-storage.md
+++ b/docs/changelog/cast-variant-to-storage.md
@ -1,58 +0,0 @@
-# Allow VariantArrayHandle CastAndCall to cast to concrete types
-
-Previously, the `VariantArrayHandle::CastAndCall` (and indirect calls through
-`vtkm::cont::CastAndCall`) attempted to cast to only
-`vtkm::cont::ArrayHandleVirtual` with different value types. That worked, but
-it meant that whatever was called had to operate through virtual functions.
-
-Under most circumstances, it is worthwhile to also check for some common
-storage types that, when encountered, can be accessed much faster. This
-change provides the casting to concrete storage types and now uses
-`vtkm::cont::ArrayHandleVirtual` as a fallback when no concrete storage
-type is found.
-
-By default, `CastAndCall` checks all the storage types in
-`VTKM_DEFAULT_STORAGE_LIST_TAG`, which typically contains only the basic
-storage. The `ArrayHandleVirtual::CastAndCall` method also allows you to
-override this behavior by specifying a different type list in the first
-argument. If the first argument is a list type, `CastAndCall` assumes that
-all the types in the list are storage tags. If you pass in
-`vtkm::ListTagEmpty`, then `CastAndCall` will always cast to an
-`ArrayHandleVirtual` (the previous behavior). Alternately, you can pass in
-storage tags that might be likely under the current usage.
-
-As an example, consider the following simple code.
-
-``` cpp
-vtkm::cont::VariantArrayHandle array;
-
-// stuff happens
-
-array.CastAndCall(myFunctor);
-```
-
-Previously, `myFunctor` would be called with
-`vtkm::cont::ArrayHandleVirtual<T>` with different type `T`s. After this
-change, `myFunctor` will be called with that and with
-`vtkm::cont::ArrayHandle<T>` of the same type `T`s.
-
-If you want to only call `myFunctor` with
-`vtkm::cont::ArrayHandleVirtual<T>`, then replace the previous line with
-
-``` cpp
-array.CastAndCall(vtkm::ListTagEmpty(), myFunctor);
-```
-
-Let's say that additionally using `vtkm::cont::ArrayHandleIndex` was also
-common. If you want to also specialize for that array, you can do so with
-the following line.
-
-``` cpp
-array.CastAndCall(vtkm::ListTagBase<vtkm::cont::StorageBasic, 
-                                    vtkm::cont::ArrayHandleIndex::StorageTag>,
-                  myFunctor);
-```
-
-Note that `myFunctor` will be called with
-`vtkm::cont::ArrayHandle<T,vtkm::cont::ArrayHandleIndex::StorageTag>`, not
-`vtkm::cont::ArrayHandleIndex`.
--- a/docs/changelog/cmake-38-required.md
+++ b/docs/changelog/cmake-38-required.md
@ -1,10 +0,0 @@
-# CMake 3.8 Required to build VTK-m
-
-While VTK-m has always required a fairly recent version
-of CMake when building for Visual Studio, or if OpenMP or 
-CUDA are enabled, it has supported building with the TBB
-device with CMake 3.3.
-
-Given the fact that our primary consumer (VTK) has moved
-to require CMake 3.8, it doesn't make sense to require
-CMake 3.3 and we have moved to a minimum of 3.8.
--- a/docs/changelog/connected-components.md
+++ b/docs/changelog/connected-components.md
@ -1,9 +0,0 @@
-# Add connected component worklets and filters
-
-We have added the `ImageConnectivity` and `CellSetConnectivity` worklets and
-the corresponding filters to identify connected components in a DataSet. ImageConnectivity
-identifies connected components in a CellSetStructured based on neighboring cells having
-the same field value, and CellSetConnectivity identifies connected components based on cell connectivity.
-Currently the Moore neighborhood (i.e. 8 neighboring pixels for 2D and 27 neighboring pixels
-for 3D) is used for ImageConnectivity. For CellSetConnectivity, the neighborhood is defined
-as cells sharing a common edge.
--- a/docs/changelog/cuda-allocator-disable-managed-memory-from-code.md
+++ b/docs/changelog/cuda-allocator-disable-managed-memory-from-code.md
@ -1,6 +0,0 @@
-# CudaAllocator Managed Memory can be disabled from C++
-
-Previously it was impossible for calling code to explicitly
-disable managed memory. This can be desirable for projects
-that know they don't need managed memory and are super
-performance critical.
--- a/docs/changelog/cuda-separable-compilation-enabled.md
+++ b/docs/changelog/cuda-separable-compilation-enabled.md
@ -1,4 +0,0 @@
-# VTK-m now requires CUDA separable compilation to build
-
-With the introduction of `vtkm::cont::ArrayHandleVirtual` and the related infrastructure, vtk-m now
-requires that all CUDA code be compiled using separable compilation ( -rdc ).
--- a/docs/changelog/field-tags-no-template.md
+++ b/docs/changelog/field-tags-no-template.md
@ -1,132 +0,0 @@
-# Remove templates from ControlSignature field tags
-
-Previously, several of the `ControlSignature` tags had a template to
-specify a type list. This was to specify potential valid value types for an
-input array. The importance of this typelist was to limit the number of
-code paths created when resolving a `vtkm::cont::VariantArrayHandle`
-(formerly a `DynamicArrayHandle`). This (potentially) reduced the compile
-time, the size of libraries/executables, and errors from unexpected types.
-
-Much has changed since this feature was originally implemented. Since then,
-the filter infrastructure has been created, and it is through this that
-most dynamic worklet invocations happen. However, since the filter
-infrastructure does its own type resolution (and has its own policies) the
-type arguments in `ControlSignature` are now of little value.
-
-## Script to update code
-
-This update requires changes to just about all code implementing a VTK-m
-worklet. To facilitate the update of this code to these new changes (not to
-mention all the code in VTK-m) a script is provided to automatically remove
-these template parameters from VTK-m code.
-
-This script is at
-[Utilities/Scripts/update-control-signature-tags.sh](../../Utilities/Scripts/update-control-signature-tags.sh).
-It needs to be run in a Unix-compatible shell. It takes a single argument,
-which is a top level directory to modify files. The script processes all C++
-source files recursively from that directory.
-
-## Selecting data types for auxiliary filter fields
-
-The main rationale for making these changes is that the types of the inputs
-to worklets are almost always already determined by the calling filter.
-However, although it is straightforward to specify the type of the "main"
-(active) scalars in a filter, it is less clear what to do for additional
-fields if a filter needs a second or third field.
-
-Typically, in the case of a second or third field, it is up to the
-`DoExecute` method in the filter implementation to apply a policy to that
-field. When applying a policy, you give it a policy object (nominally
-passed by the user) and a traits of the filter. Generally, the accepted
-list of types for a field should be part of the filter's traits. For
-example, consider the `WarpVector` filter. This filter only works on
-`Vec`s of size 3, so its traits class looks like this.
-
-``` cpp
-template <>
-class FilterTraits<WarpVector>
-{
-public:
-  // WarpVector can only applies to Float and Double Vec3 arrays
-  using InputFieldTypeList = vtkm::TypeListTagFieldVec3;
-};
-```
-
-However, the `WarpVector` filter also requires two fields instead of one.
-The first (active) field is handled by its superclass (`FilterField`), but
-the second (auxiliary) field must be managed in the `DoExecute`. Generally,
-this can be done by simply applying the policy with the filter traits.
-
-## The corner cases
-
-Most of the calls to worklets happen within filter implementations, which
-have their own way of narrowing down potential types (as previously
-described). The majority of the remainder either use static types or work
-with a variety of types.
-
-However, there is a minority of corner cases that require a reduction of
-types. Since the type argument of the worklet `ControlSignature` arguments
-are no longer available, the narrowing of types must be done before the
-call to `Invoke`.
-
-This narrowing of arguments is not particularly difficult. Such type-unsure
-arguments usually come from a `VariantArrayHandle` (or something that uses
-one). You can select the types from a `VariantArrayHandle` simply by using
-the `ResetTypes` method. For example, say you know that a variant array is
-supposed to be a scalar.
-
-``` cpp
-dispatcher.Invoke(variantArray.ResetTypes(vtkm::TypeListTagFieldScalar()),
-                  staticArray);
-```
-
-Even more common is to have a `vtkm::cont::Field` object. A `Field` object
-internally holds a `VariantArrayHandle`, which is accessible via the
-`GetData` method.
-
-``` cpp
-dispatcher.Invoke(field.GetData().ResetTypes(vtkm::TypeListTagFieldScalar()),
-                  staticArray);
-```
-
-## Change in executable size
-
-The whole intention of these template parameters in the first place was to
-reduce the number of code paths compiled. The hypothesis of this change was
-that in the current structure the code paths were not being reduced much
-if at all. If that is true, the size of executables and libraries should
-not change.
-
-Here is a recording of the library and executable sizes before this change
-(using `du -h`).
-
-```
-3.0M    libvtkm_cont-1.2.1.dylib
-6.2M    libvtkm_rendering-1.2.1.dylib
-312K    Rendering_SERIAL
-312K    Rendering_TBB
- 22M    Worklets_SERIAL
- 23M    Worklets_TBB
- 22M    UnitTests_vtkm_filter_testing
-5.7M    UnitTests_vtkm_cont_serial_testing
-6.0M    UnitTests_vtkm_cont_tbb_testing
-7.1M    UnitTests_vtkm_cont_testing
-```
-
-After the changes, the executable sizes are as follows.
-
-```
-3.0M    libvtkm_cont-1.2.1.dylib
-6.0M    libvtkm_rendering-1.2.1.dylib
-312K    Rendering_SERIAL
-312K    Rendering_TBB
- 21M    Worklets_SERIAL
- 21M    Worklets_TBB
- 22M    UnitTests_vtkm_filter_testing
-5.6M    UnitTests_vtkm_cont_serial_testing
-6.0M    UnitTests_vtkm_cont_tbb_testing
-7.1M    UnitTests_vtkm_cont_testing
-```
-
-As we can see, the built sizes have not changed significantly. (If
-anything, the build is a little smaller.)
--- a/docs/changelog/improve-cuda-scheduling.md
+++ b/docs/changelog/improve-cuda-scheduling.md
@ -1,45 +0,0 @@
-# VTK-m CUDA kernel scheduling including improved defaults, and user customization
-
-VTK-m now offers a more GPU-aware set of defaults for kernel scheduling.
-When VTK-m first launches a kernel we do system introspection to determine
-what GPUs are on the machine and then match this information to a preset
-table of values. The implementation is designed in a way that allows
-VTK-m to offer specific presets either for a given GPU ( V100 ) or for
-an entire generation of cards ( Pascal ).
-
-Currently VTK-m offers preset tables for the following GPU's:
- Tesla V100
- Tesla P100
-
-If the hardware doesn't match a specific GPU card we then try to find the
-nearest known hardware generation and use those defaults. Currently we offer
-defaults for
- Older than Pascal Hardware
- Pascal Hardware
- Volta+ Hardware
-
-Some users have workloads that don't align with the defaults provided by
-VTK-m. When that is the case, it is possible to override the defaults
-by binding a custom function to `vtkm::cont::cuda::InitScheduleParameters`,
-as shown below:
-
-```cpp
-  ScheduleParameters CustomScheduleValues(char const* name,
-                                          int major,
-                                          int minor,
-                                          int multiProcessorCount,
-                                          int maxThreadsPerMultiProcessor,
-                                          int maxThreadsPerBlock)
-  {
-
-    ScheduleParameters params  {
-        64 * multiProcessorCount,  //1d blocks
-        64,                        //1d threads per block
-        64 * multiProcessorCount,  //2d blocks
-        { 8, 8, 1 },               //2d threads per block
-        64 * multiProcessorCount,  //3d blocks
-        { 4, 4, 4 } };             //3d threads per block
-    return params;
-  }
-  vtkm::cont::cuda::InitScheduleParameters(&CustomScheduleValues);
-```
--- a/docs/changelog/initialize.md
+++ b/docs/changelog/initialize.md
@ -1,20 +0,0 @@
-# vtkm::cont::Initialize
-
-A new initialization function, vtkm::cont::Initialize, has been added.
-Initialization is not required, but will configure the logging utilities (when
-enabled) and allows forcing a device via a `-d` or `--device` command line
-option.
-
-
-Usage:
-
-```
-#include <vtkm/cont/Initialize.h>
-
-int main(int argc, char *argv[])
-{
-  auto config = vtkm::cont::Initialize(argc, argv);
-
-  ...
-}
-```
--- a/docs/changelog/invoker-supports-scatter-types.md
+++ b/docs/changelog/invoker-supports-scatter-types.md
@ -1,21 +0,0 @@
-# `vtkm::worklet::Invoker` now able to launch worklets that have a non-default scatter type
-
-This change allows the `Invoker` class to support launching worklets that require
-a custom scatter operation. This is done by providing the scatter as the second
-argument when launching a worklet with the `()` operator.
-
-The following example shows a scatter being provided with a worklet launch.
-
-```cpp
-struct CheckTopology : vtkm::worklet::WorkletMapPointToCell
-{
-  using ControlSignature = void(CellSetIn cellset, FieldOutCell);
-  using ExecutionSignature = _2(FromIndices);
-  using ScatterType = vtkm::worklet::ScatterPermutation<>;
-  ...
-};
-
-
-vtkm::worklet::Invoker invoke;
-invoke( CheckTopology{}, vtkm::worklet::ScatterPermutation{}, cellset, result );
-```
--- a/docs/changelog/lodepng.md
+++ b/docs/changelog/lodepng.md
@ -1,5 +0,0 @@
-# LodePNG added as a thirdparty
-
-The lodepng library was brought in as a thirdparty library.
-This has allowed the VTK-m rendering library to have a robust
-png decode functionality.
--- a/docs/changelog/mask-worklets.md
+++ b/docs/changelog/mask-worklets.md
@ -1,104 +0,0 @@
-# Allow masking of worklet invocations
-
-There have recently been use cases where it would be helpful to mask out
-some of the invocations of a worklet. The idea is that when invoking a
-worklet with a mask array on the input domain, you might implement your
-worklet more-or-less like the following.
-
-```cpp
-VTKM_EXEC void operator()(bool mask, /* other parameters */)
-{
-  if (mask)
-  {
-    // Do interesting stuff
-  }
-}
-```
-
-This works, but what if your mask has mostly false values? In that case,
-you are spending tons of time loading data to and from memory where fields
-are stored for no reason.
-
-You could potentially get around this problem by adding a scatter to the
-worklet. However, that will compress the output arrays to only values that
-are active in the mask. That is problematic if you want the masked output
-in the appropriate place in the original arrays. You will have to do some
-complex (and annoying and possibly expensive) permutations of the output
-arrays.
-
-Thus, we would like a new feature similar to scatter that instead masks out
-invocations so that the worklet is simply not run on those outputs.
-
-## New Interface
-
-The new "Mask" feature is similar (and orthogonal) to the existing
-"Scatter" feature. Worklet objects now define a `MaskType` that provides an
-object that manages the selections of which invocations are skipped. The
-following Mask objects are defined.
-
-  * `MaskNone` - This removes any mask of the output. All outputs are
-    generated. This is the default if no `MaskType` is explicitly defined.
-  * `MaskSelect` - The user provides an array that specifies whether
-    each output is created, with a 1 to mean that the output should be
-    created and a 0 to mean that it should not.
-  * `MaskIndices` - The user provides an array with a list of indices for
-    all outputs that should be created.
-  
-It will be straightforward to implement other versions of masks. (For
-example, you could make a mask class that selects every Nth entry.) Those
-could be made on an as-needed basis.
-
-## Implementation
-
-The implementation follows the same basic idea of how scatters are
-implemented.
-
-### Mask Classes
-
-The mask class is required to implement the following items.
-
-  * `ThreadToOutputType` - A type for an array that maps a thread index (an
-    index in the array) to an output index. A reasonable type for this
-    could be `vtkm::cont::ArrayHandle<vtkm::Id>`.
-  * `GetThreadToOutputMap` - Given the range for the output (e.g. the
-    number of items in the output domain), returns an array of type
-    `ThreadToOutputType` that is the actual map.
-  * `GetThreadRange` - Given a range for the output (e.g. the number of
-    items in the output domain), returns the range for the threads (e.g.
-    the number of times the worklet will be invoked).
-
-### Dispatching
-
-The `vtkm::worklet::internal::DispatcherBase` manages a mask class in
-the same way it manages the scatter class. It gets the `MaskType` from
-the worklet it is templated on. It requires a `MaskType` object during
-its construction.
-
-Previously the dispatcher (and downstream) had to manage the range and
-indices of inputs and threads. They now have to also manage a separate
-output range/index as now all three may be different.
-
-The `vtkm::Invocation` is changed to hold the ThreadToOutputMap array from
-the mask. It likewise has a templated `ChangeThreadToOutputMap` method
-added (similar to those already existing for the arrays from a scatter).
-This method is used in `DispatcherBase::InvokeTransportParameters` to add
-the mask's array to the invocation before calling `InvokeSchedule`.
-
-### Thread Indices
-
-With the addition of masks, the `ThreadIndices` classes are changed to
-manage the actual output index. Previously, the output index was always the
-same as the thread index. However, now these two can be different. The
-`GetThreadIndices` methods of the worklet base classes have an argument
-added that is the portal to the ThreadToOutputMap.
-
-The worklet `GetThreadIndices` is called from the `Task` classes. These
-classes are changed to pass in this additional argument. Since the `Task`
-classes get an `Invocation` object from the dispatcher, which contains the
-`ThreadToOutputMap`, this change is trivial.
-
-## Interaction Between Mask and Scatter
-
-Although it seems weird, it should work fine to mix scatters and masks. The
-scatter will first be applied to the input to generate a (potential) list
-of output elements. The mask will then be applied to these output elements.
--- a/docs/changelog/merge-benchmark-executables.md
+++ b/docs/changelog/merge-benchmark-executables.md
@ -1,6 +0,0 @@
-# Merge benchmark executables into a device dependent shared library
-
-VTK-m has been updated to replace old per device benchmark executables with a device
-dependent shared library so that it's able to accept a device adapter at runtime through
-the "--device=" argument.
-
--- a/docs/changelog/merge-rendering-testing-executables.md
+++ b/docs/changelog/merge-rendering-testing-executables.md
@ -1,3 +0,0 @@
-# Merge rendering testing executables to a shared library
-
-This commit allows rendering testing executables to select the device at runtime.
--- a/docs/changelog/merge-worklet-testing-executables.md
+++ b/docs/changelog/merge-worklet-testing-executables.md
@ -1,8 +0,0 @@
-# Merge worklet testing executables into a device dependent shared library
-
-VTK-m has been updated to replace old per device worklet testing executables with a device
-dependent shared library so that it's able to accept a device adapter at runtime through
-the "--device=" argument.
-
-
-
--- a/docs/changelog/optionparser-to-third-party.md
+++ b/docs/changelog/optionparser-to-third-party.md
@ -1,13 +0,0 @@
-# Wrap third party optionparser.h in vtkm/cont/internal/OptionParser.h
-
-Previously we just took the optionparser.h file and stuck it right in
-our source code. That was problematic for a variety of reasons.
-
-1. It incorrectly assigned our license to external code.
-2. It made lots of unnecessary changes to the original source (like
-   reformatting).
-3. It made it near impossible to track patches we make and updates to
-   the original software.
-
-Instead, use the third-party system to track changes to optionparser.h
-in a different repository and then pull that into ours.
--- a/docs/changelog/parse-some-options-in-initialize.md
+++ b/docs/changelog/parse-some-options-in-initialize.md
@ -1,91 +0,0 @@
-# Allow Initialize to parse only some arguments
-
-When a library requires reading some command line arguments through a
-function like Initialize, it is typical that it will parse through
-arguments it supports and then remove those arguments from `argc` and
-`argv` so that the remaining arguments can be parsed by the calling
-program. Recent changes to the `vtkm::cont::Initialize` function support
-that.
-
-## Use Case
-
-Say you are creating a simple benchmark where you want to provide a command
-line option `--size` that allows you to adjust the size of the data that
-you are working on. However, you also want to support flags like `--device`
-and `-v` that are performed by `vtkm::cont::Initialize`. Rather than have
-to re-implement all of `Initialize`'s parsing, you can now first call
-`Initialize` to handle its arguments and then parse the remaining objects.
-
-The following is a simple (and rather incomplete) example:
-
-```cpp
-int main(int argc, char** argv)
-{
-  vtkm::cont::InitializeResult initResult = vtkm::cont::Initialize(argc, argv);
-  
-  if ((argc > 1) && (strcmp(argv[1], "--size") == 0))
-  {
-    if (argc < 3)
-	{
-	  std::cerr << "--size option requires a numeric argument" << std::endl;
-	  std::cerr << "USAGE: " << argv[0] << " [options]" << std::endl;
-	  std::cerr << "Options are:" << std::endl;
-	  std::cerr << "  --size <number>\tSpecify the size of the data." << std::endl;
-	  std::cerr << initResult.Usage << std::endl;
-	  exit(1);
-	}
-	
-	g_size = atoi(argv[2]);
-  }
-  
-  std::cout << "Using device: " << initResult.Device.GetName() << std::endl;
-```
-
-## Additional Initialize Options
-
-Because `Initialize` no longer has the assumption that it is responsible
-for parsing _all_ arguments, some options have been added to
-`vtkm::cont::InitializeOptions` to manage these different use cases. The
-following options are now supported.
-
-  * `None` A placeholder for having all options off, which is the default.
-    (Same as before this change.)
-  * `RequireDevice` Issue an error if the device argument is not specified.
-    (Same as before this change.)
-  * `DefaultAnyDevice` If no device is specified, treat it as if the user
-    gave --device=Any. This means that DeviceAdapterTagUndefined will never
-    be returned in the result.
-  * `AddHelp` Add a help argument. If `-h` or `--help` is provided, prints
-    a usage statement. Of course, the usage statement will only print out
-    arguments processed by VTK-m.
-  * `ErrorOnBadOption` If an unknown option is encountered, the program
-    terminates with an error and a usage statement is printed. If this
-    option is not provided, any unknown options are returned in `argv`. If
-    this option is used, it is a good idea to use `AddHelp` as well.
-  * `ErrorOnBadArgument` If an extra argument is encountered, the program
-    terminates with an error and a usage statement is printed. If this
-    option is not provided, any unknown arguments are returned in `argv`.
-  * `Strict` If supplied, Initialize treats its own arguments as the only
-    ones supported by the application and provides an error if not followed
-    exactly. This is a convenience option that is a combination of
-    `ErrorOnBadOption`, `ErrorOnBadArgument`, and `AddHelp`.
-
-## InitializeResult Changes
-
-The changes in `Initialize` have also necessitated the changing of some of
-the fields in the `InitializeResult` structure. The following fields are
-now provided in the `InitializeResult` struct.
-
-  * `Device` Returns the device selected in the command line arguments as a
-    `DeviceAdapterId`. If no device was selected,
-    `DeviceAdapterTagUndefined` is returned. (Same as before this change.)
-  * `Usage` Returns a string containing the usage for the options
-    recognized by `Initialize`. This can be used to build larger usage
-    statements containing options for both `Initialize` and the calling
-    program. See the example above.
-
-Note that the `Arguments` field has been removed from `InitializeResult`.
-This is because the unparsed arguments are now returned in the modified
-`argc` and `argv`, which provides a more complete result than the
-`Arguments` field did.
-
--- a/docs/changelog/point-merge.md
+++ b/docs/changelog/point-merge.md
@ -1,26 +0,0 @@
-# Add point merge capabilities to CleanGrid filter
-
-We have added a `PointMerge` worklet that uses a virtual grid approach to
-identify nearby points. The worklet works by creating a very fine but
-sparsely represented locator grid. It then groups points by grid bins and
-finds those within a specified radius.
-
-This functionality has been integrated into the `CleanGrid` filter. The
-following flags have been added to `CleanGrid` to modify the behavior of
-point merging.
-
-  * `Set`/`GetMergePoints` - a flag to turn on/off the merging of
-    duplicated coincident points. This extra operation will find points
-    spatially located near each other and merge them together.
-  * `Set`/`GetTolerance` - Defines the tolerance used when determining
-    whether two points are considered coincident. If the
-    `ToleranceIsAbsolute` flag is false (the default), then this tolerance
-    is scaled by the diagonal of the points. This parameter is only used
-    when merge points is on.
-  * `Set`/`GetToleranceIsAbsolute` - When ToleranceIsAbsolute is false (the
-     default) then the tolerance is scaled by the diagonal of the bounds of
-     the dataset. If true, then the tolerance is taken as the actual
-     distance to use. This parameter is only used when merge points is on.
-  * `Set`/`GetFastMerge` - When FastMerge is true (the default), some
-     corners are cut when computing coincident points. The point merge will
-     go faster but the tolerance will not be strictly followed.
--- a/docs/changelog/portal-value-reference-operators.md
+++ b/docs/changelog/portal-value-reference-operators.md
@ -1,12 +0,0 @@
-# Added specialized operators for ArrayPortalValueReference
-
-The ArrayPortalValueReference is supposed to behave just like the value it
-encapsulates and does so by automatically converting to the base type when
-necessary. However, when it is possible to convert that to something else,
-it is possible to get errors about ambiguous overloads. To avoid these, add
-specialized versions of the operators to specify which ones should be used.
-
-Also consolidated the CUDA version of an ArrayPortalValueReference to the
-standard one. The two implementations were equivalent and we would like
-changes to apply to both.
-
--- a/docs/changelog/redesign-runtime-device-tracking.md
+++ b/docs/changelog/redesign-runtime-device-tracking.md
@ -1,90 +0,0 @@
-# Redesign Runtime Device Tracking
-
-The device tracking infrastructure in VTK-m has been redesigned to
-remove multiple redundant code paths and to simplify reasoning
-about what an instance of RuntimeDeviceTracker will modify.
-
-`vtkm::cont::RuntimeDeviceTracker` tracks runtime information on
-a per-user thread basis. This is done to allow multiple calling
-threads to use different vtk-m backends such as seen in this
-example:
-
-```cpp
-  vtkm::cont::DeviceAdapterTagCuda cuda;
-  vtkm::cont::DeviceAdapterTagOpenMP openmp;
-  { // thread 1
-    auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
-    tracker->ForceDevice(cuda);
-    vtkm::worklet::Invoker invoke;
-    invoke(LightTask{}, input, output);
-    vtkm::cont::Algorithm::Sort(output);
-    invoke(HeavyTask{}, output);
-  }
-
- { // thread 2
-    auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
-    tracker->ForceDevice(openmp);
-    vtkm::worklet::Invoker invoke;
-    invoke(LightTask{}, input, output);
-    vtkm::cont::Algorithm::Sort(output);
-    invoke(HeavyTask{}, output);
-  }
-```
-
-While this addresses the ability for threads to specify what
-device they should run on, it doesn't make it easy to toggle
-the status of a device in a programmatic way. For example,
-the following block forces execution to only occur on
-`cuda` and doesn't restore the previously active devices afterwards:
-
-```cpp  
-  {
-  vtkm::cont::DeviceAdapterTagCuda cuda;
-  auto& tracker = vtkm::cont::GetRuntimeDeviceTracker();
-  tracker->ForceDevice(cuda);
-  vtkm::worklet::Invoker invoke;
-  invoke(LightTask{}, input, output);
-  } 
-  //openmp/tbb/... still inactive
-```
-
-To resolve those issues we have `vtkm::cont::ScopedRuntimeDeviceTracker` which
-has the same interface as `vtkm::cont::RuntimeDeviceTracker` but additionally
-resets any per-user thread modifications when it goes out of scope. So by
-switching over the previous example to use `ScopedRuntimeDeviceTracker` we
-correctly restore the thread's `RuntimeDeviceTracker` state when `tracker`
-goes out of scope.
-```cpp  
-  {
-  vtkm::cont::DeviceAdapterTagCuda cuda;
-  vtkm::cont::ScopedRuntimeDeviceTracker tracker(cuda);
-  vtkm::worklet::Invoker invoke;
-  invoke(LightTask{}, input, output);
-  } 
-  //openmp/tbb/... are now again active
-```
-
-The  `vtkm::cont::ScopedRuntimeDeviceTracker` is not limited to forcing
-execution to occur on a single device. When constructed it can either force
-execution to a device, disable a device or enable a device. These options
-also work with the `DeviceAdapterTagAny`.
-
-
-```cpp  
-  {
-  //enable all devices 
-  vtkm::cont::DeviceAdapterTagAny any;
-  vtkm::cont::ScopedRuntimeDeviceTracker tracker(any, 
-                                                 vtkm::cont::RuntimeDeviceTrackerMode::Enable);
-  ...
-  }
-
-  {
-  //disable only cuda
-  vtkm::cont::DeviceAdapterTagCuda cuda;
-  vtkm::cont::ScopedRuntimeDeviceTracker tracker(cuda, 
-                                                 vtkm::cont::RuntimeDeviceTrackerMode::Disable);
-
-  ...
-  }
-```
--- a/docs/changelog/reduction-support-differing-input-output-types.md
+++ b/docs/changelog/reduction-support-differing-input-output-types.md
@ -1,53 +0,0 @@
-# DeviceAdapter Reduction supports differing input and output types
-
-It is common to want to perform a reduction where the input and output types
-are of differing types. A basic example would be when the input is `vtkm::UInt8`
-but the output is `vtkm::UInt64`. This has been supported since v1.2, as the input
-type can be implicitly convertible to the output type.
-
-What we now support is when the input type is not implicitly convertible to the output type,
-such as when the output type is `vtkm::Pair< vtkm::UInt64, vtkm::UInt64>`. For this to work
-we require that the custom binary operator also implements an `operator()` which handles
-the unary transformation of input to output.
-
-An example of a custom reduction operator for differing input and output types is:
-
-```cxx
-
-  struct CustomMinAndMax
-  {
-    using OutputType = vtkm::Pair<vtkm::Float64, vtkm::Float64>;
-
-    VTKM_EXEC_CONT
-    OutputType operator()(vtkm::Float64 a) const
-    {
-    return OutputType(a, a);
-    }
-
-    VTKM_EXEC_CONT
-    OutputType operator()(vtkm::Float64 a, vtkm::Float64 b) const
-    {
-      return OutputType(vtkm::Min(a, b), vtkm::Max(a, b));
-    }
-
-    VTKM_EXEC_CONT
-    OutputType operator()(const OutputType& a, const OutputType& b) const
-    {
-      return OutputType(vtkm::Min(a.first, b.first), vtkm::Max(a.second, b.second));
-    }
-
-    VTKM_EXEC_CONT
-    OutputType operator()(vtkm::Float64 a, const OutputType& b) const
-    {
-      return OutputType(vtkm::Min(a, b.first), vtkm::Max(a, b.second));
-    }
-
-    VTKM_EXEC_CONT
-    OutputType operator()(const OutputType& a, vtkm::Float64 b) const
-    {
-      return OutputType(vtkm::Min(a.first, b), vtkm::Max(a.second, b));
-    }
-  };
-
-
-```
--- a/docs/changelog/rename-per-thread-runtime-tracker-method.md
+++ b/docs/changelog/rename-per-thread-runtime-tracker-method.md
@ -1,9 +0,0 @@
-# Renamed RuntimeDeviceTrackers to no longer use the term Global
-
-The `GetGlobalRuntimeDeviceTracker` never actually returned a process wide
-runtime device tracker but always a unique one for each control side thread.
-This was the design as it would allow for different threads to have different
-runtime device settings.
-
-By removing the term Global from the name it becomes more clear what scope this
-class has.
--- a/docs/changelog/specialize-worklet-for-device.md
+++ b/docs/changelog/specialize-worklet-for-device.md
@ -1,147 +0,0 @@
-# Add ability to specialize a worklet for a device
-
-This change adds an execution signature tag named `Device` that passes
-a `DeviceAdapterTag` to the worklet's parenthesis operator. This allows the
-worklet to specialize its operation. This feature is available in all
-worklets.
-
-The following example shows a worklet that specializes itself for the CUDA
-device.
-
-```cpp
-struct DeviceSpecificWorklet : vtkm::worklet::WorkletMapField
-{
-  using ControlSignature = void(FieldIn, FieldOut);
-  using ExecutionSignature = _2(_1, Device);
-  
-  // Specialization for the Cuda device.
-  template <typename T>
-  T operator()(T x, vtkm::cont::DeviceAdapterTagCuda) const
-  {
-    // Special cuda implementation
-  }
-  
-  // General implementation
-  template <typename T, typename Device>
-  T operator()(T x, Device) const
-  {
-    // General implementation
-  }
-};
-```
-
-## Effect on compile time and binary size
-
-This change necessitated adding a template parameter for the device that
-followed at least from the schedule all the way down. This has the
-potential for duplicating several of the support methods (like
-`DoWorkletInvokeFunctor`) that would otherwise have the same type. This is
-especially true between the devices that run on the CPU as they should all
-be sharing the same portals from `ArrayHandle`s. So the question is whether
-it causes compilation to take longer or causes a significant increase in
-binary size.
-
-To informally test, I first ran a clean debug compile on my Windows machine
-with the serial and tbb devices. The build itself took **3 minutes, 50
-seconds**. Here is a list of the binary sizes in the bin directory:
-
-```
-kmorel2 0> du -sh *.exe *.dll
-200K    BenchmarkArrayTransfer_SERIAL.exe
-204K    BenchmarkArrayTransfer_TBB.exe
-424K    BenchmarkAtomicArray_SERIAL.exe
-424K    BenchmarkAtomicArray_TBB.exe
-440K    BenchmarkCopySpeeds_SERIAL.exe
-580K    BenchmarkCopySpeeds_TBB.exe
-4.1M    BenchmarkDeviceAdapter_SERIAL.exe
-5.3M    BenchmarkDeviceAdapter_TBB.exe
-7.9M    BenchmarkFieldAlgorithms_SERIAL.exe
-7.9M    BenchmarkFieldAlgorithms_TBB.exe
-22M     BenchmarkFilters_SERIAL.exe
-22M     BenchmarkFilters_TBB.exe
-276K    BenchmarkRayTracing_SERIAL.exe
-276K    BenchmarkRayTracing_TBB.exe
-4.4M    BenchmarkTopologyAlgorithms_SERIAL.exe
-4.4M    BenchmarkTopologyAlgorithms_TBB.exe
-712K    Rendering_SERIAL.exe
-712K    Rendering_TBB.exe
-708K    UnitTests_vtkm_cont_arg_testing.exe
-1.7M    UnitTests_vtkm_cont_internal_testing.exe
-13M     UnitTests_vtkm_cont_serial_testing.exe
-14M     UnitTests_vtkm_cont_tbb_testing.exe
-18M     UnitTests_vtkm_cont_testing.exe
-13M     UnitTests_vtkm_cont_testing_mpi.exe
-736K    UnitTests_vtkm_exec_arg_testing.exe
-136K    UnitTests_vtkm_exec_internal_testing.exe
-196K    UnitTests_vtkm_exec_serial_internal_testing.exe
-196K    UnitTests_vtkm_exec_tbb_internal_testing.exe
-2.0M    UnitTests_vtkm_exec_testing.exe
-83M     UnitTests_vtkm_filter_testing.exe
-476K    UnitTests_vtkm_internal_testing.exe
-148K    UnitTests_vtkm_interop_internal_testing.exe
-1.3M    UnitTests_vtkm_interop_testing.exe
-2.9M    UnitTests_vtkm_io_reader_testing.exe
-548K    UnitTests_vtkm_io_writer_testing.exe
-792K    UnitTests_vtkm_rendering_testing.exe
-3.7M    UnitTests_vtkm_testing.exe
-320K    UnitTests_vtkm_worklet_internal_testing.exe
-65M     UnitTests_vtkm_worklet_testing.exe
-11M     vtkm_cont-1.3.dll
-2.1M    vtkm_interop-1.3.dll
-21M     vtkm_rendering-1.3.dll
-3.9M    vtkm_worklet-1.3.dll
-```
-
-After making the singular change to the `Invocation` object to add the
-`DeviceAdapterTag` as a template parameter (which should cause any extra
-compile instances) the compile took **4 minutes and 5 seconds**. Here is the
-new list of binaries.
-
-```
-kmorel2 0> du -sh *.exe *.dll
-200K    BenchmarkArrayTransfer_SERIAL.exe
-204K    BenchmarkArrayTransfer_TBB.exe
-424K    BenchmarkAtomicArray_SERIAL.exe
-424K    BenchmarkAtomicArray_TBB.exe
-440K    BenchmarkCopySpeeds_SERIAL.exe
-580K    BenchmarkCopySpeeds_TBB.exe
-4.1M    BenchmarkDeviceAdapter_SERIAL.exe
-5.3M    BenchmarkDeviceAdapter_TBB.exe
-7.9M    BenchmarkFieldAlgorithms_SERIAL.exe
-7.9M    BenchmarkFieldAlgorithms_TBB.exe
-22M     BenchmarkFilters_SERIAL.exe
-22M     BenchmarkFilters_TBB.exe
-276K    BenchmarkRayTracing_SERIAL.exe
-276K    BenchmarkRayTracing_TBB.exe
-4.4M    BenchmarkTopologyAlgorithms_SERIAL.exe
-4.4M    BenchmarkTopologyAlgorithms_TBB.exe
-712K    Rendering_SERIAL.exe
-712K    Rendering_TBB.exe
-708K    UnitTests_vtkm_cont_arg_testing.exe
-1.7M    UnitTests_vtkm_cont_internal_testing.exe
-13M     UnitTests_vtkm_cont_serial_testing.exe
-14M     UnitTests_vtkm_cont_tbb_testing.exe
-19M     UnitTests_vtkm_cont_testing.exe
-13M     UnitTests_vtkm_cont_testing_mpi.exe
-736K    UnitTests_vtkm_exec_arg_testing.exe
-136K    UnitTests_vtkm_exec_internal_testing.exe
-196K    UnitTests_vtkm_exec_serial_internal_testing.exe
-196K    UnitTests_vtkm_exec_tbb_internal_testing.exe
-2.0M    UnitTests_vtkm_exec_testing.exe
-86M     UnitTests_vtkm_filter_testing.exe
-476K    UnitTests_vtkm_internal_testing.exe
-148K    UnitTests_vtkm_interop_internal_testing.exe
-1.3M    UnitTests_vtkm_interop_testing.exe
-2.9M    UnitTests_vtkm_io_reader_testing.exe
-548K    UnitTests_vtkm_io_writer_testing.exe
-792K    UnitTests_vtkm_rendering_testing.exe
-3.7M    UnitTests_vtkm_testing.exe
-320K    UnitTests_vtkm_worklet_internal_testing.exe
-68M     UnitTests_vtkm_worklet_testing.exe
-11M     vtkm_cont-1.3.dll
-2.1M    vtkm_interop-1.3.dll
-21M     vtkm_rendering-1.3.dll
-3.9M    vtkm_worklet-1.3.dll
-```
-
-So far the increase is quite negligible.
--- a/docs/changelog/update-CellLocatorTwoLevelUniformGrid.md
+++ b/docs/changelog/update-CellLocatorTwoLevelUniformGrid.md
@ -1,31 +0,0 @@
-# update-CellLocatorTwoLevelUniformGrid
-
-`CellLocatorTwoLevelUniformGrid` has been renamed to `CellLocatorUniformBins`
-for brevity. It has been modified to be a subclass of `vtkm::cont::CellLocator`
-and can be used wherever a `CellLocator` is accepted.
-
-`CellLocatorUniformBins` can work with all kinds of datasets, but there are cell
-locators that are more efficient for specific data sets. Therefore, a new cell
-locator - `CellLocatorGeneral` has been implemented that can be configured to use
-specialized cell locators based on its input data. A "configurator" function object
-can be specified using the `SetConfigurator` function. The configurator should
-have the following signature:
-
-```c++
-void (std::unique_ptr<vtkm::cont::CellLocator>&,
-     const vtkm::cont::DynamicCellSet&,
-     const vtkm::cont::CoordinateSystem&);
-```
-
-The configurator is invoked whenever the `Update` method is called and the input
-has changed. The current cell locator is passed in a `std::unique_ptr`. Based on
-the types of the input cellset and coordinates, and possibly some heuristics on
-their values, the current cell locator's parameters can be updated, or a different
-cell-locator can be instantiated and transferred to the `unique_ptr`. The default
-configurator configures a `CellLocatorUniformGrid` for uniform grid datasets,
-a `CellLocatorRectilinearGrid` for rectilinear datasets, and `CellLocatorUniformBins`
-for all other dataset types.
-
-The class `CellLocatorHelper` that implemented similar functionality to
-`CellLocatorGeneral` has been removed.
-
--- a/docs/changelog/update-optional-parser.md
+++ b/docs/changelog/update-optional-parser.md
@ -1,3 +0,0 @@
-# Optional Parser is bumped from 1.3 to 1.7.
-
-VTK-m internal version of Optional Parser has been moved to 1.7
--- a/docs/changelog/variantarrayhandle.md
+++ b/docs/changelog/variantarrayhandle.md
@ -1,43 +0,0 @@
-# vtkm::cont::VariantArrayHandle replaces vtkm::cont::DynamicArrayHandle
-
-`VariantArrayHandle` replaces `DynamicArrayHandle` as the primary method
-for holding onto a type erased `vtkm::cont::ArrayHandle`. The major difference
-between the two implementations is how they handle the Storage component of
-an array handle.
-
-`DynamicArrayHandle`'s approach was to find the fully deduced type of the `ArrayHandle`
-meaning it would check all value and storage types it knew about until it found a match.
-This cross product of values and storages would cause significant compilation times when
-a `DynamicArrayHandle` had multiple storage types.
-
-`VariantArrayHandle`'s approach is to only deduce the value type of the `ArrayHandle` and
-return a `vtkm::cont::ArrayHandleVirtual` which uses polymorphism to hide the actual
-storage type. This approach allows for better compile times, and for calling code
-to always expect an `ArrayHandleVirtual` instead of the fully deduced type. This conversion
-to `ArrayHandleVirtual` is usually done internally within VTK-m when a worklet or filter
-is invoked.
-
-In certain cases users of `VariantArrayHandle` want to be able to access the concrete
-`ArrayHandle<T,S>` and not have it wrapped in an `ArrayHandleVirtual`. For those occurrences
-`VariantArrayHandle` provides a collection of helper functions/methods to query and
-cast back to the concrete storage and value type:
-```cpp
-vtkm::cont::ArrayHandleConstant<vtkm::Float32> constant(42.0f);
-vtkm::cont::ArrayHandleVariant v(constant);
-
-bool isConstant = vtkm::cont::IsType< decltype(constant) >(v);
-if(isConstant)
-  vtkm::cont::ArrayHandleConstant<vtkm::Float32> t = vtkm::cont::Cast< decltype(constant) >(v);
-
-```
-
-Lastly, a common operation of calling code using `VariantArrayHandle` is a desire to construct a new instance
-of an existing virtual handle with the same storage type. This can be done by using the `NewInstance` method
-as seen below
-```cpp
-vtkm::cont::ArrayHandle<vtkm::Float32> pressure;
-vtkm::cont::ArrayHandleVariant v(pressure);
-
-vtkm::cont::ArrayHandleVariant newArray = v->NewInstance();
-bool isConstant = vtkm::cont::IsType< decltype(pressure) >(newArray); //will be true
-```
--- a/docs/changelog/verify-cmake-install.md
+++ b/docs/changelog/verify-cmake-install.md
@ -1,9 +0,0 @@
-# VTK-m now can verify that it installs itself correctly
-
-It was a fairly common occurrence for VTK-m to have a broken install
-tree as it had no easy way to verify that all headers would be installed.
-
-Now VTK-m offers a testing infrastructure that creates a temporary installed
-version and is able to run tests with that VTK-m installed version. Currently
-the only test is to verify that each header listed in VTK-m is also installed,
-but this can expand in the future to include compilation tests.
--- a/docs/changelog/vtkm-handles-busy-cuda-devices-better.md
+++ b/docs/changelog/vtkm-handles-busy-cuda-devices-better.md
@ -1,17 +0,0 @@
-# VTK-m CUDA detection properly handles busy devices
-
-When an application that uses VTK-m is first launched it will
-do a check to see if CUDA is supported at runtime. If for
-some reason that CUDA card is not allowing kernel execution
-VTK-m would report the hardware doesn't have CUDA support.
-
-This was problematic as it was overly aggressive in disabling CUDA
-support for hardware that could support kernel execution in
-the future. With the fact that every VTK-m worklet is executed
-through a TryExecute it is no longer necessary to be so
-aggressive in disabling CUDA support.
-
-Now the behavior is that VTK-m considers a machine to have
-CUDA runtime support if it has 1+ GPUs of Kepler or
-higher hardware (SM_30+).
-
--- a/docs/changelog/vtkm-mangle-diy.md
+++ b/docs/changelog/vtkm-mangle-diy.md
@ -1,11 +0,0 @@
-# VTK-m thirdparty diy now can coexist with external diy
-
-Previously VTK-m would leak macros that would cause an
-external diy to be incorrectly mangled breaking consumers
-of VTK-m that used diy.
-
-Going forward to use `diy` from VTK-m all calls must use the
-`vtkmdiy` namespace instead of the `diy` namespace. This
-allows for VTK-m to properly forward calls to either
-the external or internal version correctly.
-
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@ -10,28 +10,28 @@

 #add the directory that contains the VTK-m config file to the cmake
 #path so that our examples can find VTK-m
-set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
-
-add_subdirectory(clipping)
-add_subdirectory(contour_tree)
-add_subdirectory(contour_tree_augmented)
-add_subdirectory(cosmotools)
-add_subdirectory(demo)
-add_subdirectory(game_of_life)
-add_subdirectory(hello_world)
-add_subdirectory(histogram)
-add_subdirectory(isosurface)
-add_subdirectory(lagrangian)
-add_subdirectory(multi_backend)
-add_subdirectory(oscillator)
-add_subdirectory(particle_advection)
-add_subdirectory(redistribute_points)
-add_subdirectory(rendering)
-add_subdirectory(streamline)
-add_subdirectory(temporal_advection)
-add_subdirectory(tetrahedra)
-# add_subdirectory(unified_memory)

+if(VTKm_ENABLE_EXAMPLES)
+  set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR})
+  add_subdirectory(clipping)
+  add_subdirectory(contour_tree)
+  add_subdirectory(contour_tree_augmented)
+  add_subdirectory(cosmotools)
+  add_subdirectory(demo)
+  add_subdirectory(game_of_life)
+  add_subdirectory(hello_world)
+  add_subdirectory(histogram)
+  add_subdirectory(isosurface)
+  add_subdirectory(lagrangian)
+  add_subdirectory(multi_backend)
+  add_subdirectory(oscillator)
+  add_subdirectory(particle_advection)
+  add_subdirectory(redistribute_points)
+  add_subdirectory(rendering)
+  add_subdirectory(streamline)
+  add_subdirectory(temporal_advection)
+  add_subdirectory(tetrahedra)
+endif()

 if (VTKm_ENABLE_TESTING)
  # These need to be fast to build as they will
--- a/version.txt
+++ b/version.txt
@ -1 +1 @@
-1.3.0
+1.4.0
--- a/vtkm/thirdparty/diy/CMakeLists.txt
+++ b/vtkm/thirdparty/diy/CMakeLists.txt
@ -24,23 +24,7 @@ target_include_directories(vtkm_diy INTERFACE
  $<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}/vtkm/thirdparty/diy>)

 if(VTKm_ENABLE_MPI)
-  set(arg)
-  foreach(apath IN LISTS MPI_C_INCLUDE_PATH MPI_CXX_INCLUDE_PATH)
-    list(APPEND arg $<BUILD_INTERFACE:${apath}>)
-  endforeach()
-  list(REMOVE_DUPLICATES arg)
-  target_include_directories(vtkm_diy INTERFACE ${arg})
-  target_link_libraries(vtkm_diy INTERFACE
-    $<BUILD_INTERFACE:${MPI_C_LIBRARIES}>
-    $<BUILD_INTERFACE:${MPI_CXX_LIBRARIES}>)
-  if(MPI_C_COMPILE_DEFINITIONS)
-    target_compile_definitions(vtkm_diy INTERFACE
-      $<$<COMPILE_LANGUAGE:C>:${MPI_C_COMPILE_DEFINITIONS}>)
-  endif()
-  if(MPI_CXX_COMPILE_DEFNITIONS)
-    target_compile_definitions(vtkm_diy INTERFACE
-      $<$<COMPILE_LANGUAGE:CXX>:${MPI_CXX_COMPILE_DEFNITIONS>)
-  endif()
+  target_link_libraries(vtkm_diy INTERFACE MPI::MPI_CXX)
 endif()

 install(TARGETS vtkm_diy
 @ -1 +1 @@
 .3.0
 .4.0