diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5f779270a..a24bc3e9e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -133,6 +133,7 @@ stages:
       # The artifacts of the build.
       - build/bin/
       - build/include/
+      - build/vtkm/thirdparty/diy/vtkmdiy/
       - build/lib/
 
       # CTest and CMake install files.
diff --git a/.gitlab/ci/windows10.yml b/.gitlab/ci/windows10.yml
index 464f89acf..3cdb3efd8 100644
--- a/.gitlab/ci/windows10.yml
+++ b/.gitlab/ci/windows10.yml
@@ -18,6 +18,7 @@
       - build/bin/
       - build/include/
       - build/lib/
+      - build/vtkm/thirdparty/diy/vtkmdiy/include
 
       # CTest and CMake install files.
       # XXX(globbing): Can be simplified with support from
diff --git a/CMake/VTKmConfig.cmake.in b/CMake/VTKmConfig.cmake.in
index d2d251a43..24eecedfb 100644
--- a/CMake/VTKmConfig.cmake.in
+++ b/CMake/VTKmConfig.cmake.in
@@ -116,3 +116,7 @@ endif()
 # This includes a host of functions used by VTK-m CMake.
 include(VTKmWrappers)
 include(VTKmRenderingContexts)
+
+# Set up the diy magic of choosing the appropriate mpi/no_mpi library to link against
+include(VTKmDIYUtils)
+vtkm_diy_init_target()
diff --git a/CMake/VTKmDIYUtils.cmake b/CMake/VTKmDIYUtils.cmake
new file mode 100644
index 000000000..1594ba1fe
--- /dev/null
+++ b/CMake/VTKmDIYUtils.cmake
@@ -0,0 +1,59 @@
+##============================================================================
+##  Copyright (c) Kitware, Inc.
+##  All rights reserved.
+##  See LICENSE.txt for details.
+##
+##  This software is distributed WITHOUT ANY WARRANTY; without even
+##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+##  PURPOSE.  See the above copyright notice for more information.
+##============================================================================
+
+macro(_vtkm_diy_target flag target)  # `flag`/`target` are variable NAMES: reads the first, writes a DIY library name to the second
+  set(${target} "vtkmdiympi_nompi")  # start from the no-MPI library ...
+  if (${flag})  # expands to the caller's variable name, which if() then evaluates
+    set(${target} "vtkmdiympi")  # ... and switch to the MPI library when the flag is true
+  endif()
+endmacro()
+
+function(vtkm_diy_init_target)  # Seed vtkm_diy's MPI-flag stack and active DIY library from VTKm_ENABLE_MPI.
+  set(vtkm_diy_default_flag "${VTKm_ENABLE_MPI}")
+  _vtkm_diy_target(vtkm_diy_default_flag vtkm_diy_default_target)
+  # Quote the values so an empty/unset VTKm_ENABLE_MPI cannot drop an argument.
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${vtkm_diy_default_flag}"
+    vtkm_diy_target "${vtkm_diy_default_target}")
+endfunction()
+
+#-----------------------------------------------------------------------------
+function(vtkm_diy_use_mpi_push)  # Push a flag (ARGV0 if given, else VTKm_ENABLE_MPI) and select the matching DIY library.
+  if (ARGC EQUAL 0)
+    set(pushed_flag ${VTKm_ENABLE_MPI})  # no argument: fall back to the build-wide setting
+  else()
+    set(pushed_flag ${ARGV0})
+  endif()
+  get_target_property(flag_stack vtkm_diy vtkm_diy_use_mpi_stack)
+  list(APPEND flag_stack ${pushed_flag})
+  _vtkm_diy_target(pushed_flag diy_library)
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${flag_stack}" vtkm_diy_target "${diy_library}")
+endfunction()
+
+function(vtkm_diy_use_mpi value)  # Replace the flag on top of the stack with `value` and reselect the DIY library.
+  _vtkm_diy_target(value diy_library)
+  get_target_property(flag_stack vtkm_diy vtkm_diy_use_mpi_stack)
+  list(REMOVE_AT flag_stack -1)  # drop the current top entry ...
+  list(APPEND flag_stack ${value})  # ... and put the new flag in its place
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${flag_stack}"
+    vtkm_diy_target "${diy_library}")
+endfunction()
+
+function(vtkm_diy_use_mpi_pop)  # Undo the matching push: drop the top flag, then reselect the library for the new top.
+  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
+  list (REMOVE_AT stack -1)  # discard the entry added by the matching push
+  list (GET stack -1 value)  # read AFTER removal so the restored flag is used (requires a balanced push)
+  _vtkm_diy_target(value target)
+  set_target_properties(vtkm_diy PROPERTIES
+    vtkm_diy_use_mpi_stack "${stack}"
+    vtkm_diy_target "${target}")
+endfunction()
diff --git a/CMake/testing/VTKmCheckSourceInInstall.cmake b/CMake/testing/VTKmCheckSourceInInstall.cmake
index ebe550df3..fd0d771ca 100644
--- a/CMake/testing/VTKmCheckSourceInInstall.cmake
+++ b/CMake/testing/VTKmCheckSourceInInstall.cmake
@@ -111,6 +111,7 @@ function(do_verify root_dir prefix)
 
   set(file_exceptions
     cont/ColorTablePrivate.hxx
+    thirdparty/diy/vtkmdiy/cmake/mpi_types.h
     )
 
   #by default every header in a testing directory doesn't need to be installed
diff --git a/CMake/testing/VTKmTestWrappers.cmake b/CMake/testing/VTKmTestWrappers.cmake
index 8ec6042f9..b97ce7101 100644
--- a/CMake/testing/VTKmTestWrappers.cmake
+++ b/CMake/testing/VTKmTestWrappers.cmake
@@ -10,6 +10,69 @@
 
 include(VTKmWrappers)
 
+function(vtkm_create_test_executable
+  prog_name            # base executable name; MPI tests get a _mpi or _nompi suffix
+  sources              # list of test source files compiled into the driver
+  libraries            # libraries linked PRIVATE in addition to vtkm_cont
+  defines              # compile definitions added PRIVATE to the executable
+  is_mpi_test          # true: the driver constructs a vtkmdiy::mpi::environment before the test main
+  use_mpi              # MPI tests only: ON selects the MPI DIY flavor, OFF the no-MPI one
+  enable_all_backends  # true and vtkm::cuda exists: all sources are passed as DEVICE_SOURCES
+  use_job_pool)        # true: compile the executable in the vtkm_pool job pool
+  # Builds one create_test_sourcelist-based test driver; all arguments are positional.
+  vtkm_diy_use_mpi_push()  # paired with the pop on the last line of this function
+  set(extraArgs)  # start clean so a same-named variable in the calling scope cannot leak in
+  set(prog ${prog_name})
+
+  # for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
+  if (is_mpi_test)
+    set(extraArgs EXTRA_INCLUDE "vtkm/thirdparty/diy/environment.h")
+    set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "vtkmdiy::mpi::environment env(ac, av);")
+
+    if (use_mpi)
+      vtkm_diy_use_mpi(ON)
+      set(prog "${prog}_mpi")
+    else()
+      vtkm_diy_use_mpi(OFF)
+      set(prog "${prog}_nompi")
+    endif()
+  else()
+    set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "")
+  endif()
+
+  #the creation of the test source list needs to occur before the labeling as
+  #cuda. This is so that we get the correctly named entry points generated
+  create_test_sourcelist(test_sources ${prog}.cxx ${sources} ${extraArgs})
+
+  add_executable(${prog} ${prog}.cxx ${sources})
+  vtkm_add_drop_unused_function_flags(${prog})
+  target_compile_definitions(${prog} PRIVATE ${defines})
+
+  #if all backends are enabled, we can use cuda compiler to handle all possible backends.
+  set(device_sources)
+  if(TARGET vtkm::cuda AND enable_all_backends)
+    set(device_sources ${sources})
+  endif()
+  vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
+
+  if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
+    set_property(TARGET ${prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
+    set_property(TARGET ${prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
+  endif()
+  set_property(TARGET ${prog} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
+  set_property(TARGET ${prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
+  set_property(TARGET ${prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
+
+  target_link_libraries(${prog} PRIVATE vtkm_cont ${libraries})
+
+  if(use_job_pool)
+    vtkm_setup_job_pool()
+    set_property(TARGET ${prog} PROPERTY JOB_POOL_COMPILE vtkm_pool)
+  endif()
+
+  vtkm_diy_use_mpi_pop()
+endfunction()
+
 #-----------------------------------------------------------------------------
 # Declare unit tests, which should be in the same directory as a kit
 # (package, module, whatever you call it).  Usage:
@@ -36,7 +99,9 @@ include(VTKmWrappers)
 #               test executable
 #
 # [MPI]       : when specified, the tests should be run in parallel if
-#               MPI is enabled.
+#               MPI is enabled. The tests should also be able to build and run
+#               when MPI is not available, i.e., they should not make explicit
+#               use of MPI and instead completely rely on DIY.
 # [ALL_BACKENDS] : when specified, the tests would test against all enabled
 #                  backends. Otherwise we expect the tests to manage the
 #                  backends at runtime.
@@ -56,9 +121,6 @@ function(vtkm_unit_tests)
     )
   vtkm_parse_test_options(VTKm_UT_SOURCES "${options}" ${VTKm_UT_SOURCES})
 
-  set(test_prog)
-
-
   set(per_device_command_line_arguments "NONE")
   set(per_device_suffix "")
   set(per_device_timeout 180)
@@ -93,6 +155,7 @@ function(vtkm_unit_tests)
     endif()
   endif()
 
+  set(test_prog)
   if(VTKm_UT_NAME)
     set(test_prog "${VTKm_UT_NAME}")
   else()
@@ -110,43 +173,38 @@ function(vtkm_unit_tests)
   list(APPEND VTKm_UT_TEST_ARGS "--baseline-dir=${VTKm_SOURCE_DIR}/data/baseline")
 
   if(VTKm_UT_MPI)
-    # for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
-    set(test_prog "${test_prog}_mpi")
-    set(extraArgs EXTRA_INCLUDE "vtkm/cont/testing/Testing.h"
-                  FUNCTION "vtkm::cont::testing::Environment env")
+    if (VTKm_ENABLE_MPI)
+      vtkm_create_test_executable(
+        ${test_prog}
+        "${VTKm_UT_SOURCES}"
+        "${VTKm_UT_LIBRARIES}"
+        "${VTKm_UT_DEFINES}"
+        ON   # is_mpi_test
+        ON   # use_mpi
+        ${enable_all_backends}
+        ${VTKm_UT_USE_VTKM_JOB_POOL})
+    endif()
+    if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
+      vtkm_create_test_executable(
+        ${test_prog}
+        "${VTKm_UT_SOURCES}"
+        "${VTKm_UT_LIBRARIES}"
+        "${VTKm_UT_DEFINES}"
+        ON   # is_mpi_test
+        OFF  # use_mpi
+        ${enable_all_backends}
+        ${VTKm_UT_USE_VTKM_JOB_POOL})
+    endif()
   else()
-    set(extraArgs)
-  endif()
-
-  #the creation of the test source list needs to occur before the labeling as
-  #cuda. This is so that we get the correctly named entry points generated
-  create_test_sourcelist(test_sources ${test_prog}.cxx ${VTKm_UT_SOURCES} ${extraArgs})
-
-  add_executable(${test_prog} ${test_prog}.cxx ${VTKm_UT_SOURCES})
-  vtkm_add_drop_unused_function_flags(${test_prog})
-  target_compile_definitions(${test_prog} PRIVATE ${VTKm_UT_DEFINES})
-
-
-  #if all backends are enabled, we can use cuda compiler to handle all possible backends.
-  set(device_sources )
-  if(TARGET vtkm::cuda AND enable_all_backends)
-    set(device_sources ${VTKm_UT_SOURCES})
-  endif()
-  vtkm_add_target_information(${test_prog} DEVICE_SOURCES ${device_sources})
-
-  if(VTKm_HIDE_PRIVATE_SYMBOLS)
-    set_property(TARGET ${test_prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
-    set_property(TARGET ${test_prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
-  endif()
-  set_property(TARGET ${test_prog} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
-  set_property(TARGET ${test_prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
-  set_property(TARGET ${test_prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
-
-  target_link_libraries(${test_prog} PRIVATE vtkm_cont ${VTKm_UT_LIBRARIES})
-
-  if(VTKm_UT_USE_VTKM_JOB_POOL)
-    vtkm_setup_job_pool()
-    set_property(TARGET ${test_prog} PROPERTY JOB_POOL_COMPILE vtkm_pool)
+    vtkm_create_test_executable(
+      ${test_prog}
+      "${VTKm_UT_SOURCES}"
+      "${VTKm_UT_LIBRARIES}"
+      "${VTKm_UT_DEFINES}"
+      OFF   # is_mpi_test
+      OFF   # use_mpi
+      ${enable_all_backends}
+      ${VTKm_UT_USE_VTKM_JOB_POOL})
   endif()
 
   list(LENGTH per_device_command_line_arguments number_of_devices)
@@ -170,25 +228,42 @@ function(vtkm_unit_tests)
 
     foreach (test ${VTKm_UT_SOURCES})
       get_filename_component(tname ${test} NAME_WE)
-      if(VTKm_UT_MPI AND VTKm_ENABLE_MPI)
-        add_test(NAME ${tname}${upper_backend}
-          COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS}
-                  $<TARGET_FILE:${test_prog}> ${tname} ${device_command_line_argument}
-                  ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS} ${MPIEXEC_POSTFLAGS}
-          )
-      else()
+      if(VTKm_UT_MPI)
+        if (VTKm_ENABLE_MPI)
+          add_test(NAME ${tname}${upper_backend}_mpi
+            COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS}
+                    $<TARGET_FILE:${test_prog}_mpi> ${tname} ${device_command_line_argument}
+                    ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS} ${MPIEXEC_POSTFLAGS}
+            )
+          set_tests_properties("${tname}${upper_backend}_mpi" PROPERTIES
+            LABELS "${upper_backend};${VTKm_UT_LABEL}"
+            TIMEOUT ${timeout}
+            RUN_SERIAL ${run_serial}
+            FAIL_REGULAR_EXPRESSION "runtime error")
+        endif() # VTKm_ENABLE_MPI
+        if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
+          add_test(NAME ${tname}${upper_backend}_nompi
+            COMMAND ${test_prog}_nompi ${tname} ${device_command_line_argument}
+                    ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS}
+            )
+          set_tests_properties("${tname}${upper_backend}_nompi" PROPERTIES
+            LABELS "${upper_backend};${VTKm_UT_LABEL}"
+            TIMEOUT ${timeout}
+            RUN_SERIAL ${run_serial}
+            FAIL_REGULAR_EXPRESSION "runtime error")
+
+        endif() # VTKm_ENABLE_DIY_NOMPI
+      else() # VTKm_UT_MPI
         add_test(NAME ${tname}${upper_backend}
           COMMAND ${test_prog} ${tname} ${device_command_line_argument}
                   ${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS}
           )
-      endif()
-
-      set_tests_properties("${tname}${upper_backend}" PROPERTIES
-        LABELS "${upper_backend};${VTKm_UT_LABEL}"
-        TIMEOUT ${timeout}
-        RUN_SERIAL ${run_serial}
-        FAIL_REGULAR_EXPRESSION "runtime error"
-      )
+        set_tests_properties("${tname}${upper_backend}" PROPERTIES
+            LABELS "${upper_backend};${VTKm_UT_LABEL}"
+            TIMEOUT ${timeout}
+            RUN_SERIAL ${run_serial}
+            FAIL_REGULAR_EXPRESSION "runtime error")
+      endif() # VTKm_UT_MPI
     endforeach()
   endforeach()
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad9fde942..4084b096b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -277,6 +277,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
       ${VTKm_SOURCE_DIR}/CMake/VTKmCPUVectorization.cmake
       ${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
       ${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
+      ${VTKm_SOURCE_DIR}/CMake/VTKmDIYUtils.cmake
       ${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
       ${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
       ${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index fdd50a678..4ec62e7fb 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -18,7 +18,7 @@ if(VTKm_ENABLE_EXAMPLES)
   add_subdirectory(contour_tree_augmented)
   add_subdirectory(cosmotools)
   add_subdirectory(demo)
-  add_subdirectory(game_of_life)
+  #add_subdirectory(game_of_life)
   add_subdirectory(hello_worklet)
   add_subdirectory(histogram)
   add_subdirectory(lagrangian)
diff --git a/examples/contour_tree_augmented/CMakeLists.txt b/examples/contour_tree_augmented/CMakeLists.txt
index 35d6be675..f008cca52 100644
--- a/examples/contour_tree_augmented/CMakeLists.txt
+++ b/examples/contour_tree_augmented/CMakeLists.txt
@@ -78,7 +78,7 @@ endif()
 ####################################
 if (VTKm_ENABLE_MPI)
   add_executable(ContourTree_Augmented_MPI ContourTreeApp.cxx)
-  target_link_libraries(ContourTree_Augmented_MPI vtkm_filter vtkm_io)
+  target_link_libraries(ContourTree_Augmented_MPI vtkm_filter vtkm_io MPI::MPI_CXX)
   vtkm_add_target_information(ContourTree_Augmented_MPI
                               MODIFY_CUDA_FLAGS
                               DEVICE_SOURCES ContourTreeApp.cxx)
diff --git a/examples/contour_tree_augmented/ContourTreeApp.cxx b/examples/contour_tree_augmented/ContourTreeApp.cxx
index 7efffd5a6..310a91566 100644
--- a/examples/contour_tree_augmented/ContourTreeApp.cxx
+++ b/examples/contour_tree_augmented/ContourTreeApp.cxx
@@ -168,7 +168,7 @@ int main(int argc, char* argv[])
   auto comm = MPI_COMM_WORLD;
 
   // Tell VTK-m which communicator it should use.
-  vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator(comm));
+  vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator());
 
   // get the rank and size
   int rank, size;
diff --git a/examples/histogram/CMakeLists.txt b/examples/histogram/CMakeLists.txt
index 89440647e..e160afb7b 100644
--- a/examples/histogram/CMakeLists.txt
+++ b/examples/histogram/CMakeLists.txt
@@ -14,7 +14,7 @@ project(Histogram CXX)
 find_package(VTKm REQUIRED QUIET)
 if (VTKm_ENABLE_MPI)
   add_executable(Histogram Histogram.cxx HistogramMPI.h HistogramMPI.hxx)
-  target_link_libraries(Histogram PRIVATE vtkm_filter)
+  target_link_libraries(Histogram PRIVATE vtkm_filter MPI::MPI_CXX)
   vtkm_add_target_information(Histogram
                               DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
                               DEVICE_SOURCES Histogram.cxx)
diff --git a/examples/histogram/Histogram.cxx b/examples/histogram/Histogram.cxx
index 47b6bfb9b..5b1049ca7 100644
--- a/examples/histogram/Histogram.cxx
+++ b/examples/histogram/Histogram.cxx
@@ -57,14 +57,16 @@ int main(int argc, char* argv[])
   vtkm::cont::Initialize(argc, argv, opts);
 
   // setup MPI environment.
-  MPI_Init(&argc, &argv);
+  vtkmdiy::mpi::environment env(argc, argv); // will finalize on destruction
+
+  vtkmdiy::mpi::communicator world; // the default is MPI_COMM_WORLD
 
   // tell VTK-m the communicator to use.
-  vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator(MPI_COMM_WORLD));
+  vtkm::cont::EnvironmentTracker::SetCommunicator(world);
 
   int rank, size;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
-  MPI_Comm_size(MPI_COMM_WORLD, &size);
+  MPI_Comm_rank(vtkmdiy::mpi::mpi_cast(world.handle()), &rank);
+  MPI_Comm_size(vtkmdiy::mpi::mpi_cast(world.handle()), &size);
 
   if (argc != 2)
   {
@@ -72,7 +74,6 @@ int main(int argc, char* argv[])
     {
       std::cout << "Usage: " << std::endl << "$ " << argv[0] << " <num-bins>" << std::endl;
     }
-    MPI_Finalize();
     return EXIT_FAILURE;
   }
 
@@ -105,11 +106,9 @@ int main(int argc, char* argv[])
     if (count != numVals * size)
     {
       std::cout << "ERROR: bins mismatched!" << std::endl;
-      MPI_Finalize();
       return EXIT_FAILURE;
     }
   }
 
-  MPI_Finalize();
   return EXIT_SUCCESS;
 }
diff --git a/examples/histogram/HistogramMPI.hxx b/examples/histogram/HistogramMPI.hxx
index c6def13c2..c23bcee5e 100644
--- a/examples/histogram/HistogramMPI.hxx
+++ b/examples/histogram/HistogramMPI.hxx
@@ -18,6 +18,9 @@
 #include <vtkm/cont/FieldRangeGlobalCompute.h>
 
 #include <vtkm/thirdparty/diy/diy.h>
+#include <vtkm/thirdparty/diy/mpi-cast.h>
+
+#include <mpi.h>
 
 namespace example
 {
@@ -81,7 +84,7 @@ public:
                sizeof(vtkm::Id) == 4 ? MPI_INT : MPI_LONG,
                MPI_SUM,
                0,
-               comm);
+               vtkmdiy::mpi::mpi_cast(comm.handle()));
 
     if (comm.rank() == 0)
     {
diff --git a/examples/redistribute_points/RedistributePoints.cxx b/examples/redistribute_points/RedistributePoints.cxx
index efa8027ec..ec94f1de7 100644
--- a/examples/redistribute_points/RedistributePoints.cxx
+++ b/examples/redistribute_points/RedistributePoints.cxx
@@ -30,7 +30,7 @@ int main(int argc, char* argv[])
   auto config = vtkm::cont::Initialize(argc, argv, opts);
 
   vtkmdiy::mpi::environment env(argc, argv);
-  auto comm = vtkmdiy::mpi::communicator(MPI_COMM_WORLD);
+  vtkmdiy::mpi::communicator comm;
   vtkm::cont::EnvironmentTracker::SetCommunicator(comm);
 
   if (argc != 3)
diff --git a/examples/redistribute_points/RedistributePoints.h b/examples/redistribute_points/RedistributePoints.h
index 447c4a1b2..4b055228f 100644
--- a/examples/redistribute_points/RedistributePoints.h
+++ b/examples/redistribute_points/RedistributePoints.h
@@ -27,7 +27,7 @@ namespace internal
 
 static vtkmdiy::ContinuousBounds convert(const vtkm::Bounds& bds)
 {
-  vtkmdiy::ContinuousBounds result;
+  vtkmdiy::ContinuousBounds result(3);
   result.min[0] = static_cast<float>(bds.X.Min);
   result.min[1] = static_cast<float>(bds.Y.Min);
   result.min[2] = static_cast<float>(bds.Z.Min);
@@ -136,7 +136,7 @@ public:
         {
           auto target = rp.out_link().target(cc);
           // let's get the bounding box for the target block.
-          vtkmdiy::ContinuousBounds bds;
+          vtkmdiy::ContinuousBounds bds(3);
           this->Decomposer.fill_bounds(bds, target.gid);
 
           auto extractedDS = this->Extract(*block, bds);
diff --git a/vtkm/cont/EnvironmentTracker.cxx b/vtkm/cont/EnvironmentTracker.cxx
index cb03cb736..924015889 100644
--- a/vtkm/cont/EnvironmentTracker.cxx
+++ b/vtkm/cont/EnvironmentTracker.cxx
@@ -11,34 +11,36 @@
 
 #include <vtkm/thirdparty/diy/diy.h>
 
+#include <memory>
+
 namespace vtkm
 {
 namespace cont
 {
 namespace internal
 {
-static vtkmdiy::mpi::communicator GlobalCommuncator(MPI_COMM_NULL);
+static std::unique_ptr<vtkmdiy::mpi::communicator> GlobalCommuncator;
 }
 
 void EnvironmentTracker::SetCommunicator(const vtkmdiy::mpi::communicator& comm)
 {
-  vtkm::cont::internal::GlobalCommuncator = comm;
+  if (!internal::GlobalCommuncator)
+  {
+    internal::GlobalCommuncator.reset(new vtkmdiy::mpi::communicator(comm));
+  }
+  else
+  {
+    *internal::GlobalCommuncator = comm;
+  }
 }
 
 const vtkmdiy::mpi::communicator& EnvironmentTracker::GetCommunicator()
 {
-#ifndef VTKM_DIY_NO_MPI
-  int flag;
-  MPI_Initialized(&flag);
-  if (!flag)
+  if (!internal::GlobalCommuncator)
   {
-    int argc = 0;
-    char** argv = nullptr;
-    MPI_Init(&argc, &argv);
-    internal::GlobalCommuncator = vtkmdiy::mpi::communicator(MPI_COMM_WORLD);
+    internal::GlobalCommuncator.reset(new vtkmdiy::mpi::communicator());
   }
-#endif
-  return vtkm::cont::internal::GlobalCommuncator;
+  return *internal::GlobalCommuncator;
 }
 } // namespace vtkm::cont
 } // namespace vtkm
diff --git a/vtkm/cont/testing/Testing.h b/vtkm/cont/testing/Testing.h
index 0be345992..770fb0ba3 100644
--- a/vtkm/cont/testing/Testing.h
+++ b/vtkm/cont/testing/Testing.h
@@ -24,7 +24,7 @@
 #include <vtkm/cont/DynamicCellSet.h>
 #include <vtkm/cont/VariantArrayHandle.h>
 
-#include <vtkm/thirdparty/diy/serialization.h>
+#include <vtkm/thirdparty/diy/diy.h>
 
 namespace opt = vtkm::cont::internal::option;
 
@@ -265,31 +265,6 @@ private:
   }
 };
 
-struct Environment
-{
-  VTKM_CONT Environment(int* argc, char*** argv)
-  {
-#if defined(VTKM_ENABLE_MPI)
-    int provided_threading;
-    MPI_Init_thread(argc, argv, MPI_THREAD_FUNNELED, &provided_threading);
-
-    // set the global communicator to use in VTKm.
-    vtkmdiy::mpi::communicator comm(MPI_COMM_WORLD);
-    vtkm::cont::EnvironmentTracker::SetCommunicator(comm);
-#else
-    (void)argc;
-    (void)argv;
-#endif
-  }
-
-  VTKM_CONT ~Environment()
-  {
-#if defined(VTKM_ENABLE_MPI)
-    MPI_Finalize();
-#endif
-  }
-};
-
 //============================================================================
 class TestEqualResult
 {
diff --git a/vtkm/filter/ContourTreeUniformAugmented.hxx b/vtkm/filter/ContourTreeUniformAugmented.hxx
index a707d44ae..e91c75bc7 100644
--- a/vtkm/filter/ContourTreeUniformAugmented.hxx
+++ b/vtkm/filter/ContourTreeUniformAugmented.hxx
@@ -202,8 +202,7 @@ public:
   {
     if (this->NumberOfDimensions() == 2)
     {
-      // may need to change back when porting ot later verison of VTKM/vtkmdiy
-      vtkmdiy::DiscreteBounds domain; //(2);
+      vtkmdiy::DiscreteBounds domain(2);
       domain.min[0] = domain.min[1] = 0;
       domain.max[0] = static_cast<int>(this->GlobalSize[0]);
       domain.max[1] = static_cast<int>(this->GlobalSize[1]);
@@ -211,8 +210,7 @@ public:
     }
     else
     {
-      // may need to change back when porting to later version of VTMK/vtkmdiy
-      vtkmdiy::DiscreteBounds domain; //(3);
+      vtkmdiy::DiscreteBounds domain(3);
       domain.min[0] = domain.min[1] = domain.min[2] = 0;
       domain.max[0] = static_cast<int>(this->GlobalSize[0]);
       domain.max[1] = static_cast<int>(this->GlobalSize[1]);
diff --git a/vtkm/filter/testing/UnitTestHistogramFilter.cxx b/vtkm/filter/testing/UnitTestHistogramFilter.cxx
index e34e7772d..8b3e06f15 100644
--- a/vtkm/filter/testing/UnitTestHistogramFilter.cxx
+++ b/vtkm/filter/testing/UnitTestHistogramFilter.cxx
@@ -13,6 +13,8 @@
 #include <vtkm/cont/DataSet.h>
 #include <vtkm/cont/testing/Testing.h>
 
+#include <vtkm/thirdparty/diy/environment.h>
+
 //
 // Make a simple 2D, 1000 point dataset populated with stat distributions
 //
@@ -328,5 +330,8 @@ void TestHistogram()
 
 int UnitTestHistogramFilter(int argc, char* argv[])
 {
+  // Set up the MPI environment: this test is not intended to be run in parallel,
+  // but the filter does make some DIY/MPI calls.
+  vtkmdiy::mpi::environment env(argc, argv);
   return vtkm::cont::testing::Testing::Run(TestHistogram, argc, argv);
 }
diff --git a/vtkm/filter/testing/UnitTestPartitionedDataSetHistogramFilter.cxx b/vtkm/filter/testing/UnitTestPartitionedDataSetHistogramFilter.cxx
index 1e9445f3a..6ddd013dc 100644
--- a/vtkm/filter/testing/UnitTestPartitionedDataSetHistogramFilter.cxx
+++ b/vtkm/filter/testing/UnitTestPartitionedDataSetHistogramFilter.cxx
@@ -13,6 +13,8 @@
 #include <vtkm/cont/PartitionedDataSet.h>
 #include <vtkm/cont/testing/Testing.h>
 
+#include <vtkm/thirdparty/diy/environment.h>
+
 #include <algorithm>
 #include <numeric>
 #include <random>
@@ -127,5 +129,8 @@ static void TestPartitionedDataSetHistogram()
 
 int UnitTestPartitionedDataSetHistogramFilter(int argc, char* argv[])
 {
+  // Set up the MPI environment: this test is not intended to be run in parallel,
+  // but the filter does make some DIY/MPI calls.
+  vtkmdiy::mpi::environment env(argc, argv);
   return vtkm::cont::testing::Testing::Run(TestPartitionedDataSetHistogram, argc, argv);
 }
diff --git a/vtkm/thirdparty/diy/CMakeLists.txt b/vtkm/thirdparty/diy/CMakeLists.txt
index af6303fe2..d6f892695 100644
--- a/vtkm/thirdparty/diy/CMakeLists.txt
+++ b/vtkm/thirdparty/diy/CMakeLists.txt
@@ -7,36 +7,118 @@
 ##  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 ##  PURPOSE.  See the above copyright notice for more information.
 ##============================================================================
-add_library(vtkm_diy INTERFACE)
-
 vtkm_get_kit_name(kit_name kit_dir)
 
-# diy needs C++11
-target_compile_features(vtkm_diy INTERFACE cxx_std_11)
+include(CMakeDependentOption)
+if (NOT DEFINED VTKm_ENABLE_DIY_NOMPI)
+  cmake_dependent_option(
+    VTKm_ENABLE_DIY_NOMPI "Also build DIY without mpi" OFF "VTKm_ENABLE_MPI" OFF)
+endif()
+
+if (VTKm_ENABLE_DIY_NOMPI AND
+    (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC") AND
+    BUILD_SHARED_LIBS)
+  message(WARNING "VTKm_ENABLE_DIY_NOMPI support with MSVC shared builds is experimental and may not work.")
+endif()
+
+## Configure diy
+set(build_examples OFF)
+set(build_tests OFF)
+
+set(threads OFF)
+set(log OFF)
+set(profile OFF)
+set(caliper OFF)
+
+set(build_diy_mpi_lib ON)
+
+set(mpi OFF)
+set(build_diy_nompi_lib OFF)
+if (VTKm_ENABLE_MPI)
+  set(mpi ON)
+endif()
+if (VTKm_ENABLE_DIY_NOMPI)
+  set(build_diy_nompi_lib ON)
+endif()
+
+mark_as_advanced(FORCE caliper log profile wrapped_mpi)
+
+set(diy_prefix "vtkmdiy")
+set(diy_install_include_dir ${VTKm_INSTALL_INCLUDE_DIR}/${kit_dir}/vtkmdiy/include)
+set(diy_install_lib_dir ${VTKm_INSTALL_LIB_DIR})
+set(diy_export_name ${VTKm_EXPORT_NAME})
+if (VTKm_INSTALL_ONLY_LIBRARIES)
+  set(diy_install_only_libraries)
+endif()
+set(diy_dont_install_export)
 
 # placeholder to support external DIY
 set(VTKM_USE_EXTERNAL_DIY OFF)
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Configure.h.in
   ${VTKm_BINARY_INCLUDE_DIR}/${kit_dir}/Configure.h)
 
+function(vtkm_diy_set_target_output_directory target)  # Send `target`'s build outputs to VTK-m's library/executable output paths.
+  set_property(TARGET ${target} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
+  set_property(TARGET ${target} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
+  set_property(TARGET ${target} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
+endfunction()
+
+#-----------------------------------------------------------------------------
+add_subdirectory(vtkmdiy)
+
+# move diy libraries
+if (TARGET vtkmdiympi)
+  vtkm_diy_set_target_output_directory(vtkmdiympi)
+endif()
+if (TARGET vtkmdiympi_nompi)
+  vtkm_diy_set_target_output_directory(vtkmdiympi_nompi)
+endif()
+
+include(VTKmDIYUtils)
+
+add_library(vtkm_diy INTERFACE)
+vtkm_diy_init_target()
 target_include_directories(vtkm_diy INTERFACE
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
   $<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}/vtkm/thirdparty/diy>)
+target_link_libraries(vtkm_diy INTERFACE vtkmdiy)
 
-if(VTKm_ENABLE_MPI)
-  target_link_libraries(vtkm_diy INTERFACE MPI::MPI_CXX)
+# special logic for when both versions of the diy library are built
+if (VTKm_ENABLE_DIY_NOMPI)
+  # only link vtkmdiympi/vtkmdiympi_nompi when building executable
+  set(is_exe "$<STREQUAL:$<TARGET_PROPERTY:TYPE>,EXECUTABLE>")
+  target_link_libraries(vtkm_diy INTERFACE
+    "$<LINK_ONLY:$<${is_exe}:$<TARGET_PROPERTY:vtkm_diy,vtkm_diy_target>>>")
+
+  # ignore undefined symbols
+  set(is_shared_lib "$<STREQUAL:$<TARGET_PROPERTY:TYPE>,SHARED_LIBRARY>")
+  if (APPLE)
+    target_link_libraries(vtkm_diy INTERFACE "$<${is_shared_lib}:-undefined dynamic_lookup>")
+  elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
+    target_link_libraries(vtkm_diy INTERFACE
+      "$<${is_shared_lib}:-INCREMENTAL:NO>"
+      "$<${is_shared_lib}:-FORCE:UNRESOLVED>")
+  endif()
+else()
+  if (TARGET vtkmdiympi)
+    target_link_libraries(vtkm_diy INTERFACE vtkmdiympi)
+  else()
+    target_link_libraries(vtkm_diy INTERFACE vtkmdiympi_nompi)
+  endif()
 endif()
 
-install(TARGETS vtkm_diy
-  EXPORT ${VTKm_EXPORT_NAME})
+#-----------------------------------------------------------------------------
+install(TARGETS vtkm_diy EXPORT ${VTKm_EXPORT_NAME})
 
-  ## Install headers
-if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
-  install(DIRECTORY vtkmdiy
-    DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${kit_dir}/)
+## Install headers
+if (NOT VTKm_INSTALL_ONLY_LIBRARIES)
   install(FILES
     ${VTKm_BINARY_INCLUDE_DIR}/${kit_dir}/Configure.h
     ${CMAKE_CURRENT_SOURCE_DIR}/diy.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/environment.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/mpi-cast.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/post-include.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/pre-include.h
     ${CMAKE_CURRENT_SOURCE_DIR}/serialization.h
     DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${kit_dir}/)
 endif()
diff --git a/vtkm/thirdparty/diy/Configure.h.in b/vtkm/thirdparty/diy/Configure.h.in
index 94f898fe8..9365c39d5 100644
--- a/vtkm/thirdparty/diy/Configure.h.in
+++ b/vtkm/thirdparty/diy/Configure.h.in
@@ -20,16 +20,6 @@
 /* Use the diy library configured for VTM-m. */
 #cmakedefine01 VTKM_USE_EXTERNAL_DIY
 
-/* Whether to use MPI support in DIY */
-#if !defined(VTKM_ENABLE_MPI)
-# define VTKM_DIY_NO_MPI
-#endif
-
-/* initially, we disable DIY threads.
- * once we've sorted out how DIY threads and vtkm work together
- * we will make this configurable.*/
-#define VTKM_DIY_NO_THREADS
-
 /* Need to provide a way to for Serialziation
  * specializations to be injected into the correct
  * namespace. This solves the issue while allowing
@@ -41,5 +31,4 @@
 # define mangled_diy_namespace vtkmdiy
 #endif
 
-
 #endif
diff --git a/vtkm/thirdparty/diy/diy.h b/vtkm/thirdparty/diy/diy.h
index a0737eb37..3ae41ae30 100644
--- a/vtkm/thirdparty/diy/diy.h
+++ b/vtkm/thirdparty/diy/diy.h
@@ -10,21 +10,9 @@
 #ifndef vtk_m_thirdparty_diy_diy_h
 #define vtk_m_thirdparty_diy_diy_h
 
-#include <vtkm/thirdparty/diy/Configure.h>
-
-#if VTKM_USE_EXTERNAL_DIY
-#define VTKM_DIY_INCLUDE(header) <diy/header>
-#else
-#define VTKM_DIY_INCLUDE(header) <vtkmdiy/include/vtkmdiy/header>
-#define diy vtkmdiy // mangle namespace diy (see below comments)
-#endif
-
-#if defined(VTKM_CLANG) || defined(VTKM_GCC)
-#pragma GCC visibility push(default)
-#endif
 
+#include "pre-include.h"
 // clang-format off
-VTKM_THIRDPARTY_PRE_INCLUDE
 #include VTKM_DIY_INCLUDE(assigner.hpp)
 #include VTKM_DIY_INCLUDE(decomposition.hpp)
 #include VTKM_DIY_INCLUDE(master.hpp)
@@ -36,29 +24,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
 #include VTKM_DIY_INCLUDE(reduce-operations.hpp)
 #include VTKM_DIY_INCLUDE(resolve.hpp)
 #include VTKM_DIY_INCLUDE(serialization.hpp)
-#undef VTKM_DIY_INCLUDE
-VTKM_THIRDPARTY_POST_INCLUDE
 // clang-format on
+#include "post-include.h"
 
-#if defined(VTKM_CLANG) || defined(VTKM_GCC)
-#pragma GCC visibility pop
-#endif
-
-// When using an external DIY
-// We need to alias the diy namespace to
-// vtkmdiy so that VTK-m uses it properly
-#if VTKM_USE_EXTERNAL_DIY
-namespace vtkmdiy = ::diy;
-
-#else
-// The aliasing approach fails for when we
-// want to us an internal version since
-// the diy namespace already points to the
-// external version. Instead we use macro
-// replacement to make sure all diy classes
-// are placed in vtkmdiy placed
-#undef diy // mangle namespace diy
-
-#endif
-
-#endif
+#endif // vtk_m_thirdparty_diy_diy_h
diff --git a/vtkm/thirdparty/diy/environment.h b/vtkm/thirdparty/diy/environment.h
new file mode 100644
index 000000000..2e5adfc2c
--- /dev/null
+++ b/vtkm/thirdparty/diy/environment.h
@@ -0,0 +1,19 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+#ifndef vtk_m_thirdparty_diy_environment_h
+#define vtk_m_thirdparty_diy_environment_h
+
+#include "pre-include.h"
+// clang-format off
+#include VTKM_DIY_INCLUDE(mpi/environment.hpp)
+// clang-format on
+#include "post-include.h"
+
+#endif // vtk_m_thirdparty_diy_environment_h
diff --git a/vtkm/thirdparty/diy/mpi-cast.h b/vtkm/thirdparty/diy/mpi-cast.h
new file mode 100644
index 000000000..34786bc0c
--- /dev/null
+++ b/vtkm/thirdparty/diy/mpi-cast.h
@@ -0,0 +1,19 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//============================================================================
+#ifndef vtk_m_thirdparty_diy_mpi_cast_h
+#define vtk_m_thirdparty_diy_mpi_cast_h
+// Pulls in DIY's mpi/mpi_cast.hpp between the shared pre-include.h/post-include.h wrappers.
+#include "pre-include.h"
+// clang-format off
+#include VTKM_DIY_INCLUDE(mpi/mpi_cast.hpp)
+// clang-format on
+#include "post-include.h"
+
+#endif // vtk_m_thirdparty_diy_mpi_cast_h
diff --git a/vtkm/thirdparty/diy/post-include.h b/vtkm/thirdparty/diy/post-include.h
new file mode 100644
index 000000000..e27037321
--- /dev/null
+++ b/vtkm/thirdparty/diy/post-include.h
@@ -0,0 +1,24 @@
+#undef VTKM_DIY_INCLUDE
+VTKM_THIRDPARTY_POST_INCLUDE
+// clang-format on
+
+#if defined(VTKM_CLANG) || defined(VTKM_GCC)
+#pragma GCC visibility pop
+#endif
+
+// When using an external DIY
+// We need to alias the diy namespace to
+// vtkmdiy so that VTK-m uses it properly
+#if VTKM_USE_EXTERNAL_DIY
+namespace vtkmdiy = ::diy;
+
+#else
+// The aliasing approach fails for when we
+// want to use an internal version since
+// the diy namespace already points to the
+// external version. Instead we use macro
+// replacement to make sure all diy classes
+// are placed in the vtkmdiy namespace.
+#undef diy // mangle namespace diy
+
+#endif
diff --git a/vtkm/thirdparty/diy/pre-include.h b/vtkm/thirdparty/diy/pre-include.h
new file mode 100644
index 000000000..b1753531a
--- /dev/null
+++ b/vtkm/thirdparty/diy/pre-include.h
@@ -0,0 +1,15 @@
+#include <vtkm/thirdparty/diy/Configure.h>
+
+#if VTKM_USE_EXTERNAL_DIY
+#define VTKM_DIY_INCLUDE(header) <diy/header>
+#else
+#define VTKM_DIY_INCLUDE(header) <vtkmdiy/header>
+#define diy vtkmdiy // mangle namespace diy
+#endif
+
+#if defined(VTKM_CLANG) || defined(VTKM_GCC)
+#pragma GCC visibility push(default)
+#endif
+
+// clang-format off
+VTKM_THIRDPARTY_PRE_INCLUDE
diff --git a/vtkm/thirdparty/diy/serialization.h b/vtkm/thirdparty/diy/serialization.h
index a03e51ee2..128eb673b 100644
--- a/vtkm/thirdparty/diy/serialization.h
+++ b/vtkm/thirdparty/diy/serialization.h
@@ -10,45 +10,10 @@
 #ifndef vtk_m_thirdparty_diy_serialization_h
 #define vtk_m_thirdparty_diy_serialization_h
 
-#include <vtkm/thirdparty/diy/Configure.h>
-
-#if VTKM_USE_EXTERNAL_DIY
-#define VTKM_DIY_INCLUDE(header) <diy/header>
-#else
-#define VTKM_DIY_INCLUDE(header) <vtkmdiy/include/vtkmdiy/header>
-#define diy vtkmdiy // mangle namespace diy (see below comments)
-#endif
-
-#if defined(VTKM_CLANG) || defined(VTKM_GCC)
-#pragma GCC visibility push(default)
-#endif
-
+#include "pre-include.h"
 // clang-format off
-VTKM_THIRDPARTY_PRE_INCLUDE
 #include VTKM_DIY_INCLUDE(serialization.hpp)
-#undef VTKM_DIY_INCLUDE
-VTKM_THIRDPARTY_POST_INCLUDE
 // clang-format on
+#include "post-include.h"
 
-#if defined(VTKM_CLANG) || defined(VTKM_GCC)
-#pragma GCC visibility pop
-#endif
-
-// When using an external DIY
-// We need to alias the diy namespace to
-// vtkmdiy so that VTK-m uses it properly
-#if VTKM_USE_EXTERNAL_DIY
-namespace vtkmdiy = ::diy;
-
-#else
-// The aliasing approach fails for when we
-// want to us an internal version since
-// the diy namespace already points to the
-// external version. Instead we use macro
-// replacement to make sure all diy classes
-// are placed in vtkmdiy placed
-#undef diy // mangle namespace diy
-
-#endif
-
-#endif
+#endif // vtk_m_thirdparty_diy_serialization_h
diff --git a/vtkm/thirdparty/diy/update.sh b/vtkm/thirdparty/diy/update.sh
index 3ce5ef0ec..d15c2f4b2 100755
--- a/vtkm/thirdparty/diy/update.sh
+++ b/vtkm/thirdparty/diy/update.sh
@@ -8,9 +8,11 @@ readonly name="diy"
 readonly ownership="Diy Upstream <kwrobot@kitware.com>"
 readonly subtree="vtkm/thirdparty/$name/vtkm$name"
 readonly repo="https://gitlab.kitware.com/third-party/diy2.git"
-readonly tag="for/vtk-m"
+readonly tag="for/vtk-m-20200608-master"
 readonly paths="
+cmake
 include
+CMakeLists.txt
 LEGAL.txt
 LICENSE.txt
 README.md
diff --git a/vtkm/thirdparty/diy/vtkmdiy/CMakeLists.txt b/vtkm/thirdparty/diy/vtkmdiy/CMakeLists.txt
new file mode 100644
index 000000000..52113306b
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/CMakeLists.txt
@@ -0,0 +1,257 @@
+#=============================================================================
+# Adds the following DIY library targets:
+# 1. diy:          The main diy interface library and the only target for
+#                  header-only mode.
+# 2. diympi:       Generated when `build_diy_mpi_lib` and `mpi` are turned on.
+#                  Isolates the MPI dependent part of diy into a library.
+# 3. diympi_nompi: Generated when `build_diy_mpi_lib` is on and either `mpi`
+#                  is off or `build_diy_nompi_lib` is on.
+#
+# Both mpi and non-mpi libraries can be generated by turning on `build_diy_mpi_lib`
+# and `build_diy_nompi_lib`. In this case, one of these targets must be explicitly
+# specified when linking against diy.
+#=============================================================================
+
+cmake_minimum_required      (VERSION 3.9) # must come before project() so policies are set first
+project                     (DIY)
+
+list (APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake)
+
+include(CMakeDependentOption)
+
+# Provides an option only if `variable` is not already defined; every remaining
+# argument is forwarded to option(). Replaceable once CMake 3.13 is our cmake_minimum_required.
+macro (diy_option variable)
+  if (NOT DEFINED "${variable}")
+    option("${variable}" ${ARGN})
+  endif ()
+endmacro ()
+# Same guard as above, but the remaining arguments go to cmake_dependent_option().
+macro (diy_dependent_option variable)
+  if (NOT DEFINED "${variable}")
+    cmake_dependent_option("${variable}" ${ARGN})
+  endif ()
+endmacro ()
+
+diy_option                  (threads             "Build DIY with threading"                                  ON)
+diy_option                  (log                 "Build DIY with logging"                                    OFF)
+diy_option                  (profile             "Build DIY with profiling"                                  OFF)
+diy_option                  (caliper             "Build DIY with caliper"                                    OFF)
+diy_option                  (mpi                 "Build DIY with mpi"                                        ON)
+diy_option                  (wrapped_mpi         "MPI compiler wrapper requires no further MPI libraries"    OFF)
+diy_option                  (build_diy_mpi_lib   "Build diy::mpi as a library"                               OFF)
+diy_dependent_option        (BUILD_SHARED_LIBS   "Create shared libraries if on"                             ON  "build_diy_mpi_lib" OFF)
+diy_dependent_option        (build_diy_nompi_lib "Also build the nompi version of diy::mpi"                  OFF "mpi;build_diy_mpi_lib" OFF)
+diy_option                  (build_examples      "Build DIY examples"                                        ON)
+diy_option                  (build_tests         "Build DIY tests"                                           ON)
+
+# Default to Release, but only for single-config generators (multi-config ones ignore CMAKE_BUILD_TYPE)
+if                          (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+    set                     (CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
+    set_property            (CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
+endif                       ()
+
+set (diy_definitions "")
+set (diy_include_directories "")
+set (diy_include_thirdparty_directories "")
+set (diy_libraries "")
+
+# Debugging (operands are quoted so an empty build type cannot break the comparison)
+if                          ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR
+                             "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
+    list (APPEND diy_definitions "-DDEBUG")
+endif                       ()
+
+# Logging
+if                          (log)
+    list (APPEND diy_definitions "-DVTKMDIY_USE_SPDLOG")
+    find_path               (SPDLOG_INCLUDE_DIR     spdlog/spdlog.h)
+    list (APPEND diy_include_thirdparty_directories $<BUILD_INTERFACE:${SPDLOG_INCLUDE_DIR}>)
+endif()
+
+# Profiling
+if                          (profile)
+    list (APPEND diy_definitions "-DVTKMDIY_PROFILE")
+endif()
+
+if                          (caliper)
+    list (APPEND diy_definitions "-DVTKMDIY_USE_CALIPER")
+
+    find_package            (caliper)
+    list (APPEND diy_include_thirdparty_directories $<BUILD_INTERFACE:${caliper_INCLUDE_DIR}>)
+    list (APPEND diy_libraries caliper caliper-mpi)
+endif()
+
+# Threading: link the thread library when enabled, otherwise define VTKMDIY_NO_THREADS
+if                          (threads)
+    find_package            (Threads)
+    list (APPEND diy_libraries ${CMAKE_THREAD_LIBS_INIT})
+else                        ()
+    list (APPEND diy_definitions "-DVTKMDIY_NO_THREADS")
+endif                       ()
+
+# MPI
+if (mpi AND NOT wrapped_mpi)
+    find_package(MPI REQUIRED)
+endif()
+
+# configuration variables for diy build and install
+# if diy is a sub-project, the following variables allow the parent project to
+# easily customize the library
+if (NOT DEFINED diy_prefix)
+    set(diy_prefix "diy")
+endif()
+if (NOT DEFINED diy_install_include_dir)
+    set(diy_install_include_dir "include")
+endif()
+if (NOT DEFINED diy_install_lib_dir)
+    set(diy_install_lib_dir "lib")
+endif()
+if (NOT DEFINED diy_export_name)
+    set(diy_export_name "diy_targets")
+endif()
+
+set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
+set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
+
+# for diy_developer_flags
+include(DIYCompilerFlags)
+
+function(add_diy_mpi_library use_mpi)
+    # Choose the target name and the VTKMDIY_HAS_MPI value for the requested flavor.
+    if (use_mpi)
+        set (mpi_target ${diy_prefix}mpi)
+        set (mpi_flag 1)
+    else()
+        set (mpi_target ${diy_prefix}mpi_nompi)
+        set (mpi_flag 0)
+    endif()
+
+    # Both flavors are built from the same set of mpi/*.cpp source files.
+    add_library(${mpi_target}
+        "include/${diy_prefix}/mpi/collectives.cpp"
+        "include/${diy_prefix}/mpi/communicator.cpp"
+        "include/${diy_prefix}/mpi/datatypes.cpp"
+        "include/${diy_prefix}/mpi/environment.cpp"
+        "include/${diy_prefix}/mpi/io.cpp"
+        "include/${diy_prefix}/mpi/operations.cpp"
+        "include/${diy_prefix}/mpi/point-to-point.cpp"
+        "include/${diy_prefix}/mpi/request.cpp"
+        "include/${diy_prefix}/mpi/status.cpp"
+        "include/${diy_prefix}/mpi/window.cpp")
+    target_compile_features(${mpi_target} PRIVATE cxx_std_11)
+    target_compile_definitions(${mpi_target}
+        PRIVATE -DVTKMDIY_HAS_MPI=${mpi_flag}
+        PRIVATE -Ddiy=${diy_prefix}         # mangle diy namespace
+        PRIVATE ${diy_definitions})
+    target_include_directories(${mpi_target} SYSTEM PRIVATE ${PROJECT_SOURCE_DIR}/include) # for types.hpp
+    target_include_directories(${mpi_target} SYSTEM PRIVATE ${diy_include_directories}) # for mpitypes.hpp
+    target_include_directories(${mpi_target} SYSTEM PRIVATE ${diy_include_thirdparty_directories})
+    target_link_libraries(${mpi_target} PRIVATE diy_developer_flags)
+    if (use_mpi AND TARGET MPI::MPI_CXX)
+        target_link_libraries(${mpi_target} PRIVATE MPI::MPI_CXX)
+    endif()
+endfunction()
+
+# create the targets
+set (diy_targets)
+
+if (build_diy_mpi_lib)
+    include(DIYConfigureMPI)
+
+    # To be interchangeable, these libraries should only have PRIVATE properties.
+    # Properties that should be public should also be part of the core diy target.
+    list(APPEND diy_definitions -DVTKMDIY_MPI_AS_LIB)
+    list(APPEND diy_include_directories
+        "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include/${diy_prefix}/mpi>"
+        "$<INSTALL_INTERFACE:${diy_install_include_dir}/${diy_prefix}/mpi>")
+
+    # macro required for proper export macros for static vs shared builds
+    if (NOT BUILD_SHARED_LIBS)
+        list(APPEND diy_definitions -DVTKMDIY_MPI_STATIC_BUILD)
+    endif()
+
+    if (mpi)
+        add_diy_mpi_library(ON)
+        list(APPEND diy_targets ${diy_prefix}mpi)
+    endif()
+    if ((NOT mpi) OR build_diy_nompi_lib)
+        add_diy_mpi_library(OFF)
+        list(APPEND diy_targets ${diy_prefix}mpi_nompi)
+    endif()
+endif() # build_diy_mpi_lib
+
+add_library(${diy_prefix} INTERFACE)
+target_compile_features(${diy_prefix} INTERFACE cxx_std_11)
+target_compile_definitions(${diy_prefix} INTERFACE ${diy_definitions})
+target_include_directories(${diy_prefix} SYSTEM INTERFACE
+    "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
+    "$<INSTALL_INTERFACE:${diy_install_include_dir}>")
+target_include_directories(${diy_prefix} SYSTEM INTERFACE ${diy_include_thirdparty_directories})
+if (diy_include_directories)
+    target_include_directories(${diy_prefix} SYSTEM INTERFACE ${diy_include_directories})
+endif()
+target_link_libraries(${diy_prefix} INTERFACE ${diy_libraries})
+if (NOT build_diy_mpi_lib)
+    if (mpi)
+        target_compile_definitions(${diy_prefix} INTERFACE -DVTKMDIY_HAS_MPI=1)
+        if (TARGET MPI::MPI_CXX)
+            target_link_libraries(${diy_prefix} INTERFACE MPI::MPI_CXX)
+        endif()
+    else()
+        target_compile_definitions(${diy_prefix} INTERFACE -DVTKMDIY_HAS_MPI=0)
+    endif()
+elseif (NOT build_diy_nompi_lib)
+    if (mpi)
+        target_link_libraries(${diy_prefix} INTERFACE ${diy_prefix}mpi)
+    else()
+        target_link_libraries(${diy_prefix} INTERFACE ${diy_prefix}mpi_nompi)
+    endif()
+endif()
+
+list(APPEND diy_targets ${diy_prefix} diy_developer_flags)
+
+# libraries used by examples and tests
+set(libraries ${diy_prefix})
+if (${diy_prefix}mpi IN_LIST diy_targets)
+    list(APPEND libraries ${diy_prefix}mpi)
+elseif (${diy_prefix}mpi_nompi IN_LIST diy_targets)
+    list(APPEND libraries ${diy_prefix}mpi_nompi)
+endif()
+list(APPEND libraries diy_developer_flags)
+
+# enable testing and CDash dashboard submission
+enable_testing              ()
+include                     (CTest)
+
+if                          (build_examples)
+    add_subdirectory        (examples)
+endif                       (build_examples)
+
+if                          (build_tests)
+    add_subdirectory        (tests)
+endif                       (build_tests)
+
+# configure find_package script
+include(CMakePackageConfigHelpers)
+configure_package_config_file(
+    "${PROJECT_SOURCE_DIR}/cmake/diy-config.cmake.in"
+    "${PROJECT_BINARY_DIR}/diy-config.cmake"
+    INSTALL_DESTINATION ".")
+
+# install targets
+if (NOT DEFINED diy_install_only_libraries) # defined by parent project if building for binary distribution
+    install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/${diy_prefix} DESTINATION ${diy_install_include_dir})
+
+    if (build_diy_mpi_lib)
+        install(FILES ${PROJECT_BINARY_DIR}/include/${diy_prefix}/mpi/mpitypes.hpp DESTINATION ${diy_install_include_dir}/${diy_prefix}/mpi)
+    endif()
+endif()
+
+install(TARGETS ${diy_targets} EXPORT ${diy_export_name} DESTINATION ${diy_install_lib_dir})
+
+export(EXPORT ${diy_export_name} NAMESPACE DIY:: FILE "${PROJECT_BINARY_DIR}/diy-targets.cmake")
+if (CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) # Only generate these files when diy is the main project
+    install(EXPORT ${diy_export_name} NAMESPACE DIY:: DESTINATION "." FILE diy-targets.cmake)
+    install(FILES "${PROJECT_BINARY_DIR}/diy-config.cmake" DESTINATION ".")
+endif()
diff --git a/vtkm/thirdparty/diy/vtkmdiy/cmake/DIYCompilerFlags.cmake b/vtkm/thirdparty/diy/vtkmdiy/cmake/DIYCompilerFlags.cmake
new file mode 100644
index 000000000..a4f68fc4d
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/cmake/DIYCompilerFlags.cmake
@@ -0,0 +1,76 @@
+if(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR
+   CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
+  set(DIY_COMPILER_IS_MSVC 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
+  set(DIY_COMPILER_IS_PGI 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+  set(DIY_COMPILER_IS_ICC 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
+  set(DIY_COMPILER_IS_CLANG 1)
+  set(DIY_COMPILER_IS_APPLECLANG 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
+  set(DIY_COMPILER_IS_CLANG 1)
+elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+  set(DIY_COMPILER_IS_GNU 1)
+endif()
+
+#-----------------------------------------------------------------------------
+add_library(diy_developer_flags INTERFACE)
+
+if(DIY_COMPILER_IS_MSVC)
+  target_compile_definitions(diy_developer_flags INTERFACE
+    "_SCL_SECURE_NO_WARNINGS" "_CRT_SECURE_NO_WARNINGS")
+
+  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.15)
+    set(cxx_flags "-W3")
+  endif()
+  #list(APPEND cxx_flags -wd4702 -wd4505)
+
+  if(MSVC_VERSION LESS 1900)
+    # In VS2013 the C4127 warning has a bug in the implementation and
+    # generates false positive warnings for lots of template code
+    #list(APPEND cxx_flags -wd4127)
+  endif()
+
+  target_compile_options(diy_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>)
+
+elseif(DIY_COMPILER_IS_ICC)
+  # disable some false positive warnings
+  set(cxx_flags -wd186 -wd3280)
+  list(APPEND cxx_flags -diag-disable=11074 -diag-disable=11076)
+  #list(APPEND cxx_flags -wd1478 -wd 13379)
+  target_compile_options(diy_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>)
+
+elseif(DIY_COMPILER_IS_GNU OR DIY_COMPILER_IS_CLANG)
+  set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common)
+
+  #Only add float-conversion warnings for gcc as the integer warnings in GCC
+  #include the implicit casting of all types smaller than int to ints.
+  if (DIY_COMPILER_IS_GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.99)
+    list(APPEND cxx_flags -Wfloat-conversion)
+  elseif (DIY_COMPILER_IS_CLANG)
+    list(APPEND cxx_flags -Wconversion)
+  endif()
+
+  # TODO: remove after resolving these warnings
+  # temporarily disable the following warnings as we will need a well thought out plan for fixing these
+  list(APPEND cxx_flags -Wno-sign-conversion -Wno-sign-compare -Wno-cast-align)
+
+  #Add in the -Wodr warning for GCC versions 5.2+
+  if (DIY_COMPILER_IS_CLANG OR (DIY_COMPILER_IS_GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1))
+    list(APPEND cxx_flags -Wodr)
+  endif()
+
+  #GCC 5, 6 don't properly handle strict-overflow suppression through pragma's.
+  #Instead of suppressing around the location of the strict-overflow you
+  #have to suppress around the entry point, or in vtk-m case the worklet
+  #invocation site. This is incredibly tedious and has been fixed in gcc 7
+  #
+  if(DIY_COMPILER_IS_GNU AND
+    (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.99) AND
+    (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.99) )
+    list(APPEND cxx_flags -Wno-strict-overflow)
+  endif()
+
+  target_compile_options(diy_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>)
+endif()
diff --git a/vtkm/thirdparty/diy/vtkmdiy/cmake/DIYConfigureMPI.cmake b/vtkm/thirdparty/diy/vtkmdiy/cmake/DIYConfigureMPI.cmake
new file mode 100644
index 000000000..b4d797ea8
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/cmake/DIYConfigureMPI.cmake
@@ -0,0 +1,51 @@
+cmake_policy(PUSH)
+if (POLICY CMP0075)
+  cmake_policy(SET CMP0075 NEW)
+endif()
+
+include (CheckTypeSize)
+
+if (mpi)
+  find_package(MPI REQUIRED)
+  list(APPEND CMAKE_REQUIRED_INCLUDES   ${MPI_CXX_INCLUDE_PATH})
+  list(APPEND CMAKE_EXTRA_INCLUDE_FILES "mpi.h")
+  list(APPEND CMAKE_REQUIRED_LIBRARIES  ${MPI_CXX_LIBRARIES})
+else()
+  list(APPEND CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/include)
+  list(APPEND CMAKE_EXTRA_INCLUDE_FILES "${diy_prefix}/mpi/no-mpi.hpp")
+endif()
+
+list(APPEND CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/cmake)
+list(APPEND CMAKE_EXTRA_INCLUDE_FILES "mpi_types.h")
+
+if (NOT (DEFINED previous_mpi AND ((previous_mpi AND mpi) OR (NOT(previous_mpi OR mpi))))) # plain DEFINED: the CACHE{} form needs CMake >= 3.14
+  unset(TYPESIZE_MPI_Comm          CACHE)
+  unset(HAVE_TYPESIZE_MPI_Comm     CACHE)
+  unset(TYPESIZE_MPI_Datatype      CACHE)
+  unset(HAVE_TYPESIZE_MPI_Datatype CACHE)
+  unset(TYPESIZE_MPI_Status        CACHE)
+  unset(HAVE_TYPESIZE_MPI_Status   CACHE)
+  unset(TYPESIZE_MPI_Request       CACHE)
+  unset(HAVE_TYPESIZE_MPI_Request  CACHE)
+  unset(TYPESIZE_MPI_Op            CACHE)
+  unset(HAVE_TYPESIZE_MPI_Op       CACHE)
+  unset(TYPESIZE_MPI_File          CACHE)
+  unset(HAVE_TYPESIZE_MPI_File     CACHE)
+  unset(TYPESIZE_MPI_Win           CACHE)
+  unset(HAVE_TYPESIZE_MPI_Win      CACHE)
+  # Record the `mpi` setting the sizes will be probed with, so a later change invalidates them.
+  set (previous_mpi ${mpi} CACHE INTERNAL "" FORCE)
+endif()
+
+set(CMAKE_CXX_STANDARD 11)
+
+check_type_size("Wrapped_MPI_Comm"     TYPESIZE_MPI_Comm     LANGUAGE CXX)
+check_type_size("Wrapped_MPI_Datatype" TYPESIZE_MPI_Datatype LANGUAGE CXX)
+check_type_size("Wrapped_MPI_Status"   TYPESIZE_MPI_Status   LANGUAGE CXX)
+check_type_size("Wrapped_MPI_Request"  TYPESIZE_MPI_Request  LANGUAGE CXX)
+check_type_size("Wrapped_MPI_Op"       TYPESIZE_MPI_Op       LANGUAGE CXX)
+check_type_size("Wrapped_MPI_File"     TYPESIZE_MPI_File     LANGUAGE CXX)
+check_type_size("Wrapped_MPI_Win"      TYPESIZE_MPI_Win      LANGUAGE CXX)
+configure_file("include/${diy_prefix}/mpi/mpitypes.hpp.in" "include/${diy_prefix}/mpi/mpitypes.hpp" @ONLY)
+
+cmake_policy(POP)
diff --git a/vtkm/thirdparty/diy/vtkmdiy/cmake/diy-config.cmake.in b/vtkm/thirdparty/diy/vtkmdiy/cmake/diy-config.cmake.in
new file mode 100644
index 000000000..9feb91b65
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/cmake/diy-config.cmake.in
@@ -0,0 +1,57 @@
+if (CMAKE_VERSION VERSION_LESS "3.9")
+  message(FATAL_ERROR "Diy requires CMake 3.9+")
+endif()
+
+@PACKAGE_INIT@
+
+set(threads "@threads@")
+set(log "@log@")
+set(caliper "@caliper@")
+set(mpi "@mpi@")
+
+include("${CMAKE_CURRENT_LIST_DIR}/diy-targets.cmake")
+
+set(_diy_find_quietly)
+if (${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
+  set(_diy_find_quietly QUIET)
+endif()
+
+if (threads)
+  find_package(Threads ${_diy_find_quietly})
+  if (NOT Threads_FOUND)
+    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "Threads not found")
+    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
+  endif()
+endif()
+# spdlog: add its headers directly; $<INSTALL_INTERFACE:...> is empty when evaluated for a consumer of an imported target.
+if (log)
+  find_path(SPDLOG_INCLUDE_DIR "spdlog/spdlog.h")
+  if (NOT SPDLOG_INCLUDE_DIR)
+    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "SPDLOG not found")
+    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
+  else()
+    set_property(TARGET DIY::@diy_prefix@ APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${SPDLOG_INCLUDE_DIR}")
+  endif()
+endif()
+# caliper: add its headers directly; $<INSTALL_INTERFACE:...> is empty when evaluated for a consumer of an imported target.
+if (caliper)
+  find_package(caliper ${_diy_find_quietly})
+  if (NOT caliper_FOUND)
+    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "Caliper not found")
+    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
+  else()
+    set_property(TARGET DIY::@diy_prefix@ APPEND PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${caliper_INCLUDE_DIR}")
+  endif()
+endif()
+
+if (mpi)
+  find_package(MPI ${_diy_find_quietly})
+  if (NOT MPI_FOUND)
+    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "MPI not found")
+    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
+  endif()
+endif()
+
+if (NOT DEFINED "${CMAKE_FIND_PACKAGE_NAME}_FOUND")
+  set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 1)
+endif ()
diff --git a/vtkm/thirdparty/diy/vtkmdiy/cmake/mpi_types.h b/vtkm/thirdparty/diy/vtkmdiy/cmake/mpi_types.h
new file mode 100644
index 000000000..0f05686cb
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/cmake/mpi_types.h
@@ -0,0 +1,9 @@
+// Wrap the mpi types in structs as they can be macros in some implementations,
+// causing `check_type_size` to fail.
+struct Wrapped_MPI_Comm     { MPI_Comm     obj; };
+struct Wrapped_MPI_Datatype { MPI_Datatype obj; };
+struct Wrapped_MPI_Status   { MPI_Status   obj; };
+struct Wrapped_MPI_Request  { MPI_Request  obj; };
+struct Wrapped_MPI_Op       { MPI_Op       obj; };
+struct Wrapped_MPI_File     { MPI_File     obj; };
+struct Wrapped_MPI_Win      { MPI_Win      obj; };
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/algorithms.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/algorithms.hpp
index 32be13b8a..6a1190c41 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/algorithms.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/algorithms.hpp
@@ -86,7 +86,7 @@ namespace diy
 
         typedef     diy::RegularContinuousLink      RCLink;
 
-        for (size_t i = 0; i < master.size(); ++i)
+        for (int i = 0; i < static_cast<int>(master.size()); ++i)
         {
             RCLink* link   = static_cast<RCLink*>(master.link(i));
             *link = RCLink(dim, domain, domain);
@@ -96,7 +96,7 @@ namespace diy
                 diy::BlockID self = { master.gid(i), master.communicator().rank() };
                 for (int j = 0; j < dim; ++j)
                 {
-                    diy::Direction dir, wrap_dir;
+                    diy::Direction dir(dim,0), wrap_dir(dim,0);
 
                     // left
                     dir[j] = -1; wrap_dir[j] = -1;
@@ -122,7 +122,7 @@ namespace diy
 
         // update master.expected to match the links
         int expected = 0;
-        for (size_t i = 0; i < master.size(); ++i)
+        for (int i = 0; i < static_cast<int>(master.size()); ++i)
             expected += master.link(i)->size_unique();
         master.set_expected(expected);
     }
@@ -146,7 +146,7 @@ namespace diy
 
         typedef     diy::RegularContinuousLink      RCLink;
 
-        for (size_t i = 0; i < master.size(); ++i)
+        for (int i = 0; i < static_cast<int>(master.size()); ++i)
         {
             RCLink* link   = static_cast<RCLink*>(master.link(i));
             *link = RCLink(dim, domain, domain);
@@ -156,7 +156,7 @@ namespace diy
                 diy::BlockID self = { master.gid(i), master.communicator().rank() };
                 for (int j = 0; j < dim; ++j)
                 {
-                    diy::Direction dir, wrap_dir;
+                    diy::Direction dir(dim,0), wrap_dir(dim,0);
 
                     // left
                     dir[j] = -1; wrap_dir[j] = -1;
@@ -182,7 +182,7 @@ namespace diy
 
         // update master.expected to match the links
         int expected = 0;
-        for (size_t i = 0; i < master.size(); ++i)
+        for (int i = 0; i < static_cast<int>(master.size()); ++i)
             expected += master.link(i)->size_unique();
         master.set_expected(expected);
     }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/assigner.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/assigner.hpp
index 3978acb4a..8560dd73b 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/assigner.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/assigner.hpp
@@ -22,6 +22,7 @@ namespace diy
                              int nblocks__   //!< total (global) number of blocks
                              ):
                       size_(size__), nblocks_(nblocks__)  {}
+      virtual       ~Assigner()                         {}
 
       //! returns the total number of process ranks
       int           size() const                        { return size_; }
@@ -36,6 +37,7 @@ namespace diy
       virtual std::vector<int>
                     ranks(const std::vector<int>& gids) const;
 
+
     private:
       int           size_;      // total number of ranks
       int           nblocks_;   // total number of blocks
@@ -107,7 +109,7 @@ namespace diy
                       Assigner(size__, nblocks__),
                       comm_(comm),
                       div_(nblocks__ / size__ + ((nblocks__ % size__) == 0 ? 0 : 1)),   // NB: same size window everywhere means the last rank may allocate extra space
-                      rank_map_(comm_, div_)                                            { rank_map_.lock_all(MPI_MODE_NOCHECK); }
+                      rank_map_(comm_, div_)                                            { rank_map_.lock_all(mpi::nocheck); }
                     ~DynamicAssigner()                                                  { rank_map_.unlock_all(); }
 
       inline
@@ -187,7 +189,7 @@ set_nblocks(int nblocks__)
 
     rank_map_.unlock_all();
     rank_map_ = mpi::window<int>(comm_, div_);
-    rank_map_.lock_all(MPI_MODE_NOCHECK);
+    rank_map_.lock_all(mpi::nocheck);
 }
 
 std::tuple<bool,int>
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/constants.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/constants.h
index 85a54d9f3..1d4981a95 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/constants.h
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/constants.h
@@ -1,26 +1,39 @@
 #ifndef VTKMDIY_CONSTANTS_H
 #define VTKMDIY_CONSTANTS_H
 
-// Default DIY_MAX_DIM to 4, unless provided by the user
+// Default VTKMDIY_MAX_DIM to 4, unless provided by the user
 // (used for static min/max size in various Bounds)
-#ifndef DIY_MAX_DIM
-#define DIY_MAX_DIM 4
+#ifndef VTKMDIY_MAX_DIM
+#define VTKMDIY_MAX_DIM 4
 #endif
 
 enum
 {
-  DIY_X0 = 0x01, /* minimum-side x (left) neighbor */
-  DIY_X1 = 0x02, /* maximum-side x (right) neighbor */
-  DIY_Y0 = 0x04, /* minimum-side y (bottom) neighbor */
-  DIY_Y1 = 0x08, /* maximum-side y (top) neighbor */
-  DIY_Z0 = 0x10, /* minimum-side z (back) neighbor */
-  DIY_Z1 = 0x20, /* maximum-side z (front)neighbor */
-  DIY_T0 = 0x40, /* minimum-side t (earlier) neighbor */
-  DIY_T1 = 0x80  /* maximum-side t (later) neighbor */
+  VTKMDIY_X0 = 0x01, /* minimum-side x (left) neighbor */
+  VTKMDIY_X1 = 0x02, /* maximum-side x (right) neighbor */
+  VTKMDIY_Y0 = 0x04, /* minimum-side y (bottom) neighbor */
+  VTKMDIY_Y1 = 0x08, /* maximum-side y (top) neighbor */
+  VTKMDIY_Z0 = 0x10, /* minimum-side z (back) neighbor */
+  VTKMDIY_Z1 = 0x20, /* maximum-side z (front)neighbor */
+  VTKMDIY_T0 = 0x40, /* minimum-side t (earlier) neighbor */
+  VTKMDIY_T1 = 0x80  /* maximum-side t (later) neighbor */
 };
 
-#ifndef DIY_UNUSED
-#define DIY_UNUSED(expr) do { (void)(expr); } while (0)
+#define VTKMDIY_UNUSED(expr) do { (void)(expr); } while (0)
+
+// From https://stackoverflow.com/a/21265197/44738
+#if defined(__cplusplus) && (__cplusplus >= 201402L)
+#  define DEPRECATED(msg) [[deprecated(#msg)]]
+#else
+#  if defined(__GNUC__) || defined(__clang__)
+#    define DEPRECATED(msg) __attribute__((deprecated(#msg)))
+#  elif defined(_MSC_VER)
+#    define DEPRECATED(msg) __declspec(deprecated(#msg))
+#  else
+#    pragma message("WARNING: You need to implement DEPRECATED for this compiler")
+#    define DEPRECATED(msg)
+#  endif
 #endif
 
+
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/critical-resource.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/critical-resource.hpp
index 46d1bb8da..d609f6af9 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/critical-resource.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/critical-resource.hpp
@@ -17,6 +17,9 @@ namespace diy
       const T&  operator*() const                           { return x_; }
       const T*  operator->() const                          { return &x_; }
 
+      void      lock()                                      { lock_.lock(); }
+      void      unlock()                                    { lock_.unlock(); }
+
     private:
       T&                        x_;
       lock_guard<Mutex>         lock_;
@@ -33,6 +36,8 @@ namespace diy
                         critical_resource()                 {}
                         critical_resource(const T& x):
                             x_(x)                           {}
+                        critical_resource(T&& x):
+                            x_(std::move(x))                {}
 
       accessor          access()                            { return accessor(x_, m_); }
       const_accessor    const_access() const                { return const_accessor(x_, m_); }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/decomposition.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/decomposition.hpp
index 2701f26b4..5717d7c96 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/decomposition.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/decomposition.hpp
@@ -63,8 +63,8 @@ namespace detail
     static Coordinate   from(int i, int n, Coordinate min, Coordinate max, bool)      { return min + (max - min)/n * i; }
     static Coordinate   to  (int i, int n, Coordinate min, Coordinate max, bool)      { return min + (max - min)/n * (i+1); }
 
-    static int          lower(Coordinate x, int n, Coordinate min, Coordinate max, bool)   { Coordinate width = (max - min)/n; Coordinate res = std::floor((x - min)/width); if (min + res*width == x) return (res - 1); else return res; }
-    static int          upper(Coordinate x, int n, Coordinate min, Coordinate max, bool)   { Coordinate width = (max - min)/n; Coordinate res = std::ceil ((x - min)/width); if (min + res*width == x) return (res + 1); else return res; }
+    static int          lower(Coordinate x, int n, Coordinate min, Coordinate max, bool)   { Coordinate width = (max - min)/n; auto res = static_cast<int>(std::floor((x - min)/width)); if (min + res*width == x) return (res - 1); else return res; }
+    static int          upper(Coordinate x, int n, Coordinate min, Coordinate max, bool)   { Coordinate width = (max - min)/n; auto res = static_cast<int>(std::ceil ((x - min)/width)); if (min + res*width == x) return (res + 1); else return res; }
   };
 }
 
@@ -74,9 +74,9 @@ namespace detail
   template<class Bounds_>
   struct RegularDecomposer
   {
-    typedef         Bounds_                                         Bounds;
-    typedef         typename BoundsValue<Bounds>::type              Coordinate;
-    typedef         typename RegularLinkSelector<Bounds>::type      Link;
+    using Bounds     = Bounds_;
+    using Coordinate = typename BoundsValue<Bounds>::type;
+    using Link       = RegularLink<Bounds>;
 
     using Creator = std::function<void(int,      Bounds, Bounds, Bounds, Link)>;
     using Updater = std::function<void(int, int, Bounds, Bounds, Bounds, Link)>;
@@ -123,6 +123,7 @@ namespace detail
     template<class Point>
     int             lowest_gid(const Point& p) const;
 
+    DivisionsVector gid_to_coords(int gid) const                                { DivisionsVector coords; gid_to_coords(gid, coords); return coords; }
     void            gid_to_coords(int gid, DivisionsVector& coords) const       { gid_to_coords(gid, coords, divisions); }
     int             coords_to_gid(const DivisionsVector& coords) const          { return coords_to_gid(coords, divisions); }
     void            fill_divisions(std::vector<int>& divisions) const;
@@ -131,8 +132,8 @@ namespace detail
     void            fill_bounds(Bounds& bounds, int gid, bool add_ghosts = false) const;
 
     static bool     all(const std::vector<int>& v, int x);
-    static void     gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divisions);
-    static int      coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divisions);
+    static void     gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divs);
+    static int      coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divs);
 
     static void     factor(std::vector<unsigned>& factors, int n);
 
@@ -303,7 +304,7 @@ decompose(int rank, const StaticAssigner& assigner, const Creator& create)
     DivisionsVector coords;
     gid_to_coords(gid, coords);
 
-    Bounds core, bounds;
+    Bounds core(dim), bounds(dim);
     fill_bounds(core,   coords);
     fill_bounds(bounds, coords, true);
 
@@ -325,7 +326,7 @@ decompose(int rank, const StaticAssigner& assigner, const Creator& create)
       if (all(offsets, 0)) continue;      // skip ourselves
 
       DivisionsVector     nhbr_coords(dim);
-      Direction           dir, wrap_dir;
+      Direction           dir(dim,0), wrap_dir(dim,0);
       bool                inbounds = true;
       for (int k = 0; k < dim; ++k)
       {
@@ -364,8 +365,12 @@ decompose(int rank, const StaticAssigner& assigner, const Creator& create)
       BlockID bid; bid.gid = nhbr_gid; bid.proc = assigner.rank(nhbr_gid);
       link.add_neighbor(bid);
 
-      Bounds nhbr_bounds;
-      fill_bounds(nhbr_bounds, nhbr_coords);
+      Bounds nhbr_core(dim);
+      fill_bounds(nhbr_core, nhbr_coords);
+      link.add_core(nhbr_core);
+
+      Bounds nhbr_bounds(dim);
+      fill_bounds(nhbr_bounds, nhbr_coords, true);
       link.add_bounds(nhbr_bounds);
 
       link.add_direction(dir);
@@ -405,25 +410,25 @@ all(const std::vector<int>& v, int x)
 template<class Bounds>
 void
 diy::RegularDecomposer<Bounds>::
-gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divisions)
+gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divs)  // decode gid into one coordinate per entry of divs (dimension 0 varies fastest)
 {
-  int dim = static_cast<int>(divisions.size());
-  for (int i = 0; i < dim; ++i)
+  coords.clear();  // drop any previous contents so coords ends up with exactly divs.size() entries
+  for (int i = 0; i < static_cast<int>(divs.size()); ++i)
   {
-    coords.push_back(gid % divisions[i]);
-    gid /= divisions[i];
+    coords.push_back(gid % divs[i]);  // coordinate along dimension i
+    gid /= divs[i];  // remove dimension i; the quotient indexes the remaining dimensions
   }
 }
 
 template<class Bounds>
 int
 diy::RegularDecomposer<Bounds>::
-coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divisions)
+coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divs)
 {
   int gid = 0;
   for (int i = static_cast<int>(coords.size()) - 1; i >= 0; --i)
   {
-    gid *= divisions[i];
+    gid *= divs[i];
     gid += coords[i];
   }
   return gid;
@@ -445,12 +450,6 @@ fill_bounds(Bounds& bounds,                  //!< (output) bounds
     bounds.max[i] = detail::BoundsHelper<Bounds>::to  (coords[i], divisions[i], domain.min[i], domain.max[i], share_face[i]);
   }
 
-  for (int i = dim; i < DIY_MAX_DIM; ++i)   // set the unused dimension to 0
-  {
-    bounds.min[i] = 0;
-    bounds.max[i] = 0;
-  }
-
   if (!add_ghosts)
     return;
 
@@ -554,8 +553,7 @@ fill_divisions(std::vector<int>& divisions_) const
     }
 
     // iterate over factorization of number of blocks (factors are sorted smallest to largest)
-    // NB: using int instead of size_t because must be negative in order to break out of loop
-    for (int i = factors.size() - 1; i >= 0; --i)
+    for (auto f = factors.rbegin(); f != factors.rend(); ++f)
     {
         // fill in missing divs by dividing dimension w/ largest block size
         // except when this would be illegal (resulting in bounds.max < bounds.min;
@@ -567,19 +565,19 @@ fill_divisions(std::vector<int>& divisions_) const
         // split the dimension with the largest block size (first element in vector)
         Coordinate min =
             detail::BoundsHelper<Bounds>::from(0,
-                                               missing_divs[0].nb * factors[i],
+                                               missing_divs[0].nb * (*f),
                                                domain.min[missing_divs[0].dim],
                                                domain.max[missing_divs[0].dim],
                                                share_face[missing_divs[0].dim]);
         Coordinate max =
             detail::BoundsHelper<Bounds>::to(0,
-                                             missing_divs[0].nb * factors[i],
+                                             missing_divs[0].nb * (*f),
                                              domain.min[missing_divs[0].dim],
                                              domain.max[missing_divs[0].dim],
                                              share_face[missing_divs[0].dim]);
         if (max >= min)
         {
-            missing_divs[0].nb    *= factors[i];
+            missing_divs[0].nb    *= (*f);
             missing_divs[0].b_size = max - min;
         }
         else
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree-sampling.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree-sampling.hpp
index b9e5230bc..2f79fbc4c 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree-sampling.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree-sampling.hpp
@@ -74,7 +74,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
         dim = partners.dim(srp.round() - 1);
 
     if (srp.round() == partners.rounds())
-        update_links(b, srp, dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
+        update_links(b, srp, dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
     else if (partners.swap_round(srp.round()) && partners.sub_round(srp.round()) < 0)       // link round
     {
         dequeue_exchange(b, srp, dim);         // from the swap round
@@ -92,7 +92,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
             int prev_dim = dim - 1;
             if (prev_dim < 0)
                 prev_dim += dim_;
-            update_links(b, srp, prev_dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain);    // -1 would be the "uninformative" link round
+            update_links(b, srp, prev_dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain);    // -1 would be the "uninformative" link round
         }
 
         compute_local_samples(b, srp, dim);
@@ -134,7 +134,7 @@ divide_gid(int gid, bool lower, int round, int rounds) const
 template<class Block, class Point>
 void
 diy::detail::KDTreeSamplingPartition<Block,Point>::
-update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
+update_links(Block*, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
 {
     auto        log  = get_logger();
     int         gid  = srp.gid();
@@ -150,7 +150,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
     std::vector<float>  splits(link->size());
     for (int i = 0; i < link->size(); ++i)
     {
-        float split; diy::Direction dir;
+        float split; diy::Direction dir(dim_,0);
 
         int in_gid = link->target(i).gid;
         while(srp.incoming(in_gid))
@@ -194,7 +194,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
                 if (wrap)
                     new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
                 else
-                    new_link.add_wrap(diy::Direction());
+                    new_link.add_wrap(diy::Direction(dim_,0));
             }
         } else // non-aligned side
         {
@@ -215,7 +215,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
                     if (wrap)
                         new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
                     else
-                        new_link.add_wrap(diy::Direction());
+                        new_link.add_wrap(diy::Direction(dim_,0));
                 }
             }
         }
@@ -230,16 +230,16 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
     update_neighbor_bounds(nbr_bounds, find_split(new_link.bounds(), nbr_bounds), dim, !lower);
     new_link.add_bounds(nbr_bounds);
 
-    new_link.add_wrap(diy::Direction());    // dual block cannot be wrapped
+    new_link.add_wrap(diy::Direction(dim_,0));    // dual block cannot be wrapped
 
     if (lower)
     {
-        diy::Direction right;
+        diy::Direction right(dim_,0);
         right[dim] = 1;
         new_link.add_direction(right);
     } else
     {
-        diy::Direction left;
+        diy::Direction left(dim_,0);
         left[dim] = -1;
         new_link.add_direction(left);
     }
@@ -253,7 +253,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
 template<class Block, class Point>
 void
 diy::detail::KDTreeSamplingPartition<Block,Point>::
-split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const
+split_to_neighbors(Block*, const diy::ReduceProxy& srp, int) const
 {
     int         lid  = srp.master()->lid(srp.gid());
     RCLink*     link = static_cast<RCLink*>(srp.master()->link(lid));
@@ -290,7 +290,7 @@ compute_local_samples(Block* b, const diy::ReduceProxy& srp, int dim) const
 template<class Block, class Point>
 void
 diy::detail::KDTreeSamplingPartition<Block,Point>::
-add_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
+add_samples(Block*, const diy::ReduceProxy& srp, Samples& samples) const
 {
     // dequeue and combine the samples
     for (int i = 0; i < srp.in_link().size(); ++i)
@@ -307,7 +307,7 @@ add_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
 template<class Block, class Point>
 void
 diy::detail::KDTreeSamplingPartition<Block,Point>::
-receive_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
+receive_samples(Block*, const diy::ReduceProxy& srp, Samples& samples) const
 {
     srp.dequeue(srp.in_link().target(0).gid, samples);
 }
@@ -315,7 +315,7 @@ receive_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
 template<class Block, class Point>
 void
 diy::detail::KDTreeSamplingPartition<Block,Point>::
-forward_samples(Block* b, const diy::ReduceProxy& srp, const Samples& samples) const
+forward_samples(Block*, const diy::ReduceProxy& srp, const Samples& samples) const
 {
     for (int i = 0; i < srp.out_link().size(); ++i)
         srp.enqueue(srp.out_link().target(i), samples);
@@ -435,7 +435,7 @@ diy::Direction
 diy::detail::KDTreeSamplingPartition<Block,Point>::
 find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const
 {
-    diy::Direction wrap;
+    diy::Direction wrap(dim_,0);
     for (int i = 0; i < dim_; ++i)
     {
         if (bounds.min[i] == domain.min[i] && nbr_bounds.max[i] == domain.max[i])
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree.hpp
index 7e722d738..301b0878b 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/kdtree.hpp
@@ -68,10 +68,10 @@ struct diy::detail::KDTreePartners
                         wrap(wrap_),
                         domain(domain_)
   {
-    for (unsigned i = 0; i < swap.rounds(); ++i)
+    for (int i = 0; i < swap.rounds(); ++i)
     {
       // fill histogram rounds
-      for (unsigned j = 0; j < histogram.rounds(); ++j)
+      for (int j = 0; j < histogram.rounds(); ++j)
       {
         rounds_.push_back(std::make_pair(false, j));
         dim_.push_back(i % dim);
@@ -115,7 +115,7 @@ struct diy::detail::KDTreePartners
     else if (swap_round(round) && sub_round(round) < 0)       // link round
         swap.incoming(sub_round(round - 1) + 1, gid, partners, m);
     else if (swap_round(round))
-        histogram.incoming(histogram.rounds(), gid, partners, m);
+        histogram.incoming(static_cast<int>(histogram.rounds()), gid, partners, m);
     else
     {
         if (round > 0 && sub_round(round) == 0)
@@ -177,7 +177,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
         dim = partners.dim(srp.round() - 1);
 
     if (srp.round() == partners.rounds())
-        update_links(b, srp, dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
+        update_links(b, srp, dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
     else if (partners.swap_round(srp.round()) && partners.sub_round(srp.round()) < 0)       // link round
     {
         dequeue_exchange(b, srp, dim);         // from the swap round
@@ -195,7 +195,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
             int prev_dim = dim - 1;
             if (prev_dim < 0)
                 prev_dim += dim_;
-            update_links(b, srp, prev_dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain);    // -1 would be the "uninformative" link round
+            update_links(b, srp, prev_dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain);    // -1 would be the "uninformative" link round
         }
 
         compute_local_histogram(b, srp, dim);
@@ -229,7 +229,7 @@ divide_gid(int gid, bool lower, int round, int rounds) const
 template<class Block, class Point>
 void
 diy::detail::KDTreePartition<Block,Point>::
-update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
+update_links(Block*, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
 {
     int         gid  = srp.gid();
     int         lid  = srp.master()->lid(gid);
@@ -244,7 +244,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
     std::vector<float>  splits(link->size());
     for (int i = 0; i < link->size(); ++i)
     {
-        float split; diy::Direction dir;
+        float split; diy::Direction dir(dim_,0);
 
         int in_gid = link->target(i).gid;
         while(srp.incoming(in_gid))
@@ -287,7 +287,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
                 if (wrap)
                     new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
                 else
-                    new_link.add_wrap(diy::Direction());
+                    new_link.add_wrap(diy::Direction(dim_,0));
             }
         } else // non-aligned side
         {
@@ -308,7 +308,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
                     if (wrap)
                         new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
                     else
-                        new_link.add_wrap(diy::Direction());
+                        new_link.add_wrap(diy::Direction(dim_, 0));
                 }
             }
         }
@@ -323,16 +323,16 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
     update_neighbor_bounds(nbr_bounds, find_split(new_link.bounds(), nbr_bounds), dim, !lower);
     new_link.add_bounds(nbr_bounds);
 
-    new_link.add_wrap(diy::Direction());    // dual block cannot be wrapped
+    new_link.add_wrap(diy::Direction(dim_,0));    // dual block cannot be wrapped
 
     if (lower)
     {
-        diy::Direction right;
+        diy::Direction right(dim_,0);
         right[dim] = 1;
         new_link.add_direction(right);
     } else
     {
-        diy::Direction left;
+        diy::Direction left(dim_,0);
         left[dim] = -1;
         new_link.add_direction(left);
     }
@@ -346,7 +346,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
 template<class Block, class Point>
 void
 diy::detail::KDTreePartition<Block,Point>::
-split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const
+split_to_neighbors(Block*, const diy::ReduceProxy& srp, int) const
 {
     int         lid  = srp.master()->lid(srp.gid());
     RCLink*     link = static_cast<RCLink*>(srp.master()->link(lid));
@@ -366,20 +366,23 @@ void
 diy::detail::KDTreePartition<Block,Point>::
 compute_local_histogram(Block* b, const diy::ReduceProxy& srp, int dim) const
 {
+    auto udim = static_cast<unsigned>(dim);
     int         lid  = srp.master()->lid(srp.gid());
     RCLink*     link = static_cast<RCLink*>(srp.master()->link(lid));
 
     // compute and enqueue local histogram
     Histogram histogram(bins_);
 
-    float   width = (link->core().max[dim] - link->core().min[dim])/bins_;
+    float   width = (link->core().max[udim] - link->core().min[udim])/bins_;
     for (size_t i = 0; i < (b->*points_).size(); ++i)
     {
-        float x = (b->*points_)[i][dim];
-        int loc = (x - link->core().min[dim]) / width;
-        if (loc < 0)
-            throw std::runtime_error(fmt::format("{} {} {}", loc, x, link->core().min[dim]));
-        if (loc >= (int) bins_)
+        float x = (b->*points_)[i][udim];
+        float floc = (x - link->core().min[udim]) / width;
+        if (floc < 0)
+            throw std::runtime_error(fmt::format("{} {} {}", floc, x, link->core().min[udim]));
+
+        auto loc = static_cast<size_t>(floc);
+        if (loc >= bins_)
             loc = bins_ - 1;
         ++(histogram[loc]);
     }
@@ -390,7 +393,7 @@ compute_local_histogram(Block* b, const diy::ReduceProxy& srp, int dim) const
 template<class Block, class Point>
 void
 diy::detail::KDTreePartition<Block,Point>::
-add_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
+add_histogram(Block*, const diy::ReduceProxy& srp, Histogram& histogram) const
 {
     // dequeue and add up the histograms
     for (int i = 0; i < srp.in_link().size(); ++i)
@@ -407,7 +410,7 @@ add_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
 template<class Block, class Point>
 void
 diy::detail::KDTreePartition<Block,Point>::
-receive_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
+receive_histogram(Block*, const diy::ReduceProxy& srp, Histogram& histogram) const
 {
     srp.dequeue(srp.in_link().target(0).gid, histogram);
 }
@@ -415,7 +418,7 @@ receive_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) c
 template<class Block, class Point>
 void
 diy::detail::KDTreePartition<Block,Point>::
-forward_histogram(Block* b, const diy::ReduceProxy& srp, const Histogram& histogram) const
+forward_histogram(Block*, const diy::ReduceProxy& srp, const Histogram& histogram) const
 {
     for (int i = 0; i < srp.out_link().size(); ++i)
         srp.enqueue(srp.out_link().target(i), histogram);
@@ -445,19 +448,26 @@ enqueue_exchange(Block* b, const diy::ReduceProxy& srp, int dim, const Histogram
     size_t cur   = 0;
     float  width = (link->core().max[dim] - link->core().min[dim])/bins_;
     float  split = 0;
-    for (size_t i = 0; i < histogram.size(); ++i)
+
+    // scope-block for variable `i`
     {
-        if (cur + histogram[i] > total/2)
+        size_t i = 0;
+        for (; i < histogram.size(); ++i)
         {
-            split = link->core().min[dim] + width*i;
-            break;
+            if (cur + histogram[i] > total/2)
+                break;
+            cur += histogram[i];
         }
-        cur += histogram[i];
+        if (i == 0)
+            ++i;
+        else if (i >= histogram.size() - 1)
+            i = histogram.size() - 2;
+        split = link->core().min[dim] + width*i;
+        log->trace("Found split: {} (dim={}) in {} - {}", split, dim, link->core().min[dim], link->core().max[dim]);
     }
-    log->trace("Found split: {} (dim={}) in {} - {}", split, dim, link->core().min[dim], link->core().max[dim]);
 
     // subset and enqueue
-    std::vector< std::vector<Point> > out_points(srp.out_link().size());
+    std::vector< std::vector<Point> > out_points(static_cast<size_t>(srp.out_link().size()));
     for (size_t i = 0; i < (b->*points_).size(); ++i)
     {
       float x = (b->*points_)[i][dim];
@@ -554,7 +564,7 @@ diy::Direction
 diy::detail::KDTreePartition<Block,Point>::
 find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const
 {
-    diy::Direction wrap;
+    diy::Direction wrap(dim_,0);
     for (int i = 0; i < dim_; ++i)
     {
         if (bounds.min[i] == domain.min[i] && nbr_bounds.max[i] == domain.max[i])
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/sort.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/sort.hpp
index 55459c513..a0167356f 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/sort.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/algorithms/sort.hpp
@@ -85,29 +85,29 @@ struct SampleSort<Block,T,Cmp>::Sampler
                     Sampler(ValuesVector values_, ValuesVector dividers_, const Cmp& cmp_, size_t num_samples_):
                         values(values_), dividers(dividers_), cmp(cmp_), num_samples(num_samples_)    {}
 
-    void            operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners& partners) const
+    void            operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners&) const
     {
         int k_in  = srp.in_link().size();
         int k_out = srp.out_link().size();
 
-        std::vector<T> samples;
+        std::vector<T> samps;
 
         if (k_in == 0)
         {
             // draw random samples
             for (size_t i = 0; i < num_samples; ++i)
-                samples.push_back((b->*values)[std::rand() % (b->*values).size()]);
+                samps.push_back((b->*values)[std::rand() % (b->*values).size()]);
         } else
-            dequeue_values(samples, srp, false);
+            dequeue_values(samps, srp, false);
 
         if (k_out == 0)
         {
             // pick subsamples that separate quantiles
-            std::sort(samples.begin(), samples.end(), cmp);
+            std::sort(samps.begin(), samps.end(), cmp);
             std::vector<T>  subsamples(srp.nblocks() - 1);
-            int step = samples.size() / srp.nblocks();       // NB: subsamples.size() + 1
+            size_t step = samps.size() / srp.nblocks();       // NB: subsamples.size() + 1
             for (size_t i = 0; i < subsamples.size(); ++i)
-                subsamples[i] = samples[(i+1)*step];
+                subsamples[i] = samps[(i+1)*step];
             (b->*dividers).swap(subsamples);
         }
         else
@@ -115,7 +115,7 @@ struct SampleSort<Block,T,Cmp>::Sampler
             for (int i = 0; i < k_out; ++i)
             {
                 MemoryBuffer& out = srp.outgoing(srp.out_link().target(i));
-                save(out, &samples[0], samples.size());
+                save(out, &samps[0], samps.size());
             }
         }
     }
@@ -139,7 +139,7 @@ struct SampleSort<Block,T,Cmp>::Exchanger
             // enqueue values to the correct locations
             for (size_t i = 0; i < (b->*values).size(); ++i)
             {
-                int to = std::lower_bound((b->*samples).begin(), (b->*samples).end(), (b->*values)[i], cmp) - (b->*samples).begin();
+                int to = static_cast<int>(std::lower_bound((b->*samples).begin(), (b->*samples).end(), (b->*values)[i], cmp) - (b->*samples).begin());
                 rp.enqueue(rp.out_link().target(to), (b->*values)[i]);
             }
             (b->*values).clear();
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/collectives.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/collectives.hpp
index 303ba74a6..6f65e95e0 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/collectives.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/collectives.hpp
@@ -20,7 +20,7 @@ namespace diy
 
           void  init()                                    { out_ = in_; }
           void  update(const CollectiveOp& other)         { out_ = op_(out_, static_cast<const AllReduceOp&>(other).in_); }
-          void  global(const mpi::communicator& comm)     { T res; mpi::all_reduce(comm, out_, res, op_); out_ = res; }
+          void  global(const mpi::communicator& comm)     { T res{}; mpi::all_reduce(comm, out_, res, op_); out_ = res; }
           void  copy_from(const CollectiveOp& other)      { out_ = static_cast<const AllReduceOp&>(other).out_; }
           void  result_out(void* dest) const              { *reinterpret_cast<T*>(dest) = out_; }
 
@@ -95,7 +95,7 @@ diy::Master::
 process_collectives()
 {
   auto scoped = prof.scoped("collectives");
-  DIY_UNUSED(scoped);
+  VTKMDIY_UNUSED(scoped);
 
   if (collectives().empty())
       return;
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/communication.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/communication.hpp
index 394a2a5c6..c9133ed6f 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/communication.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/communication.hpp
@@ -1,10 +1,9 @@
 namespace diy
 {
-    struct Master::tags             { enum { queue, piece }; };
-
     struct Master::MessageInfo
     {
         int from, to;
+        int nparts;  // part count of the message; InFlightRecv::recv() decrements it after the first buffer and treats a remainder > 0 as a multi-part message
         int round;
     };
 
@@ -19,10 +18,10 @@ namespace diy
     struct Master::InFlightRecv
     {
         MemoryBuffer    message;
-        MessageInfo     info { -1, -1, -1 };
+        MessageInfo     info { -1, -1, -1, -1 };
         bool            done = false;
 
-        inline void     recv(mpi::communicator& comm, const mpi::status& status);
+        inline bool     recv(mpi::communicator& comm, const mpi::status& status);
         inline void     place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo* iexchange);
         void            reset()     { *this = InFlightRecv(); }
     };
@@ -43,32 +42,6 @@ namespace diy
         size_t              limit = 0;
     };
 
-    struct Master::IExchangeInfo
-    {
-                        IExchangeInfo():
-                            n(0)                                                  {}
-                        IExchangeInfo(size_t n_, mpi::communicator comm_):
-                            n(n_),
-                            comm(comm_),
-                            global_work_(new mpi::window<int>(comm, 1))           { global_work_->lock_all(MPI_MODE_NOCHECK); }
-                        ~IExchangeInfo()                                          { global_work_->unlock_all(); }
-
-      inline void       not_done(int gid);
-
-      inline int        global_work();                          // get global work status (for debugging)
-      inline bool       all_done();                             // get global all done status
-      inline void       reset_work();                           // reset global work counter
-      inline int        add_work(int work);                     // add work to global work counter
-      int               inc_work()                              { return add_work(1); }   // increment global work counter
-      int               dec_work()                              { return add_work(-1); }  // decremnent global work counter
-
-      size_t                              n;
-      mpi::communicator                   comm;
-      std::unordered_map<int, bool>       done;                 // gid -> done
-      std::unique_ptr<mpi::window<int>>   global_work_;         // global work to do
-      std::shared_ptr<spd::logger>        log = get_logger();
-    };
-
     // VectorWindow is used to send and receive subsets of a contiguous array in-place
     namespace detail
     {
@@ -90,7 +63,7 @@ namespace diy
         struct mpi_datatype< diy::detail::VectorWindow<T> >
         {
             using VecWin = diy::detail::VectorWindow<T>;
-            static MPI_Datatype         datatype()                { return get_mpi_datatype<T>(); }
+            static diy::mpi::datatype   datatype()                { return get_mpi_datatype<T>(); }
             static const void*          address(const VecWin& x)  { return x.begin; }
             static void*                address(VecWin& x)        { return x.begin; }
             static int                  count(const VecWin& x)    { return static_cast<int>(x.count); }
@@ -99,18 +72,8 @@ namespace diy
     } // namespace mpi::detail
 } // namespace diy
 
-void
-diy::Master::IExchangeInfo::
-not_done(int gid)
-{
-    if (done[gid])
-    {
-        done[gid] = false;
-        int work = inc_work();
-        log->debug("[{}] Incrementing work when switching done (on receipt): work = {}\n", gid, work);
-    } else
-        log->debug("[{}] Not done, no need to increment work\n", gid);
-}
+
+/** InFlightRecv **/
 
 diy::Master::InFlightRecv&
 diy::Master::
@@ -126,28 +89,27 @@ diy::Master::inflight_sends()
 }
 
 // receive message described by status
-void
+bool
 diy::Master::InFlightRecv::
 recv(mpi::communicator& comm, const mpi::status& status)
 {
+    bool result = false;            // indicates whether this is the first (and possibly only) message of a given queue
     if (info.from == -1)            // uninitialized
     {
         MemoryBuffer bb;
         comm.recv(status.source(), status.tag(), bb.buffer);
 
-        if (status.tag() == tags::piece)     // first piece is the header
+        diy::load_back(bb, info);
+        info.nparts--;
+        if (info.nparts > 0)        // multi-part message
         {
             size_t msg_size;
             diy::load(bb, msg_size);
-            diy::load(bb, info);
-
             message.buffer.reserve(msg_size);
-        }
-        else    // tags::queue
-        {
-            diy::load_back(bb, info);
+        } else
             message.swap(bb);
-        }
+
+        result = true;
     }
     else
     {
@@ -160,43 +122,34 @@ recv(mpi::communicator& comm, const mpi::status& status)
         window.count = count;
 
         comm.recv(status.source(), status.tag(), window);
+
+        info.nparts--;
     }
 
-    if (status.tag() == tags::queue)
+    if (info.nparts == 0)
         done = true;
+
+    return result;
 }
 
 // once the InFlightRecv is done, place it either out of core or in the appropriate incoming queue
 void
 diy::Master::InFlightRecv::
-place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo* iexchange)
+place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo*)     // last argument is accepted but no longer used
 {
-    size_t size     = message.size();
     int from        = info.from;
     int to          = info.to;
-    int external    = -1;
+
+    message.reset();                                // buffer position = 0
+
+    auto access = in->map[to][from].access();       // NOTE(review): presumably a guarded accessor to the (to <- from) queue container -- confirm
+    access->emplace_back(std::move(message));       // the received message becomes the last entry
 
     if (unload)
     {
         get_logger()->debug("Directly unloading queue {} <- {}", to, from);
-        external = storage->put(message);       // unload directly
+        access->back().unload(storage);             // unload the entry that was just appended
     }
-    else if (!iexchange)
-    {
-        in->map[to].queues[from].swap(message);
-        in->map[to].queues[from].reset();       // buffer position = 0
-    }
-    else    // iexchange
-    {
-        auto log = get_logger();
-
-        iexchange->not_done(to);
-        in->map[to].queues[from].append_binary(&message.buffer[0], message.size());        // append insted of overwrite
-
-        int work = iexchange->dec_work();
-        log->debug("[{}] Decrementing work after receiving: work = {}\n", to, work);
-    }
-    in->map[to].records[from] = QueueRecord(size, external);
 
     ++(in->received);
 }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/execution.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/execution.hpp
index d1471babf..4a382a562 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/execution.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/execution.hpp
@@ -25,7 +25,10 @@ struct diy::Master::ProcessBlock
       if ((size_t)cur >= blocks.size())
           return;
 
-      int i = blocks[cur];
+      int i   = blocks[cur];
+      int gid = master.gid(i);
+      stats::Annotation::Guard g( stats::Annotation("diy.block").set(gid) );
+
       if (master.block(i))
       {
           if (local.size() == (size_t)local_limit)
@@ -33,7 +36,7 @@ struct diy::Master::ProcessBlock
           local.push_back(i);
       }
 
-      master.log->debug("Processing block: {}", master.gid(i));
+      master.log->debug("Processing block: {}", gid);
 
       bool skip = all_skip(i);
 
@@ -58,8 +61,7 @@ struct diy::Master::ProcessBlock
           cmd->execute(skip ? 0 : master.block(i), master.proxy(i));
 
           // no longer need them, so get rid of them
-          current_incoming[master.gid(i)].queues.clear();
-          current_incoming[master.gid(i)].records.clear();
+          current_incoming[gid].clear();
       }
 
       if (skip && master.block(i) == 0)
@@ -93,7 +95,7 @@ execute()
 {
   log->debug("Entered execute()");
   auto scoped = prof.scoped("execute");
-  DIY_UNUSED(scoped);
+  VTKMDIY_UNUSED(scoped);
   //show_incoming_records();
 
   // touch the outgoing and incoming queues as well as collectives to make sure they exist
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/iexchange-collective.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/iexchange-collective.hpp
new file mode 100644
index 000000000..ad7a9fb26
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/iexchange-collective.hpp
@@ -0,0 +1,100 @@
+#include <atomic>
+#include <functional>
+
+namespace diy
+{
+    // IExchangeInfo implementation built on non-blocking collectives: control()
+    // runs a small state machine (ibarrier, then iall_reduce of the dirty flag)
+    // and all_done() reports whether that machine has reached its final state.
+    struct Master::IExchangeInfoCollective: public IExchangeInfo
+    {
+                        IExchangeInfoCollective(mpi::communicator c, stats::Profiler& p):
+                            IExchangeInfo(c, p)
+      {
+          local_work_ = 0;
+          dirty = 0;
+          state = 0;
+      }
+
+      inline bool       all_done() override;                    // get global all done status
+      inline void       add_work(int work) override;            // add work to global work counter
+      inline void       control() override;
+
+      std::atomic<int>  local_work_;                            // running sum of everything passed to add_work()
+      std::atomic<int>  dirty;                                  // set to 1 by add_work() while local_work_ > 0; cleared when a barrier is started
+      int               local_dirty = 0, all_dirty = 0;         // input / output of the iall_reduce started in control()
+
+      std::atomic<int>  state;                                  // 0: idle, 1: ibarrier pending, 2: iall_reduce pending, 3: done
+      mpi::request      r;                                      // request of the collective currently in flight
+
+      // debug
+      bool              first_ibarrier = true;
+
+      using IExchangeInfo::prof;
+    };
+}
+
+// True once control() has moved the state machine to its final state (3).
+bool
+diy::Master::IExchangeInfoCollective::
+all_done()
+{
+    return state == 3;
+}
+
+// Add `work` (which may be negative) to the local counter; if the counter is
+// positive afterwards, raise the dirty flag.
+void
+diy::Master::IExchangeInfoCollective::
+add_work(int work)
+{
+    local_work_ += work;
+    if (local_work_ > 0)
+        dirty = 1;
+}
+
+// Advance the state machine by at most one transition per call:
+//   0 -> 1  no local work: start an ibarrier and clear the dirty flag
+//   1 -> 2  barrier request completed: start an iall_reduce (logical or) of dirty
+//   2 -> 3  reduce completed and all_dirty == 0: done
+//   2 -> 0  reduce completed and all_dirty != 0: start over
+void
+diy::Master::IExchangeInfoCollective::
+control()
+{
+    if (state == 0 && local_work_ == 0)
+    {
+        // debug
+        if (first_ibarrier)
+        {
+            prof >> "iexchange-control";        // consensus-time cannot nest in iexchange-control
+            prof << "consensus-time";
+            prof << "iexchange-control";
+            first_ibarrier = false;
+        }
+
+        r = ibarrier(comm);
+        dirty = 0;
+        state = 1;
+    } else if (state == 1)
+    {
+        mpi::optional<mpi::status> ostatus = r.test();
+        if (ostatus)
+        {
+            local_dirty = dirty;
+            r = mpi::iall_reduce(comm, local_dirty, all_dirty, std::logical_or<int>());
+            state = 2;
+        }
+    } else if (state == 2)
+    {
+        mpi::optional<mpi::status> ostatus = r.test();
+        if (ostatus)
+        {
+            if (all_dirty == 0)     // done
+                state = 3;
+            else
+                state = 0;          // reset
+        }
+    }
+}
+
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/iexchange.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/iexchange.hpp
new file mode 100644
index 000000000..1d10c1b72
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/master/iexchange.hpp
@@ -0,0 +1,37 @@
+#include <chrono>
+#include <memory>
+
+namespace diy
+{
+    // Abstract base of the iexchange bookkeeping objects. add_work() (with the
+    // inc_work()/dec_work() shorthands) adjusts the work counter, all_done()
+    // reports global completion, and control() is left to the implementation.
+    // The concrete implementation is pulled in at the bottom of this file.
+    struct Master::IExchangeInfo
+    {
+      using   Clock   = std::chrono::high_resolution_clock;
+      using   Time    = Clock::time_point;
+
+                        // p is stored by reference, so the profiler must outlive this object
+                        IExchangeInfo(mpi::communicator c, stats::Profiler& p):
+                            comm(c),
+                            prof(p)                             {}
+      virtual           ~IExchangeInfo()                        {}
+
+      virtual bool      all_done() =0;                             // get global all done status
+      virtual void      add_work(int work) =0;                     // add work to global work counter
+      virtual void      control() =0;
+
+      void              inc_work()                              { add_work(1); }   // increment work counter
+      void              dec_work()                              { add_work(-1); }  // decrement work counter
+
+      mpi::communicator                   comm;
+
+      std::shared_ptr<spd::logger>        log = get_logger();
+      stats::Profiler&                    prof;
+    };
+}
+
+
+// included last: IExchangeInfoCollective derives from IExchangeInfo and needs the complete type
+#include "iexchange-collective.hpp"
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/reduce/all-to-all.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/reduce/all-to-all.hpp
index 259898b6a..48b74d026 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/reduce/all-to-all.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/detail/reduce/all-to-all.hpp
@@ -23,31 +23,38 @@ namespace detail
       }
     }
 
-    void operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners& partners) const
+    void operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners&) const
     {
       int k_in  = srp.in_link().size();
       int k_out = srp.out_link().size();
 
       if (k_in == 0 && k_out == 0)  // special case of a single block
       {
-          ReduceProxy all_srp_out(srp, srp.block(), 0, srp.assigner(), empty_link,         all_neighbors_link);
-          ReduceProxy all_srp_in (srp, srp.block(), 1, srp.assigner(), all_neighbors_link, empty_link);
+          ReduceProxy all_srp(std::move(const_cast<ReduceProxy&>(srp)), srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
 
-          op(b, all_srp_out);
-          MemoryBuffer& in_queue = all_srp_in.incoming(all_srp_in.in_link().target(0).gid);
-          in_queue.swap(all_srp_out.outgoing(all_srp_out.out_link().target(0)));
+          op(b, all_srp);
+
+          MemoryBuffer& in_queue = all_srp.incoming(all_srp.in_link().target(0).gid);
+          in_queue.swap(all_srp.outgoing(all_srp.out_link().target(0)));
           in_queue.reset();
+          all_srp.outgoing()->clear();
 
-          op(b, all_srp_in);
+          // change to incoming proxy
+          all_srp.set_round(1);
+          auto& in_link  = const_cast<Link&>(all_srp.in_link());
+          auto& out_link = const_cast<Link&>(all_srp.out_link());
+          in_link.swap(out_link);
+
+          op(b, all_srp);
           return;
       }
 
       if (k_in == 0)                // initial round
       {
-        ReduceProxy all_srp(srp, srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
+        ReduceProxy all_srp(std::move(const_cast<ReduceProxy&>(srp)), srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
         op(b, all_srp);
 
-        Master::OutgoingQueues all_queues;
+        Master::Proxy::OutgoingQueues all_queues;
         all_queues.swap(*all_srp.outgoing());       // clears out the queues and stores them locally
 
         // enqueue outgoing
@@ -67,10 +74,10 @@ namespace detail
       } else if (k_out == 0)        // final round
       {
         // dequeue incoming + reorder into the correct order
-        ReduceProxy all_srp(srp, srp.block(), 1, srp.assigner(), all_neighbors_link, empty_link);
+        ReduceProxy all_srp(std::move(const_cast<ReduceProxy&>(srp)), srp.block(), 1, srp.assigner(), all_neighbors_link, empty_link);
 
-        Master::IncomingQueues all_incoming;
-        all_incoming.swap(*srp.incoming());
+        Master::Proxy::IncomingQueues all_incoming;
+        all_incoming.swap(*all_srp.incoming());
 
         std::pair<int, int> range;      // all the ranges should be the same
         for (int i = 0; i < k_in; ++i)
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/dynamic-point.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/dynamic-point.hpp
new file mode 100644
index 000000000..21a9f6367
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/dynamic-point.hpp
@@ -0,0 +1,169 @@
+#ifndef VTKMDIY_DYNAMIC_POINT_HPP
+#define VTKMDIY_DYNAMIC_POINT_HPP
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include <algorithm>
+
+#include "constants.h"
+#include "thirdparty/chobo/small_vector.hpp"
+
+namespace diy
+{
+
+// Point whose dimension is chosen at run time. The coordinates are stored in
+// the chobo::small_vector base class (instantiated with static_size), and
+// dimension() is simply that vector's size.
+template<class Coordinate_, size_t static_size = VTKMDIY_MAX_DIM>
+class DynamicPoint: public chobo::small_vector<Coordinate_, static_size>
+{
+    public:
+        using Coordinate    = Coordinate_;
+        using Parent        = chobo::small_vector<Coordinate_, static_size>;
+
+        template<class U>
+        struct rebind       { typedef DynamicPoint<U> type; };
+
+    public:
+                            DynamicPoint(size_t dim, Coordinate x = 0):
+                                Parent(dim, x)                      {}
+        // converting constructors: size the storage first, then copy element by element
+        template<class T>   DynamicPoint(const DynamicPoint<T>& p):
+                                Parent(p.size())                    { for (size_t i = 0; i < dimension(); ++i) (*this)[i] = p[i]; }
+        template<class T>   DynamicPoint(const T* a, size_t dim):
+                                Parent(dim)                         { for (size_t i = 0; i < dim; ++i) (*this)[i] = a[i]; }
+        template<class T>   DynamicPoint(const std::vector<T>& a):
+                                Parent(a.begin(), a.end())          {}
+                            DynamicPoint(std::initializer_list<Coordinate> lst):
+                                Parent(lst.size())                  { size_t i = 0; for (Coordinate x : lst) (*this)[i++] = x; }
+
+                            DynamicPoint(DynamicPoint&&)            =default;
+                            DynamicPoint(const DynamicPoint&)       =default;
+        DynamicPoint&       operator=(const DynamicPoint&)          =default;
+
+        unsigned            dimension() const                       { return static_cast<unsigned>(Parent::size()); }
+
+        static DynamicPoint zero(size_t dim)                           { return DynamicPoint(dim, 0); }
+        static DynamicPoint one(size_t dim)                            { return DynamicPoint(dim, 1); }
+
+        // drop: copy without coordinate `dim`; lift: copy with x inserted as coordinate `dim`
+        DynamicPoint        drop(size_t dim) const                     { DynamicPoint p(dimension() - 1); size_t c = 0; for (size_t i = 0; i < dimension(); ++i) { if (i == dim) continue; p[c++] = (*this)[i]; } return p; }
+        DynamicPoint        lift(size_t dim, Coordinate x) const       { DynamicPoint p(dimension() + 1); for (size_t i = 0; i < dimension()+1; ++i) { if (i < dim) p[i] = (*this)[i]; else if (i == dim) p[i] = x; else if (i > dim) p[i] = (*this)[i-1]; } return p; }
+
+        using Parent::operator[];
+
+        DynamicPoint&       operator+=(const DynamicPoint& y)       { for (size_t i = 0; i < dimension(); ++i) (*this)[i] += y[i];  return *this; }
+        DynamicPoint&       operator-=(const DynamicPoint& y)       { for (size_t i = 0; i < dimension(); ++i) (*this)[i] -= y[i];  return *this; }
+        DynamicPoint&       operator*=(Coordinate a)                { for (size_t i = 0; i < dimension(); ++i) (*this)[i] *= a;     return *this; }
+        DynamicPoint&       operator/=(Coordinate a)                { for (size_t i = 0; i < dimension(); ++i) (*this)[i] /= a;     return *this; }
+
+        DEPRECATED("Use norm2 instead")
+        Coordinate          norm() const                            { return norm2(); }
+        Coordinate          norm2() const                           { return (*this)*(*this); }
+
+        // writes the coordinates separated by single spaces; writes nothing for an empty point
+        std::ostream&       operator<<(std::ostream& out) const     { if (dimension() > 0) out << (*this)[0]; for (size_t i = 1; i < dimension(); ++i) out << " " << (*this)[i]; return out; }
+        std::istream&       operator>>(std::istream& in);
+
+        friend
+        DynamicPoint        operator+(DynamicPoint x, const DynamicPoint& y)    { x += y; return x; }
+
+        friend
+        DynamicPoint        operator-(DynamicPoint x, const DynamicPoint& y)    { x -= y; return x; }
+
+        friend
+        DynamicPoint        operator/(DynamicPoint x, Coordinate y)             { x /= y; return x; }
+
+        friend
+        DynamicPoint        operator*(DynamicPoint x, Coordinate y)             { x *= y; return x; }
+
+        friend
+        DynamicPoint        operator*(Coordinate y, DynamicPoint x)             { x *= y; return x; }
+
+        friend
+        Coordinate          operator*(const DynamicPoint& x, const DynamicPoint& y)         { Coordinate n = 0; for (size_t i = 0; i < x.dimension(); ++i) n += x[i] * y[i]; return n; }
+
+        friend
+        bool                operator<(const DynamicPoint& x, const DynamicPoint& y)         { return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end()); }
+
+        friend
+        bool                operator>(const DynamicPoint& x, const DynamicPoint& y)         { return y < x; }
+
+        template<class T, size_t s_>
+        friend
+        Coordinate          operator*(const DynamicPoint<T,s_>& x, const DynamicPoint& y)   { Coordinate n = 0; for (size_t i = 0; i < x.dimension(); ++i) n += x[i] * y[i]; return n; }
+};
+
+// Reads one whitespace-delimited token from `in`, then extracts dimension()
+// coordinates from that token, consuming one separator character after each.
+template<class C, size_t s_>
+std::istream&
+DynamicPoint<C,s_>::
+operator>>(std::istream& in)
+{
+    std::string point_str;
+    in >> point_str;        // read until ' '
+    std::stringstream ps(point_str);
+
+    char x;
+    for (unsigned i = 0; i < dimension(); ++i)
+    {
+        ps >> (*this)[i];
+        ps >> x;
+    }
+
+    return in;
+}
+
+
+template<class Coordinate, size_t s_>
+Coordinate norm2(const DynamicPoint<Coordinate,s_>& p)
+{ Coordinate res = 0; for (unsigned i = 0; i < p.dimension(); ++i) res += p[i]*p[i]; return res; }
+
+template<class C, size_t s_>
+std::ostream&
+operator<<(std::ostream& out, const DynamicPoint<C,s_>& p)
+{ return p.operator<<(out); }
+
+template<class C, size_t s_>
+std::istream&
+operator>>(std::istream& in, DynamicPoint<C,s_>& p)
+{ return p.operator>>(in); }
+
+// Serialization
+template<class T>
+struct Serialization;
+struct BinaryBuffer;
+template<class T> void save(BinaryBuffer&, const T&);
+template<class T> void load(BinaryBuffer&, T&);
+template<class T> void save(BinaryBuffer&, const T*, size_t);
+template<class T> void load(BinaryBuffer&, T*, size_t);
+
+// Stored as the coordinate count (size_t) followed by the coordinates, if any.
+template<class C, size_t s_>
+struct Serialization<DynamicPoint<C, s_>>
+{
+    using Point = DynamicPoint<C,s_>;
+
+    static void         save(BinaryBuffer& bb, const Point& p)
+    {
+      size_t s = p.size();
+      diy::save(bb, s);
+      if (s > 0)
+        diy::save(bb, &p[0], p.size());
+    }
+
+    static void         load(BinaryBuffer& bb, Point& p)
+    {
+      size_t s;
+      diy::load(bb, s);
+      p.resize(s);
+      if (s > 0)
+        diy::load(bb, &p[0], s);
+    }
+};
+
+}
+
+#endif // VTKMDIY_DYNAMIC_POINT_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/factory.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/factory.hpp
new file mode 100644
index 000000000..eed2a5df1
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/factory.hpp
@@ -0,0 +1,95 @@
+#ifndef VTKMDIY_FACTORY_HPP
+#define VTKMDIY_FACTORY_HPP
+
+// From http://www.nirfriedman.com/2018/04/29/unforgettable-factory/
+// with minor changes.
+
+#include <memory>
+#include <string>
+#include <typeinfo>
+#include <unordered_map>
+#include <utility>
+
+namespace diy
+{
+
+// Self-registering factory. Registrar<T> derives from Base and stores a creator
+// for T in a shared map under the key typeid(T).name(); make() looks a creator
+// up by that key and calls it. NOTE(review): intended usage, per the article
+// referenced above, is for Base itself to derive from Factory<Base, Args...>.
+template <class Base, class... Args>
+class Factory
+{
+    public:
+        // Create the object registered under key s (a typeid(T).name() string);
+        // unordered_map::at throws std::out_of_range if s was never registered.
+        template <class... T>
+        static Base* make(const std::string &s, T&&... args)
+        {
+            return data().at(s)(std::forward<T>(args)...);
+        }
+
+        virtual std::string id() const          { return typeid(Base).name(); }
+
+        template <class T>
+        struct Registrar: Base
+        {
+            static bool registerT()
+            {
+                const auto name = typeid(T).name();
+                Factory::data()[name] = [](Args... args) -> Base*
+                {
+                    return new T(std::forward<Args>(args)...);
+                };
+                return true;
+            }
+            static volatile bool registered;
+
+            std::string id() const override     { return typeid(T).name(); }
+
+#if defined(__NVCC__)
+            protected:
+#else
+            private:
+              friend T;
+#endif
+                // the constructor references `registered` so that its initializer (registerT) is instantiated
+#if defined(__INTEL_COMPILER)
+                __attribute__ ((used))
+#endif
+                Registrar(): Base(Key{}) { (void)registered; }
+        };
+
+
+#if defined(__NVCC__)
+    protected:
+#else
+    private:
+      friend Base;
+#endif
+        // pass-key: its constructor is private and only Registrar is a friend; Registrar hands one to Base's constructor
+        class Key
+        {
+            Key() {}
+            template <class T> friend struct Registrar;
+        };
+
+        using FuncType = Base* (*)(Args...);
+
+        Factory() = default;
+
+        // key -> creator map, kept as a function-local static
+        static std::unordered_map<std::string, FuncType>& data()
+        {
+            static std::unordered_map<std::string, FuncType> s;
+            return s;
+        }
+};
+
+template <class Base, class... Args>
+template <class T>
+volatile bool Factory<Base, Args...>::Registrar<T>::registered = Factory<Base, Args...>::Registrar<T>::registerT();
+
+}
+
+#endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/format.cc b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/format.cc
deleted file mode 100644
index 09d2ea9fd..000000000
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/format.cc
+++ /dev/null
@@ -1,535 +0,0 @@
-/*
- Formatting library for C++
-
- Copyright (c) 2012 - 2016, Victor Zverovich
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice, this
-    list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "format.h"
-
-#include <string.h>
-
-#include <cctype>
-#include <cerrno>
-#include <climits>
-#include <cmath>
-#include <cstdarg>
-#include <cstddef>  // for std::ptrdiff_t
-
-#if defined(_WIN32) && defined(__MINGW32__)
-# include <cstring>
-#endif
-
-#if FMT_USE_WINDOWS_H
-# if !defined(FMT_HEADER_ONLY) && !defined(WIN32_LEAN_AND_MEAN)
-#  define WIN32_LEAN_AND_MEAN
-# endif
-# if defined(NOMINMAX) || defined(FMT_WIN_MINMAX)
-#  include <windows.h>
-# else
-#  define NOMINMAX
-#  include <windows.h>
-#  undef NOMINMAX
-# endif
-#endif
-
-#if FMT_EXCEPTIONS
-# define FMT_TRY try
-# define FMT_CATCH(x) catch (x)
-#else
-# define FMT_TRY if (true)
-# define FMT_CATCH(x) if (false)
-#endif
-
-#ifdef _MSC_VER
-# pragma warning(push)
-# pragma warning(disable: 4127)  // conditional expression is constant
-# pragma warning(disable: 4702)  // unreachable code
-// Disable deprecation warning for strerror. The latter is not called but
-// MSVC fails to detect it.
-# pragma warning(disable: 4996)
-#endif
-
-// Dummy implementations of strerror_r and strerror_s called if corresponding
-// system functions are not available.
-static inline fmt::internal::Null<> strerror_r(int, char *, ...) {
-  return fmt::internal::Null<>();
-}
-static inline fmt::internal::Null<> strerror_s(char *, std::size_t, ...) {
-  return fmt::internal::Null<>();
-}
-
-namespace fmt {
-
-FMT_FUNC internal::RuntimeError::~RuntimeError() FMT_DTOR_NOEXCEPT {}
-FMT_FUNC FormatError::~FormatError() FMT_DTOR_NOEXCEPT {}
-FMT_FUNC SystemError::~SystemError() FMT_DTOR_NOEXCEPT {}
-
-namespace {
-
-#ifndef _MSC_VER
-# define FMT_SNPRINTF snprintf
-#else  // _MSC_VER
-inline int fmt_snprintf(char *buffer, size_t size, const char *format, ...) {
-  va_list args;
-  va_start(args, format);
-  int result = vsnprintf_s(buffer, size, _TRUNCATE, format, args);
-  va_end(args);
-  return result;
-}
-# define FMT_SNPRINTF fmt_snprintf
-#endif  // _MSC_VER
-
-#if defined(_WIN32) && defined(__MINGW32__) && !defined(__NO_ISOCEXT)
-# define FMT_SWPRINTF snwprintf
-#else
-# define FMT_SWPRINTF swprintf
-#endif // defined(_WIN32) && defined(__MINGW32__) && !defined(__NO_ISOCEXT)
-
-const char RESET_COLOR[] = "\x1b[0m";
-
-typedef void (*FormatFunc)(Writer &, int, StringRef);
-
-// Portable thread-safe version of strerror.
-// Sets buffer to point to a string describing the error code.
-// This can be either a pointer to a string stored in buffer,
-// or a pointer to some static immutable string.
-// Returns one of the following values:
-//   0      - success
-//   ERANGE - buffer is not large enough to store the error message
-//   other  - failure
-// Buffer should be at least of size 1.
-int safe_strerror(
-    int error_code, char *&buffer, std::size_t buffer_size) FMT_NOEXCEPT {
-  FMT_ASSERT(buffer != 0 && buffer_size != 0, "invalid buffer");
-
-  class StrError {
-   private:
-    int error_code_;
-    char *&buffer_;
-    std::size_t buffer_size_;
-
-    // A noop assignment operator to avoid bogus warnings.
-    void operator=(const StrError &) {}
-
-    // Handle the result of XSI-compliant version of strerror_r.
-    int handle(int result) {
-      // glibc versions before 2.13 return result in errno.
-      return result == -1 ? errno : result;
-    }
-
-    // Handle the result of GNU-specific version of strerror_r.
-    int handle(char *message) {
-      // If the buffer is full then the message is probably truncated.
-      if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1)
-        return ERANGE;
-      buffer_ = message;
-      return 0;
-    }
-
-    // Handle the case when strerror_r is not available.
-    int handle(internal::Null<>) {
-      return fallback(strerror_s(buffer_, buffer_size_, error_code_));
-    }
-
-    // Fallback to strerror_s when strerror_r is not available.
-    int fallback(int result) {
-      // If the buffer is full then the message is probably truncated.
-      return result == 0 && strlen(buffer_) == buffer_size_ - 1 ?
-            ERANGE : result;
-    }
-
-    // Fallback to strerror if strerror_r and strerror_s are not available.
-    int fallback(internal::Null<>) {
-      errno = 0;
-      buffer_ = strerror(error_code_);
-      return errno;
-    }
-
-   public:
-    StrError(int err_code, char *&buf, std::size_t buf_size)
-      : error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {}
-
-    int run() {
-      // Suppress a warning about unused strerror_r.
-      strerror_r(0, FMT_NULL, "");
-      return handle(strerror_r(error_code_, buffer_, buffer_size_));
-    }
-  };
-  return StrError(error_code, buffer, buffer_size).run();
-}
-
-void format_error_code(Writer &out, int error_code,
-                       StringRef message) FMT_NOEXCEPT {
-  // Report error code making sure that the output fits into
-  // INLINE_BUFFER_SIZE to avoid dynamic memory allocation and potential
-  // bad_alloc.
-  out.clear();
-  static const char SEP[] = ": ";
-  static const char ERROR_STR[] = "error ";
-  // Subtract 2 to account for terminating null characters in SEP and ERROR_STR.
-  std::size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2;
-  typedef internal::IntTraits<int>::MainType MainType;
-  MainType abs_value = static_cast<MainType>(error_code);
-  if (internal::is_negative(error_code)) {
-    abs_value = 0 - abs_value;
-    ++error_code_size;
-  }
-  error_code_size += internal::count_digits(abs_value);
-  if (message.size() <= internal::INLINE_BUFFER_SIZE - error_code_size)
-    out << message << SEP;
-  out << ERROR_STR << error_code;
-  assert(out.size() <= internal::INLINE_BUFFER_SIZE);
-}
-
-void report_error(FormatFunc func, int error_code,
-                  StringRef message) FMT_NOEXCEPT {
-  MemoryWriter full_message;
-  func(full_message, error_code, message);
-  // Use Writer::data instead of Writer::c_str to avoid potential memory
-  // allocation.
-  std::fwrite(full_message.data(), full_message.size(), 1, stderr);
-  std::fputc('\n', stderr);
-}
-}  // namespace
-
-FMT_FUNC void SystemError::init(
-    int err_code, CStringRef format_str, ArgList args) {
-  error_code_ = err_code;
-  MemoryWriter w;
-  format_system_error(w, err_code, format(format_str, args));
-  std::runtime_error &base = *this;
-  base = std::runtime_error(w.str());
-}
-
-template <typename T>
-int internal::CharTraits<char>::format_float(
-    char *buffer, std::size_t size, const char *format,
-    unsigned width, int precision, T value) {
-  if (width == 0) {
-    return precision < 0 ?
-        FMT_SNPRINTF(buffer, size, format, value) :
-        FMT_SNPRINTF(buffer, size, format, precision, value);
-  }
-  return precision < 0 ?
-      FMT_SNPRINTF(buffer, size, format, width, value) :
-      FMT_SNPRINTF(buffer, size, format, width, precision, value);
-}
-
-template <typename T>
-int internal::CharTraits<wchar_t>::format_float(
-    wchar_t *buffer, std::size_t size, const wchar_t *format,
-    unsigned width, int precision, T value) {
-  if (width == 0) {
-    return precision < 0 ?
-        FMT_SWPRINTF(buffer, size, format, value) :
-        FMT_SWPRINTF(buffer, size, format, precision, value);
-  }
-  return precision < 0 ?
-      FMT_SWPRINTF(buffer, size, format, width, value) :
-      FMT_SWPRINTF(buffer, size, format, width, precision, value);
-}
-
-template <typename T>
-const char internal::BasicData<T>::DIGITS[] =
-    "0001020304050607080910111213141516171819"
-    "2021222324252627282930313233343536373839"
-    "4041424344454647484950515253545556575859"
-    "6061626364656667686970717273747576777879"
-    "8081828384858687888990919293949596979899";
-
-#define FMT_POWERS_OF_10(factor) \
-  factor * 10, \
-  factor * 100, \
-  factor * 1000, \
-  factor * 10000, \
-  factor * 100000, \
-  factor * 1000000, \
-  factor * 10000000, \
-  factor * 100000000, \
-  factor * 1000000000
-
-template <typename T>
-const uint32_t internal::BasicData<T>::POWERS_OF_10_32[] = {
-  0, FMT_POWERS_OF_10(1)
-};
-
-template <typename T>
-const uint64_t internal::BasicData<T>::POWERS_OF_10_64[] = {
-  0,
-  FMT_POWERS_OF_10(1),
-  FMT_POWERS_OF_10(ULongLong(1000000000)),
-  // Multiply several constants instead of using a single long long constant
-  // to avoid warnings about C++98 not supporting long long.
-  ULongLong(1000000000) * ULongLong(1000000000) * 10
-};
-
-FMT_FUNC void internal::report_unknown_type(char code, const char *type) {
-  (void)type;
-  if (std::isprint(static_cast<unsigned char>(code))) {
-    FMT_THROW(FormatError(
-        format("unknown format code '{}' for {}", code, type)));
-  }
-  FMT_THROW(FormatError(
-      format("unknown format code '\\x{:02x}' for {}",
-        static_cast<unsigned>(code), type)));
-}
-
-#if FMT_USE_WINDOWS_H
-
-FMT_FUNC internal::UTF8ToUTF16::UTF8ToUTF16(StringRef s) {
-  static const char ERROR_MSG[] = "cannot convert string from UTF-8 to UTF-16";
-  if (s.size() > INT_MAX)
-    FMT_THROW(WindowsError(ERROR_INVALID_PARAMETER, ERROR_MSG));
-  int s_size = static_cast<int>(s.size());
-  int length = MultiByteToWideChar(
-      CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), s_size, FMT_NULL, 0);
-  if (length == 0)
-    FMT_THROW(WindowsError(GetLastError(), ERROR_MSG));
-  buffer_.resize(length + 1);
-  length = MultiByteToWideChar(
-    CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), s_size, &buffer_[0], length);
-  if (length == 0)
-    FMT_THROW(WindowsError(GetLastError(), ERROR_MSG));
-  buffer_[length] = 0;
-}
-
-FMT_FUNC internal::UTF16ToUTF8::UTF16ToUTF8(WStringRef s) {
-  if (int error_code = convert(s)) {
-    FMT_THROW(WindowsError(error_code,
-        "cannot convert string from UTF-16 to UTF-8"));
-  }
-}
-
-FMT_FUNC int internal::UTF16ToUTF8::convert(WStringRef s) {
-  if (s.size() > INT_MAX)
-    return ERROR_INVALID_PARAMETER;
-  int s_size = static_cast<int>(s.size());
-  int length = WideCharToMultiByte(
-    CP_UTF8, 0, s.data(), s_size, FMT_NULL, 0, FMT_NULL, FMT_NULL);
-  if (length == 0)
-    return GetLastError();
-  buffer_.resize(length + 1);
-  length = WideCharToMultiByte(
-    CP_UTF8, 0, s.data(), s_size, &buffer_[0], length, FMT_NULL, FMT_NULL);
-  if (length == 0)
-    return GetLastError();
-  buffer_[length] = 0;
-  return 0;
-}
-
-FMT_FUNC void WindowsError::init(
-    int err_code, CStringRef format_str, ArgList args) {
-  error_code_ = err_code;
-  MemoryWriter w;
-  internal::format_windows_error(w, err_code, format(format_str, args));
-  std::runtime_error &base = *this;
-  base = std::runtime_error(w.str());
-}
-
-FMT_FUNC void internal::format_windows_error(
-    Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
-  FMT_TRY {
-    MemoryBuffer<wchar_t, INLINE_BUFFER_SIZE> buffer;
-    buffer.resize(INLINE_BUFFER_SIZE);
-    for (;;) {
-      wchar_t *system_message = &buffer[0];
-      int result = FormatMessageW(
-        FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
-        FMT_NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-        system_message, static_cast<uint32_t>(buffer.size()), FMT_NULL);
-      if (result != 0) {
-        UTF16ToUTF8 utf8_message;
-        if (utf8_message.convert(system_message) == ERROR_SUCCESS) {
-          out << message << ": " << utf8_message;
-          return;
-        }
-        break;
-      }
-      if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
-        break;  // Can't get error message, report error code instead.
-      buffer.resize(buffer.size() * 2);
-    }
-  } FMT_CATCH(...) {}
-  fmt::format_error_code(out, error_code, message);  // 'fmt::' is for bcc32.
-}
-
-#endif  // FMT_USE_WINDOWS_H
-
-FMT_FUNC void format_system_error(
-    Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
-  FMT_TRY {
-    internal::MemoryBuffer<char, internal::INLINE_BUFFER_SIZE> buffer;
-    buffer.resize(internal::INLINE_BUFFER_SIZE);
-    for (;;) {
-      char *system_message = &buffer[0];
-      int result = safe_strerror(error_code, system_message, buffer.size());
-      if (result == 0) {
-        out << message << ": " << system_message;
-        return;
-      }
-      if (result != ERANGE)
-        break;  // Can't get error message, report error code instead.
-      buffer.resize(buffer.size() * 2);
-    }
-  } FMT_CATCH(...) {}
-  fmt::format_error_code(out, error_code, message);  // 'fmt::' is for bcc32.
-}
-
-template <typename Char>
-void internal::ArgMap<Char>::init(const ArgList &args) {
-  if (!map_.empty())
-    return;
-  typedef internal::NamedArg<Char> NamedArg;
-  const NamedArg *named_arg = FMT_NULL;
-  bool use_values =
-      args.type(ArgList::MAX_PACKED_ARGS - 1) == internal::Arg::NONE;
-  if (use_values) {
-    for (unsigned i = 0;/*nothing*/; ++i) {
-      internal::Arg::Type arg_type = args.type(i);
-      switch (arg_type) {
-      case internal::Arg::NONE:
-        return;
-      case internal::Arg::NAMED_ARG:
-        named_arg = static_cast<const NamedArg*>(args.values_[i].pointer);
-        map_.push_back(Pair(named_arg->name, *named_arg));
-        break;
-      default:
-        /*nothing*/;
-      }
-    }
-    return;
-  }
-  for (unsigned i = 0; i != ArgList::MAX_PACKED_ARGS; ++i) {
-    internal::Arg::Type arg_type = args.type(i);
-    if (arg_type == internal::Arg::NAMED_ARG) {
-      named_arg = static_cast<const NamedArg*>(args.args_[i].pointer);
-      map_.push_back(Pair(named_arg->name, *named_arg));
-    }
-  }
-  for (unsigned i = ArgList::MAX_PACKED_ARGS;/*nothing*/; ++i) {
-    switch (args.args_[i].type) {
-    case internal::Arg::NONE:
-      return;
-    case internal::Arg::NAMED_ARG:
-      named_arg = static_cast<const NamedArg*>(args.args_[i].pointer);
-      map_.push_back(Pair(named_arg->name, *named_arg));
-      break;
-    default:
-      /*nothing*/;
-    }
-  }
-}
-
-template <typename Char>
-void internal::FixedBuffer<Char>::grow(std::size_t) {
-  FMT_THROW(std::runtime_error("buffer overflow"));
-}
-
-FMT_FUNC internal::Arg internal::FormatterBase::do_get_arg(
-    unsigned arg_index, const char *&error) {
-  internal::Arg arg = args_[arg_index];
-  switch (arg.type) {
-  case internal::Arg::NONE:
-    error = "argument index out of range";
-    break;
-  case internal::Arg::NAMED_ARG:
-    arg = *static_cast<const internal::Arg*>(arg.pointer);
-    break;
-  default:
-    /*nothing*/;
-  }
-  return arg;
-}
-
-FMT_FUNC void report_system_error(
-    int error_code, fmt::StringRef message) FMT_NOEXCEPT {
-  // 'fmt::' is for bcc32.
-  report_error(format_system_error, error_code, message);
-}
-
-#if FMT_USE_WINDOWS_H
-FMT_FUNC void report_windows_error(
-    int error_code, fmt::StringRef message) FMT_NOEXCEPT {
-  // 'fmt::' is for bcc32.
-  report_error(internal::format_windows_error, error_code, message);
-}
-#endif
-
-FMT_FUNC void print(std::FILE *f, CStringRef format_str, ArgList args) {
-  MemoryWriter w;
-  w.write(format_str, args);
-  std::fwrite(w.data(), 1, w.size(), f);
-}
-
-FMT_FUNC void print(CStringRef format_str, ArgList args) {
-  print(stdout, format_str, args);
-}
-
-FMT_FUNC void print_colored(Color c, CStringRef format, ArgList args) {
-  char escape[] = "\x1b[30m";
-  escape[3] = static_cast<char>('0' + c);
-  std::fputs(escape, stdout);
-  print(format, args);
-  std::fputs(RESET_COLOR, stdout);
-}
-
-#ifndef FMT_HEADER_ONLY
-
-template struct internal::BasicData<void>;
-
-// Explicit instantiations for char.
-
-template void internal::FixedBuffer<char>::grow(std::size_t);
-
-template void internal::ArgMap<char>::init(const ArgList &args);
-
-template FMT_API int internal::CharTraits<char>::format_float(
-    char *buffer, std::size_t size, const char *format,
-    unsigned width, int precision, double value);
-
-template FMT_API int internal::CharTraits<char>::format_float(
-    char *buffer, std::size_t size, const char *format,
-    unsigned width, int precision, long double value);
-
-// Explicit instantiations for wchar_t.
-
-template void internal::FixedBuffer<wchar_t>::grow(std::size_t);
-
-template void internal::ArgMap<wchar_t>::init(const ArgList &args);
-
-template FMT_API int internal::CharTraits<wchar_t>::format_float(
-    wchar_t *buffer, std::size_t size, const wchar_t *format,
-    unsigned width, int precision, double value);
-
-template FMT_API int internal::CharTraits<wchar_t>::format_float(
-    wchar_t *buffer, std::size_t size, const wchar_t *format,
-    unsigned width, int precision, long double value);
-
-#endif  // FMT_HEADER_ONLY
-
-}  // namespace fmt
-
-#ifdef _MSC_VER
-# pragma warning(pop)
-#endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/format.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/format.h
deleted file mode 100644
index 344845024..000000000
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/format.h
+++ /dev/null
@@ -1,4018 +0,0 @@
-/*
- Formatting library for C++
-
- Copyright (c) 2012 - 2016, Victor Zverovich
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice, this
-    list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
-    this list of conditions and the following disclaimer in the documentation
-    and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FMT_FORMAT_H_
-#define FMT_FORMAT_H_
-
-#define FMT_HEADER_ONLY     // Added by diy for header-only usage
-
-#include <cassert>
-#include <clocale>
-#include <cmath>
-#include <cstdio>
-#include <cstring>
-#include <limits>
-#include <memory>
-#include <stdexcept>
-#include <string>
-#include <vector>
-#include <utility>  // for std::pair
-
-// The fmt library version in the form major * 10000 + minor * 100 + patch.
-#define FMT_VERSION 40000
-
-#ifdef _SECURE_SCL
-# define FMT_SECURE_SCL _SECURE_SCL
-#else
-# define FMT_SECURE_SCL 0
-#endif
-
-#if FMT_SECURE_SCL
-# include <iterator>
-#endif
-
-#ifdef _MSC_VER
-# define FMT_MSC_VER _MSC_VER
-#else
-# define FMT_MSC_VER 0
-#endif
-
-#if FMT_MSC_VER && FMT_MSC_VER <= 1500
-typedef unsigned __int32 uint32_t;
-typedef unsigned __int64 uint64_t;
-typedef __int64          intmax_t;
-#else
-#include <stdint.h>
-#endif
-
-#if !defined(FMT_HEADER_ONLY) && defined(_WIN32)
-# ifdef FMT_EXPORT
-#  define FMT_API __declspec(dllexport)
-# elif defined(FMT_SHARED)
-#  define FMT_API __declspec(dllimport)
-# endif
-#endif
-#ifndef FMT_API
-# define FMT_API
-#endif
-
-#ifdef __GNUC__
-# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-# define FMT_GCC_EXTENSION __extension__
-# if FMT_GCC_VERSION >= 406
-#  pragma GCC diagnostic push
-// Disable the warning about "long long" which is sometimes reported even
-// when using __extension__.
-#  pragma GCC diagnostic ignored "-Wlong-long"
-// Disable the warning about declaration shadowing because it affects too
-// many valid cases.
-#  pragma GCC diagnostic ignored "-Wshadow"
-// Disable the warning about implicit conversions that may change the sign of
-// an integer; silencing it otherwise would require many explicit casts.
-#  pragma GCC diagnostic ignored "-Wsign-conversion"
-# endif
-# if __cplusplus >= 201103L || defined __GXX_EXPERIMENTAL_CXX0X__
-#  define FMT_HAS_GXX_CXX11 1
-# endif
-#else
-# define FMT_GCC_EXTENSION
-#endif
-
-#if defined(__INTEL_COMPILER)
-# define FMT_ICC_VERSION __INTEL_COMPILER
-#elif defined(__ICL)
-# define FMT_ICC_VERSION __ICL
-#endif
-
-#if defined(__clang__) && !defined(FMT_ICC_VERSION)
-# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
-# pragma clang diagnostic push
-# pragma clang diagnostic ignored "-Wdocumentation-unknown-command"
-# pragma clang diagnostic ignored "-Wpadded"
-#endif
-
-#ifdef __GNUC_LIBSTD__
-# define FMT_GNUC_LIBSTD_VERSION (__GNUC_LIBSTD__ * 100 + __GNUC_LIBSTD_MINOR__)
-#endif
-
-#ifdef __has_feature
-# define FMT_HAS_FEATURE(x) __has_feature(x)
-#else
-# define FMT_HAS_FEATURE(x) 0
-#endif
-
-#ifdef __has_builtin
-# define FMT_HAS_BUILTIN(x) __has_builtin(x)
-#else
-# define FMT_HAS_BUILTIN(x) 0
-#endif
-
-#ifdef __has_cpp_attribute
-# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
-#else
-# define FMT_HAS_CPP_ATTRIBUTE(x) 0
-#endif
-
-#ifndef FMT_USE_VARIADIC_TEMPLATES
-// Variadic templates are available in GCC since version 4.4
-// (http://gcc.gnu.org/projects/cxx0x.html) and in Visual C++
-// since version 2013.
-# define FMT_USE_VARIADIC_TEMPLATES \
-   (FMT_HAS_FEATURE(cxx_variadic_templates) || \
-       (FMT_GCC_VERSION >= 404 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1800)
-#endif
-
-#ifndef FMT_USE_RVALUE_REFERENCES
-// Don't use rvalue references when compiling with clang and an old libstdc++
-// as the latter doesn't provide std::move.
-# if defined(FMT_GNUC_LIBSTD_VERSION) && FMT_GNUC_LIBSTD_VERSION <= 402
-#  define FMT_USE_RVALUE_REFERENCES 0
-# else
-#  define FMT_USE_RVALUE_REFERENCES \
-    (FMT_HAS_FEATURE(cxx_rvalue_references) || \
-        (FMT_GCC_VERSION >= 403 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1600)
-# endif
-#endif
-
-// Check if exceptions are disabled.
-#if defined(__GNUC__) && !defined(__EXCEPTIONS)
-# define FMT_EXCEPTIONS 0
-#endif
-#if FMT_MSC_VER && !_HAS_EXCEPTIONS
-# define FMT_EXCEPTIONS 0
-#endif
-#ifndef FMT_EXCEPTIONS
-# define FMT_EXCEPTIONS 1
-#endif
-
-#ifndef FMT_THROW
-# if FMT_EXCEPTIONS
-#  define FMT_THROW(x) throw x
-# else
-#  define FMT_THROW(x) assert(false)
-# endif
-#endif
-
-// Define FMT_USE_NOEXCEPT to make fmt use noexcept (C++11 feature).
-#ifndef FMT_USE_NOEXCEPT
-# define FMT_USE_NOEXCEPT 0
-#endif
-
-#if FMT_USE_NOEXCEPT || FMT_HAS_FEATURE(cxx_noexcept) || \
-    (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || \
-    FMT_MSC_VER >= 1900
-# define FMT_DETECTED_NOEXCEPT noexcept
-#else
-# define FMT_DETECTED_NOEXCEPT throw()
-#endif
-
-#ifndef FMT_NOEXCEPT
-# if FMT_EXCEPTIONS
-#  define FMT_NOEXCEPT FMT_DETECTED_NOEXCEPT
-# else
-#  define FMT_NOEXCEPT
-# endif
-#endif
-
-// This is needed because GCC still uses throw() in its headers when exceptions
-// are disabled.
-#if FMT_GCC_VERSION
-# define FMT_DTOR_NOEXCEPT FMT_DETECTED_NOEXCEPT
-#else
-# define FMT_DTOR_NOEXCEPT FMT_NOEXCEPT
-#endif
-
-#ifndef FMT_OVERRIDE
-# if (defined(FMT_USE_OVERRIDE) && FMT_USE_OVERRIDE) || FMT_HAS_FEATURE(cxx_override) || \
-   (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || \
-   FMT_MSC_VER >= 1900
-#  define FMT_OVERRIDE override
-# else
-#  define FMT_OVERRIDE
-# endif
-#endif
-
-#ifndef FMT_NULL
-# if FMT_HAS_FEATURE(cxx_nullptr) || \
-   (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || \
-   FMT_MSC_VER >= 1600
-#  define FMT_NULL nullptr
-# else
-#  define FMT_NULL NULL
-# endif
-#endif
-
-// A macro to disallow the copy constructor and operator= functions
-// This should be used in the private: declarations for a class
-#ifndef FMT_USE_DELETED_FUNCTIONS
-# define FMT_USE_DELETED_FUNCTIONS 0
-#endif
-
-#if FMT_USE_DELETED_FUNCTIONS || FMT_HAS_FEATURE(cxx_deleted_functions) || \
-  (FMT_GCC_VERSION >= 404 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1800
-# define FMT_DELETED_OR_UNDEFINED  = delete
-# define FMT_DISALLOW_COPY_AND_ASSIGN(TypeName) \
-    TypeName(const TypeName&) = delete; \
-    TypeName& operator=(const TypeName&) = delete
-#else
-# define FMT_DELETED_OR_UNDEFINED
-# define FMT_DISALLOW_COPY_AND_ASSIGN(TypeName) \
-    TypeName(const TypeName&); \
-    TypeName& operator=(const TypeName&)
-#endif
-
-#ifndef FMT_USE_DEFAULTED_FUNCTIONS
-# define FMT_USE_DEFAULTED_FUNCTIONS 0
-#endif
-
-#ifndef FMT_DEFAULTED_COPY_CTOR
-# if FMT_USE_DEFAULTED_FUNCTIONS || FMT_HAS_FEATURE(cxx_defaulted_functions) || \
-   (FMT_GCC_VERSION >= 404 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1800
-#  define FMT_DEFAULTED_COPY_CTOR(TypeName) \
-    TypeName(const TypeName&) = default;
-# else
-#  define FMT_DEFAULTED_COPY_CTOR(TypeName)
-# endif
-#endif
-
-#ifndef FMT_USE_USER_DEFINED_LITERALS
-// All compilers which support UDLs also support variadic templates. This
-// makes the fmt::literals implementation easier. However, an explicit check
-// for variadic templates is added here just in case.
-// For Intel's compiler both it and the system gcc/msc must support UDLs.
-#if FMT_USE_VARIADIC_TEMPLATES && FMT_USE_RVALUE_REFERENCES && \
-   (FMT_HAS_FEATURE(cxx_user_literals) || \
-     (FMT_GCC_VERSION >= 407 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900) && \
-   (!defined(FMT_ICC_VERSION) || FMT_ICC_VERSION >= 1500)
-#define FMT_USE_USER_DEFINED_LITERALS 1
-#else
-define FMT_USE_USER_DEFINED_LITERALS 0
-#endif
-#endif
-
-
-#ifndef FMT_USE_EXTERN_TEMPLATES
-# define FMT_USE_EXTERN_TEMPLATES \
-    (FMT_CLANG_VERSION >= 209 || (FMT_GCC_VERSION >= 303 && FMT_HAS_GXX_CXX11))
-#endif
-
-#ifdef FMT_HEADER_ONLY
-// If header only do not use extern templates.
-# undef FMT_USE_EXTERN_TEMPLATES
-# define FMT_USE_EXTERN_TEMPLATES 0
-#endif
-
-#ifndef FMT_ASSERT
-# define FMT_ASSERT(condition, message) assert((condition) && message)
-#endif
-
-// __builtin_clz is broken in clang with Microsoft CodeGen:
-// https://github.com/fmtlib/fmt/issues/519
-#ifndef _MSC_VER
-# if FMT_GCC_VERSION >= 400 || FMT_HAS_BUILTIN(__builtin_clz)
-#  define FMT_BUILTIN_CLZ(n) __builtin_clz(n)
-# endif
-
-# if FMT_GCC_VERSION >= 400 || FMT_HAS_BUILTIN(__builtin_clzll)
-#  define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n)
-# endif
-#endif
-
-// Some compilers masquerade as both MSVC and GCC-likes or
-// otherwise support __builtin_clz and __builtin_clzll, so
-// only define FMT_BUILTIN_CLZ using the MSVC intrinsics
-// if the clz and clzll builtins are not available.
-#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED)
-# include <intrin.h>  // _BitScanReverse, _BitScanReverse64
-
-namespace fmt {
-namespace internal {
-# pragma intrinsic(_BitScanReverse)
-inline uint32_t clz(uint32_t x) {
-  unsigned long r = 0;
-  _BitScanReverse(&r, x);
-
-  assert(x != 0);
-  // Static analysis complains about using uninitialized data
-  // "r", but the only way that can happen is if "x" is 0,
-  // which the callers guarantee to not happen.
-# pragma warning(suppress: 6102)
-  return 31 - r;
-}
-# define FMT_BUILTIN_CLZ(n) fmt::internal::clz(n)
-
-# ifdef _WIN64
-#  pragma intrinsic(_BitScanReverse64)
-# endif
-
-inline uint32_t clzll(uint64_t x) {
-  unsigned long r = 0;
-# ifdef _WIN64
-  _BitScanReverse64(&r, x);
-# else
-  // Scan the high 32 bits.
-  if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32)))
-    return 63 - (r + 32);
-
-  // Scan the low 32 bits.
-  _BitScanReverse(&r, static_cast<uint32_t>(x));
-# endif
-
-  assert(x != 0);
-  // Static analysis complains about using uninitialized data
-  // "r", but the only way that can happen is if "x" is 0,
-  // which the callers guarantee to not happen.
-# pragma warning(suppress: 6102)
-  return 63 - r;
-}
-# define FMT_BUILTIN_CLZLL(n) fmt::internal::clzll(n)
-}
-}
-#endif
-
-namespace fmt {
-namespace internal {
-struct DummyInt {
-  int data[2];
-  operator int() const { return 0; }
-};
-typedef std::numeric_limits<fmt::internal::DummyInt> FPUtil;
-
-// Dummy implementations of system functions such as signbit and ecvt called
-// if the latter are not available.
-inline DummyInt signbit(...) { return DummyInt(); }
-inline DummyInt _ecvt_s(...) { return DummyInt(); }
-inline DummyInt isinf(...) { return DummyInt(); }
-inline DummyInt _finite(...) { return DummyInt(); }
-inline DummyInt isnan(...) { return DummyInt(); }
-inline DummyInt _isnan(...) { return DummyInt(); }
-
-// A helper function to suppress bogus "conditional expression is constant"
-// warnings.
-template <typename T>
-inline T const_check(T value) { return value; }
-}
-}  // namespace fmt
-
-namespace std {
-// Standard permits specialization of std::numeric_limits. This specialization
-// is used to resolve ambiguity between isinf and std::isinf in glibc:
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=48891
-// and the same for isnan and signbit.
-template <>
-class numeric_limits<fmt::internal::DummyInt> :
-    public std::numeric_limits<int> {
- public:
-  // Portable version of isinf.
-  template <typename T>
-  static bool isinfinity(T x) {
-    using namespace fmt::internal;
-    // The resolution "priority" is:
-    // isinf macro > std::isinf > ::isinf > fmt::internal::isinf
-    if (const_check(sizeof(isinf(x)) == sizeof(bool) ||
-                    sizeof(isinf(x)) == sizeof(int))) {
-      return isinf(x) != 0;
-    }
-    return !_finite(static_cast<double>(x));
-  }
-
-  // Portable version of isnan.
-  template <typename T>
-  static bool isnotanumber(T x) {
-    using namespace fmt::internal;
-    if (const_check(sizeof(isnan(x)) == sizeof(bool) ||
-                    sizeof(isnan(x)) == sizeof(int))) {
-      return isnan(x) != 0;
-    }
-    return _isnan(static_cast<double>(x)) != 0;
-  }
-
-  // Portable version of signbit.
-  static bool isnegative(double x) {
-    using namespace fmt::internal;
-    if (const_check(sizeof(signbit(x)) == sizeof(bool) ||
-                    sizeof(signbit(x)) == sizeof(int))) {
-      return signbit(x) != 0;
-    }
-    if (x < 0) return true;
-    if (!isnotanumber(x)) return false;
-    int dec = 0, sign = 0;
-    char buffer[2];  // The buffer size must be >= 2 or _ecvt_s will fail.
-    _ecvt_s(buffer, sizeof(buffer), x, 0, &dec, &sign);
-    return sign != 0;
-  }
-};
-}  // namespace std
-
-namespace fmt {
-
-// Fix the warning about long long on older versions of GCC
-// that don't support the diagnostic pragma.
-FMT_GCC_EXTENSION typedef long long LongLong;
-FMT_GCC_EXTENSION typedef unsigned long long ULongLong;
-
-#if FMT_USE_RVALUE_REFERENCES
-using std::move;
-#endif
-
-template <typename Char>
-class BasicWriter;
-
-typedef BasicWriter<char> Writer;
-typedef BasicWriter<wchar_t> WWriter;
-
-template <typename Char>
-class ArgFormatter;
-
-struct FormatSpec;
-
-template <typename Impl, typename Char, typename Spec = fmt::FormatSpec>
-class BasicPrintfArgFormatter;
-
-template <typename CharType,
-          typename ArgFormatter = fmt::ArgFormatter<CharType> >
-class BasicFormatter;
-
-/**
-  \rst
-  A string reference. It can be constructed from a C string or
-  ``std::basic_string``.
-
-  You can use one of the following typedefs for common character types:
-
-  +------------+-------------------------+
-  | Type       | Definition              |
-  +============+=========================+
-  | StringRef  | BasicStringRef<char>    |
-  +------------+-------------------------+
-  | WStringRef | BasicStringRef<wchar_t> |
-  +------------+-------------------------+
-
-  This class is most useful as a parameter type to allow passing
-  different types of strings to a function, for example::
-
-    template <typename... Args>
-    std::string format(StringRef format_str, const Args & ... args);
-
-    format("{}", 42);
-    format(std::string("{}"), 42);
-  \endrst
- */
-template <typename Char>
-class BasicStringRef {
- private:
-  const Char *data_;
-  std::size_t size_;
-
- public:
-  /** Constructs a string reference object from a C string and a size. */
-  BasicStringRef(const Char *s, std::size_t size) : data_(s), size_(size) {}
-
-  /**
-    \rst
-    Constructs a string reference object from a C string computing
-    the size with ``std::char_traits<Char>::length``.
-    \endrst
-   */
-  BasicStringRef(const Char *s)
-    : data_(s), size_(std::char_traits<Char>::length(s)) {}
-
-  /**
-    \rst
-    Constructs a string reference from a ``std::basic_string`` object.
-    \endrst
-   */
-  template <typename Allocator>
-  BasicStringRef(
-      const std::basic_string<Char, std::char_traits<Char>, Allocator> &s)
-  : data_(s.c_str()), size_(s.size()) {}
-
-  /**
-    \rst
-    Converts a string reference to an ``std::string`` object.
-    \endrst
-   */
-  std::basic_string<Char> to_string() const {
-    return std::basic_string<Char>(data_, size_);
-  }
-
-  /** Returns a pointer to the string data. */
-  const Char *data() const { return data_; }
-
-  /** Returns the string size. */
-  std::size_t size() const { return size_; }
-
-  // Lexicographically compare this string reference to other.
-  int compare(BasicStringRef other) const {
-    std::size_t size = size_ < other.size_ ? size_ : other.size_;
-    int result = std::char_traits<Char>::compare(data_, other.data_, size);
-    if (result == 0)
-      result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1);
-    return result;
-  }
-
-  friend bool operator==(BasicStringRef lhs, BasicStringRef rhs) {
-    return lhs.compare(rhs) == 0;
-  }
-  friend bool operator!=(BasicStringRef lhs, BasicStringRef rhs) {
-    return lhs.compare(rhs) != 0;
-  }
-  friend bool operator<(BasicStringRef lhs, BasicStringRef rhs) {
-    return lhs.compare(rhs) < 0;
-  }
-  friend bool operator<=(BasicStringRef lhs, BasicStringRef rhs) {
-    return lhs.compare(rhs) <= 0;
-  }
-  friend bool operator>(BasicStringRef lhs, BasicStringRef rhs) {
-    return lhs.compare(rhs) > 0;
-  }
-  friend bool operator>=(BasicStringRef lhs, BasicStringRef rhs) {
-    return lhs.compare(rhs) >= 0;
-  }
-};
-
-typedef BasicStringRef<char> StringRef;
-typedef BasicStringRef<wchar_t> WStringRef;
-
-/**
-  \rst
-  A reference to a null terminated string. It can be constructed from a C
-  string or ``std::basic_string``.
-
-  You can use one of the following typedefs for common character types:
-
-  +-------------+--------------------------+
-  | Type        | Definition               |
-  +=============+==========================+
-  | CStringRef  | BasicCStringRef<char>    |
-  +-------------+--------------------------+
-  | WCStringRef | BasicCStringRef<wchar_t> |
-  +-------------+--------------------------+
-
-  This class is most useful as a parameter type to allow passing
-  different types of strings to a function, for example::
-
-    template <typename... Args>
-    std::string format(CStringRef format_str, const Args & ... args);
-
-    format("{}", 42);
-    format(std::string("{}"), 42);
-  \endrst
- */
-template <typename Char>
-class BasicCStringRef {
- private:
-  const Char *data_;
-
- public:
-  /** Constructs a string reference object from a C string. */
-  BasicCStringRef(const Char *s) : data_(s) {}
-
-  /**
-    \rst
-    Constructs a string reference from a ``std::basic_string`` object.
-    \endrst
-   */
-  template <typename Allocator>
-  BasicCStringRef(
-      const std::basic_string<Char, std::char_traits<Char>, Allocator> &s)
-  : data_(s.c_str()) {}
-
-  /** Returns the pointer to a C string. */
-  const Char *c_str() const { return data_; }
-};
-
-typedef BasicCStringRef<char> CStringRef;
-typedef BasicCStringRef<wchar_t> WCStringRef;
-
-/** A formatting error such as invalid format string. */
-class FormatError : public std::runtime_error {
- public:
-  explicit FormatError(CStringRef message)
-  : std::runtime_error(message.c_str()) {}
-  FormatError(const FormatError &ferr) : std::runtime_error(ferr) {}
-  FMT_API ~FormatError() FMT_DTOR_NOEXCEPT;
-};
-
-namespace internal {
-
-// MakeUnsigned<T>::Type gives an unsigned type corresponding to integer type T.
-template <typename T>
-struct MakeUnsigned { typedef T Type; };
-
-#define FMT_SPECIALIZE_MAKE_UNSIGNED(T, U) \
-  template <> \
-  struct MakeUnsigned<T> { typedef U Type; }
-
-FMT_SPECIALIZE_MAKE_UNSIGNED(char, unsigned char);
-FMT_SPECIALIZE_MAKE_UNSIGNED(signed char, unsigned char);
-FMT_SPECIALIZE_MAKE_UNSIGNED(short, unsigned short);
-FMT_SPECIALIZE_MAKE_UNSIGNED(int, unsigned);
-FMT_SPECIALIZE_MAKE_UNSIGNED(long, unsigned long);
-FMT_SPECIALIZE_MAKE_UNSIGNED(LongLong, ULongLong);
-
-// Casts nonnegative integer to unsigned.
-template <typename Int>
-inline typename MakeUnsigned<Int>::Type to_unsigned(Int value) {
-  FMT_ASSERT(value >= 0, "negative value");
-  return static_cast<typename MakeUnsigned<Int>::Type>(value);
-}
-
-// The number of characters to store in the MemoryBuffer object itself
-// to avoid dynamic memory allocation.
-enum { INLINE_BUFFER_SIZE = 500 };
-
-#if FMT_SECURE_SCL
-// Use checked iterator to avoid warnings on MSVC.
-template <typename T>
-inline stdext::checked_array_iterator<T*> make_ptr(T *ptr, std::size_t size) {
-  return stdext::checked_array_iterator<T*>(ptr, size);
-}
-#else
-template <typename T>
-inline T *make_ptr(T *ptr, std::size_t) { return ptr; }
-#endif
-}  // namespace internal
-
-/**
-  \rst
-  A buffer supporting a subset of ``std::vector``'s operations.
-  \endrst
- */
-template <typename T>
-class Buffer {
- private:
-  FMT_DISALLOW_COPY_AND_ASSIGN(Buffer);
-
- protected:
-  T *ptr_;
-  std::size_t size_;
-  std::size_t capacity_;
-
-  Buffer(T *ptr = FMT_NULL, std::size_t capacity = 0)
-    : ptr_(ptr), size_(0), capacity_(capacity) {}
-
-  /**
-    \rst
-    Increases the buffer capacity to hold at least *size* elements updating
-    ``ptr_`` and ``capacity_``.
-    \endrst
-   */
-  virtual void grow(std::size_t size) = 0;
-
- public:
-  virtual ~Buffer() {}
-
-  /** Returns the size of this buffer. */
-  std::size_t size() const { return size_; }
-
-  /** Returns the capacity of this buffer. */
-  std::size_t capacity() const { return capacity_; }
-
-  /**
-    Resizes the buffer. If T is a POD type new elements may not be initialized.
-   */
-  void resize(std::size_t new_size) {
-    if (new_size > capacity_)
-      grow(new_size);
-    size_ = new_size;
-  }
-
-  /**
-    \rst
-    Reserves space to store at least *capacity* elements.
-    \endrst
-   */
-  void reserve(std::size_t capacity) {
-    if (capacity > capacity_)
-      grow(capacity);
-  }
-
-  void clear() FMT_NOEXCEPT { size_ = 0; }
-
-  void push_back(const T &value) {
-    if (size_ == capacity_)
-      grow(size_ + 1);
-    ptr_[size_++] = value;
-  }
-
-  /** Appends data to the end of the buffer. */
-  template <typename U>
-  void append(const U *begin, const U *end);
-
-  T &operator[](std::size_t index) { return ptr_[index]; }
-  const T &operator[](std::size_t index) const { return ptr_[index]; }
-};
-
-template <typename T>
-template <typename U>
-void Buffer<T>::append(const U *begin, const U *end) {
-  FMT_ASSERT(end >= begin, "negative value");
-  std::size_t new_size = size_ + (end - begin);
-  if (new_size > capacity_)
-    grow(new_size);
-  std::uninitialized_copy(begin, end,
-                          internal::make_ptr(ptr_, capacity_) + size_);
-  size_ = new_size;
-}
-
-namespace internal {
-
-// A memory buffer for trivially copyable/constructible types with the first
-// SIZE elements stored in the object itself.
-template <typename T, std::size_t SIZE, typename Allocator = std::allocator<T> >
-class MemoryBuffer : private Allocator, public Buffer<T> {
- private:
-  T data_[SIZE];
-
-  // Deallocate memory allocated by the buffer.
-  void deallocate() {
-    if (this->ptr_ != data_) Allocator::deallocate(this->ptr_, this->capacity_);
-  }
-
- protected:
-  void grow(std::size_t size) FMT_OVERRIDE;
-
- public:
-  explicit MemoryBuffer(const Allocator &alloc = Allocator())
-      : Allocator(alloc), Buffer<T>(data_, SIZE) {}
-  ~MemoryBuffer() { deallocate(); }
-
-#if FMT_USE_RVALUE_REFERENCES
- private:
-  // Move data from other to this buffer.
-  void move(MemoryBuffer &other) {
-    Allocator &this_alloc = *this, &other_alloc = other;
-    this_alloc = std::move(other_alloc);
-    this->size_ = other.size_;
-    this->capacity_ = other.capacity_;
-    if (other.ptr_ == other.data_) {
-      this->ptr_ = data_;
-      std::uninitialized_copy(other.data_, other.data_ + this->size_,
-                              make_ptr(data_, this->capacity_));
-    } else {
-      this->ptr_ = other.ptr_;
-      // Set pointer to the inline array so that delete is not called
-      // when deallocating.
-      other.ptr_ = other.data_;
-    }
-  }
-
- public:
-  MemoryBuffer(MemoryBuffer &&other) {
-    move(other);
-  }
-
-  MemoryBuffer &operator=(MemoryBuffer &&other) {
-    assert(this != &other);
-    deallocate();
-    move(other);
-    return *this;
-  }
-#endif
-
-  // Returns a copy of the allocator associated with this buffer.
-  Allocator get_allocator() const { return *this; }
-};
-
-template <typename T, std::size_t SIZE, typename Allocator>
-void MemoryBuffer<T, SIZE, Allocator>::grow(std::size_t size) {
-  std::size_t new_capacity = this->capacity_ + this->capacity_ / 2;
-  if (size > new_capacity)
-      new_capacity = size;
-  T *new_ptr = this->allocate(new_capacity, FMT_NULL);
-  // The following code doesn't throw, so the raw pointer above doesn't leak.
-  std::uninitialized_copy(this->ptr_, this->ptr_ + this->size_,
-                          make_ptr(new_ptr, new_capacity));
-  std::size_t old_capacity = this->capacity_;
-  T *old_ptr = this->ptr_;
-  this->capacity_ = new_capacity;
-  this->ptr_ = new_ptr;
-  // deallocate may throw (at least in principle), but it doesn't matter since
-  // the buffer already uses the new storage and will deallocate it in case
-  // of exception.
-  if (old_ptr != data_)
-    Allocator::deallocate(old_ptr, old_capacity);
-}
-
-// A fixed-size buffer.
-template <typename Char>
-class FixedBuffer : public fmt::Buffer<Char> {
- public:
-  FixedBuffer(Char *array, std::size_t size) : fmt::Buffer<Char>(array, size) {}
-
- protected:
-  FMT_API void grow(std::size_t size) FMT_OVERRIDE;
-};
-
-template <typename Char>
-class BasicCharTraits {
- public:
-#if FMT_SECURE_SCL
-  typedef stdext::checked_array_iterator<Char*> CharPtr;
-#else
-  typedef Char *CharPtr;
-#endif
-  static Char cast(int value) { return static_cast<Char>(value); }
-};
-
-template <typename Char>
-class CharTraits;
-
-template <>
-class CharTraits<char> : public BasicCharTraits<char> {
- private:
-  // Conversion from wchar_t to char is not allowed.
-  static char convert(wchar_t);
-
- public:
-  static char convert(char value) { return value; }
-
-  // Formats a floating-point number.
-  template <typename T>
-  FMT_API static int format_float(char *buffer, std::size_t size,
-      const char *format, unsigned width, int precision, T value);
-};
-
-#if FMT_USE_EXTERN_TEMPLATES
-extern template int CharTraits<char>::format_float<double>
-        (char *buffer, std::size_t size,
-         const char* format, unsigned width, int precision, double value);
-extern template int CharTraits<char>::format_float<long double>
-        (char *buffer, std::size_t size,
-         const char* format, unsigned width, int precision, long double value);
-#endif
-
-template <>
-class CharTraits<wchar_t> : public BasicCharTraits<wchar_t> {
- public:
-  static wchar_t convert(char value) { return value; }
-  static wchar_t convert(wchar_t value) { return value; }
-
-  template <typename T>
-  FMT_API static int format_float(wchar_t *buffer, std::size_t size,
-      const wchar_t *format, unsigned width, int precision, T value);
-};
-
-#if FMT_USE_EXTERN_TEMPLATES
-extern template int CharTraits<wchar_t>::format_float<double>
-        (wchar_t *buffer, std::size_t size,
-         const wchar_t* format, unsigned width, int precision, double value);
-extern template int CharTraits<wchar_t>::format_float<long double>
-        (wchar_t *buffer, std::size_t size,
-         const wchar_t* format, unsigned width, int precision, long double value);
-#endif
-
-// Checks if a number is negative - used to avoid warnings.
-template <bool IsSigned>
-struct SignChecker {
-  template <typename T>
-  static bool is_negative(T value) { return value < 0; }
-};
-
-template <>
-struct SignChecker<false> {
-  template <typename T>
-  static bool is_negative(T) { return false; }
-};
-
-// Returns true if value is negative, false otherwise.
-// Same as (value < 0) but doesn't produce warnings if T is an unsigned type.
-template <typename T>
-inline bool is_negative(T value) {
-  return SignChecker<std::numeric_limits<T>::is_signed>::is_negative(value);
-}
-
-// Selects uint32_t if FitsIn32Bits is true, uint64_t otherwise.
-template <bool FitsIn32Bits>
-struct TypeSelector { typedef uint32_t Type; };
-
-template <>
-struct TypeSelector<false> { typedef uint64_t Type; };
-
-template <typename T>
-struct IntTraits {
-  // Smallest of uint32_t and uint64_t that is large enough to represent
-  // all values of T.
-  typedef typename
-    TypeSelector<std::numeric_limits<T>::digits <= 32>::Type MainType;
-};
-
-FMT_API void report_unknown_type(char code, const char *type);
-
-// Static data is placed in this class template to allow header-only
-// configuration.
-template <typename T = void>
-struct FMT_API BasicData {
-  static const uint32_t POWERS_OF_10_32[];
-  static const uint64_t POWERS_OF_10_64[];
-  static const char DIGITS[];
-};
-
-#if FMT_USE_EXTERN_TEMPLATES
-extern template struct BasicData<void>;
-#endif
-
-typedef BasicData<> Data;
-
-#ifdef FMT_BUILTIN_CLZLL
-// Returns the number of decimal digits in n. Leading zeros are not counted
-// except for n == 0 in which case count_digits returns 1.
-inline unsigned count_digits(uint64_t n) {
-  // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits.
-  int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12;
-  return to_unsigned(t) - (n < Data::POWERS_OF_10_64[t]) + 1;
-}
-#else
-// Fallback version of count_digits used when __builtin_clz is not available.
-inline unsigned count_digits(uint64_t n) {
-  unsigned count = 1;
-  for (;;) {
-    // Integer division is slow so do it for a group of four digits instead
-    // of for every digit. The idea comes from the talk by Alexandrescu
-    // "Three Optimization Tips for C++". See speed-test for a comparison.
-    if (n < 10) return count;
-    if (n < 100) return count + 1;
-    if (n < 1000) return count + 2;
-    if (n < 10000) return count + 3;
-    n /= 10000u;
-    count += 4;
-  }
-}
-#endif
-
-#ifdef FMT_BUILTIN_CLZ
-// Optional version of count_digits for better performance on 32-bit platforms.
-inline unsigned count_digits(uint32_t n) {
-  int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12;
-  return to_unsigned(t) - (n < Data::POWERS_OF_10_32[t]) + 1;
-}
-#endif
-
-// A functor that doesn't add a thousands separator.
-struct NoThousandsSep {
-  template <typename Char>
-  void operator()(Char *) {}
-};
-
-// A functor that adds a thousands separator.
-class ThousandsSep {
- private:
-  fmt::StringRef sep_;
-
-  // Index of a decimal digit with the least significant digit having index 0.
-  unsigned digit_index_;
-
- public:
-  explicit ThousandsSep(fmt::StringRef sep) : sep_(sep), digit_index_(0) {}
-
-  template <typename Char>
-  void operator()(Char *&buffer) {
-    if (++digit_index_ % 3 != 0)
-      return;
-    buffer -= sep_.size();
-    std::uninitialized_copy(sep_.data(), sep_.data() + sep_.size(),
-                            internal::make_ptr(buffer, sep_.size()));
-  }
-};
-
-// Formats a decimal unsigned integer value writing into buffer.
-// thousands_sep is a functor that is called after writing each char to
-// add a thousands separator if necessary.
-template <typename UInt, typename Char, typename ThousandsSep>
-inline void format_decimal(Char *buffer, UInt value, unsigned num_digits,
-                           ThousandsSep thousands_sep) {
-  buffer += num_digits;
-  while (value >= 100) {
-    // Integer division is slow so do it for a group of two digits instead
-    // of for every digit. The idea comes from the talk by Alexandrescu
-    // "Three Optimization Tips for C++". See speed-test for a comparison.
-    unsigned index = static_cast<unsigned>((value % 100) * 2);
-    value /= 100;
-    *--buffer = Data::DIGITS[index + 1];
-    thousands_sep(buffer);
-    *--buffer = Data::DIGITS[index];
-    thousands_sep(buffer);
-  }
-  if (value < 10) {
-    *--buffer = static_cast<char>('0' + value);
-    return;
-  }
-  unsigned index = static_cast<unsigned>(value * 2);
-  *--buffer = Data::DIGITS[index + 1];
-  thousands_sep(buffer);
-  *--buffer = Data::DIGITS[index];
-}
-
-template <typename UInt, typename Char>
-inline void format_decimal(Char *buffer, UInt value, unsigned num_digits) {
-  format_decimal(buffer, value, num_digits, NoThousandsSep());
-  return;
-}
-
-#ifndef _WIN32
-# define FMT_USE_WINDOWS_H 0
-#elif !defined(FMT_USE_WINDOWS_H)
-# define FMT_USE_WINDOWS_H 1
-#endif
-
-// Define FMT_USE_WINDOWS_H to 0 to disable use of windows.h.
-// All the functionality that relies on it will be disabled too.
-#if FMT_USE_WINDOWS_H
-// A converter from UTF-8 to UTF-16.
-// It is only provided for Windows since other systems support UTF-8 natively.
-class UTF8ToUTF16 {
- private:
-  MemoryBuffer<wchar_t, INLINE_BUFFER_SIZE> buffer_;
-
- public:
-  FMT_API explicit UTF8ToUTF16(StringRef s);
-  operator WStringRef() const { return WStringRef(&buffer_[0], size()); }
-  size_t size() const { return buffer_.size() - 1; }
-  const wchar_t *c_str() const { return &buffer_[0]; }
-  std::wstring str() const { return std::wstring(&buffer_[0], size()); }
-};
-
-// A converter from UTF-16 to UTF-8.
-// It is only provided for Windows since other systems support UTF-8 natively.
-class UTF16ToUTF8 {
- private:
-  MemoryBuffer<char, INLINE_BUFFER_SIZE> buffer_;
-
- public:
-  UTF16ToUTF8() {}
-  FMT_API explicit UTF16ToUTF8(WStringRef s);
-  operator StringRef() const { return StringRef(&buffer_[0], size()); }
-  size_t size() const { return buffer_.size() - 1; }
-  const char *c_str() const { return &buffer_[0]; }
-  std::string str() const { return std::string(&buffer_[0], size()); }
-
-  // Performs conversion returning a system error code instead of
-  // throwing exception on conversion error. This method may still throw
-  // in case of memory allocation error.
-  FMT_API int convert(WStringRef s);
-};
-
-FMT_API void format_windows_error(fmt::Writer &out, int error_code,
-                                  fmt::StringRef message) FMT_NOEXCEPT;
-#endif
-
-// A formatting argument value.
-struct Value {
-  template <typename Char>
-  struct StringValue {
-    const Char *value;
-    std::size_t size;
-  };
-
-  typedef void (*FormatFunc)(
-      void *formatter, const void *arg, void *format_str_ptr);
-
-  struct CustomValue {
-    const void *value;
-    FormatFunc format;
-  };
-
-  union {
-    int int_value;
-    unsigned uint_value;
-    LongLong long_long_value;
-    ULongLong ulong_long_value;
-    double double_value;
-    long double long_double_value;
-    const void *pointer;
-    StringValue<char> string;
-    StringValue<signed char> sstring;
-    StringValue<unsigned char> ustring;
-    StringValue<wchar_t> wstring;
-    CustomValue custom;
-  };
-
-  enum Type {
-    NONE, NAMED_ARG,
-    // Integer types should go first,
-    INT, UINT, LONG_LONG, ULONG_LONG, BOOL, CHAR, LAST_INTEGER_TYPE = CHAR,
-    // followed by floating-point types.
-    DOUBLE, LONG_DOUBLE, LAST_NUMERIC_TYPE = LONG_DOUBLE,
-    CSTRING, STRING, WSTRING, POINTER, CUSTOM
-  };
-};
-
-// A formatting argument. It is a trivially copyable/constructible type to
-// allow storage in internal::MemoryBuffer.
-struct Arg : Value {
-  Type type;
-};
-
-template <typename Char>
-struct NamedArg;
-template <typename Char, typename T>
-struct NamedArgWithType;
-
-template <typename T = void>
-struct Null {};
-
-// A helper class template to enable or disable overloads taking wide
-// characters and strings in MakeValue.
-template <typename T, typename Char>
-struct WCharHelper {
-  typedef Null<T> Supported;
-  typedef T Unsupported;
-};
-
-template <typename T>
-struct WCharHelper<T, wchar_t> {
-  typedef T Supported;
-  typedef Null<T> Unsupported;
-};
-
-typedef char Yes[1];
-typedef char No[2];
-
-template <typename T>
-T &get();
-
-// These are non-members to workaround an overload resolution bug in bcc32.
-Yes &convert(fmt::ULongLong);
-No &convert(...);
-
-template<typename T, bool ENABLE_CONVERSION>
-struct ConvertToIntImpl {
-  enum { value = ENABLE_CONVERSION };
-};
-
-template<typename T, bool ENABLE_CONVERSION>
-struct ConvertToIntImpl2 {
-  enum { value = false };
-};
-
-template<typename T>
-struct ConvertToIntImpl2<T, true> {
-  enum {
-    // Don't convert numeric types.
-    value = ConvertToIntImpl<T, !std::numeric_limits<T>::is_specialized>::value
-  };
-};
-
-template<typename T>
-struct ConvertToInt {
-  enum {
-    enable_conversion = sizeof(fmt::internal::convert(get<T>())) == sizeof(Yes)
-  };
-  enum { value = ConvertToIntImpl2<T, enable_conversion>::value };
-};
-
-#define FMT_DISABLE_CONVERSION_TO_INT(Type) \
-  template <> \
-  struct ConvertToInt<Type> {  enum { value = 0 }; }
-
-// Silence warnings about converting float to int.
-FMT_DISABLE_CONVERSION_TO_INT(float);
-FMT_DISABLE_CONVERSION_TO_INT(double);
-FMT_DISABLE_CONVERSION_TO_INT(long double);
-
-template<bool B, class T = void>
-struct EnableIf {};
-
-template<class T>
-struct EnableIf<true, T> { typedef T type; };
-
-template<bool B, class T, class F>
-struct Conditional { typedef T type; };
-
-template<class T, class F>
-struct Conditional<false, T, F> { typedef F type; };
-
-// For bcc32 which doesn't understand ! in template arguments.
-template <bool>
-struct Not { enum { value = 0 }; };
-
-template <>
-struct Not<false> { enum { value = 1 }; };
-
-template <typename T>
-struct FalseType { enum { value = 0 }; };
-
-template <typename T, T> struct LConvCheck {
-  LConvCheck(int) {}
-};
-
-// Returns the thousands separator for the current locale.
-// We check if ``lconv`` contains ``thousands_sep`` because on Android
-// ``lconv`` is stubbed as an empty struct.
-template <typename LConv>
-inline StringRef thousands_sep(
-    LConv *lc, LConvCheck<char *LConv::*, &LConv::thousands_sep> = 0) {
-  return lc->thousands_sep;
-}
-
-inline fmt::StringRef thousands_sep(...) { return ""; }
-
-#define FMT_CONCAT(a, b) a##b
-
-#if FMT_GCC_VERSION >= 303
-# define FMT_UNUSED __attribute__((unused))
-#else
-# define FMT_UNUSED
-#endif
-
-#ifndef FMT_USE_STATIC_ASSERT
-# define FMT_USE_STATIC_ASSERT 0
-#endif
-
-#if FMT_USE_STATIC_ASSERT || FMT_HAS_FEATURE(cxx_static_assert) || \
-  (FMT_GCC_VERSION >= 403 && FMT_HAS_GXX_CXX11) || _MSC_VER >= 1600
-# define FMT_STATIC_ASSERT(cond, message) static_assert(cond, message)
-#else
-# define FMT_CONCAT_(a, b) FMT_CONCAT(a, b)
-# define FMT_STATIC_ASSERT(cond, message) \
-  typedef int FMT_CONCAT_(Assert, __LINE__)[(cond) ? 1 : -1] FMT_UNUSED
-#endif
-
-template <typename Formatter, typename Char, typename T>
-void format_arg(Formatter &, const Char *, const T &) {
-  FMT_STATIC_ASSERT(FalseType<T>::value,
-                    "Cannot format argument. To enable the use of ostream "
-                    "operator<< include fmt/ostream.h. Otherwise provide "
-                    "an overload of format_arg.");
-}
-
-// Makes an Arg object from any type.
-template <typename Formatter>
-class MakeValue : public Arg {
- public:
-  typedef typename Formatter::Char Char;
-
- private:
-  // The following two methods are private to disallow formatting of
-  // arbitrary pointers. If you want to output a pointer cast it to
-  // "void *" or "const void *". In particular, this forbids formatting
-  // of "[const] volatile char *" which is printed as bool by iostreams.
-  // Do not implement!
-  template <typename T>
-  MakeValue(const T *value);
-  template <typename T>
-  MakeValue(T *value);
-
-  // The following methods are private to disallow formatting of wide
-  // characters and strings into narrow strings as in
-  //   fmt::format("{}", L"test");
-  // To fix this, use a wide format string: fmt::format(L"{}", L"test").
-#if !FMT_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED)
-  MakeValue(typename WCharHelper<wchar_t, Char>::Unsupported);
-#endif
-  MakeValue(typename WCharHelper<wchar_t *, Char>::Unsupported);
-  MakeValue(typename WCharHelper<const wchar_t *, Char>::Unsupported);
-  MakeValue(typename WCharHelper<const std::wstring &, Char>::Unsupported);
-  MakeValue(typename WCharHelper<WStringRef, Char>::Unsupported);
-
-  void set_string(StringRef str) {
-    string.value = str.data();
-    string.size = str.size();
-  }
-
-  void set_string(WStringRef str) {
-    wstring.value = str.data();
-    wstring.size = str.size();
-  }
-
-  // Formats an argument of a custom type, such as a user-defined class.
-  template <typename T>
-  static void format_custom_arg(
-      void *formatter, const void *arg, void *format_str_ptr) {
-    format_arg(*static_cast<Formatter*>(formatter),
-               *static_cast<const Char**>(format_str_ptr),
-               *static_cast<const T*>(arg));
-  }
-
- public:
-  MakeValue() {}
-
-#define FMT_MAKE_VALUE_(Type, field, TYPE, rhs) \
-  MakeValue(Type value) { field = rhs; } \
-  static uint64_t type(Type) { return Arg::TYPE; }
-
-#define FMT_MAKE_VALUE(Type, field, TYPE) \
-  FMT_MAKE_VALUE_(Type, field, TYPE, value)
-
-  FMT_MAKE_VALUE(bool, int_value, BOOL)
-  FMT_MAKE_VALUE(short, int_value, INT)
-  FMT_MAKE_VALUE(unsigned short, uint_value, UINT)
-  FMT_MAKE_VALUE(int, int_value, INT)
-  FMT_MAKE_VALUE(unsigned, uint_value, UINT)
-
-  MakeValue(long value) {
-    // To minimize the number of types we need to deal with, long is
-    // translated either to int or to long long depending on its size.
-    if (const_check(sizeof(long) == sizeof(int)))
-      int_value = static_cast<int>(value);
-    else
-      long_long_value = value;
-  }
-  static uint64_t type(long) {
-    return sizeof(long) == sizeof(int) ? Arg::INT : Arg::LONG_LONG;
-  }
-
-  MakeValue(unsigned long value) {
-    if (const_check(sizeof(unsigned long) == sizeof(unsigned)))
-      uint_value = static_cast<unsigned>(value);
-    else
-      ulong_long_value = value;
-  }
-  static uint64_t type(unsigned long) {
-    return sizeof(unsigned long) == sizeof(unsigned) ?
-          Arg::UINT : Arg::ULONG_LONG;
-  }
-
-  FMT_MAKE_VALUE(LongLong, long_long_value, LONG_LONG)
-  FMT_MAKE_VALUE(ULongLong, ulong_long_value, ULONG_LONG)
-  FMT_MAKE_VALUE(float, double_value, DOUBLE)
-  FMT_MAKE_VALUE(double, double_value, DOUBLE)
-  FMT_MAKE_VALUE(long double, long_double_value, LONG_DOUBLE)
-  FMT_MAKE_VALUE(signed char, int_value, INT)
-  FMT_MAKE_VALUE(unsigned char, uint_value, UINT)
-  FMT_MAKE_VALUE(char, int_value, CHAR)
-
-#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED)
-  MakeValue(typename WCharHelper<wchar_t, Char>::Supported value) {
-    int_value = value;
-  }
-  static uint64_t type(wchar_t) { return Arg::CHAR; }
-#endif
-
-#define FMT_MAKE_STR_VALUE(Type, TYPE) \
-  MakeValue(Type value) { set_string(value); } \
-  static uint64_t type(Type) { return Arg::TYPE; }
-
-  FMT_MAKE_VALUE(char *, string.value, CSTRING)
-  FMT_MAKE_VALUE(const char *, string.value, CSTRING)
-  FMT_MAKE_VALUE(signed char *, sstring.value, CSTRING)
-  FMT_MAKE_VALUE(const signed char *, sstring.value, CSTRING)
-  FMT_MAKE_VALUE(unsigned char *, ustring.value, CSTRING)
-  FMT_MAKE_VALUE(const unsigned char *, ustring.value, CSTRING)
-  FMT_MAKE_STR_VALUE(const std::string &, STRING)
-  FMT_MAKE_STR_VALUE(StringRef, STRING)
-  FMT_MAKE_VALUE_(CStringRef, string.value, CSTRING, value.c_str())
-
-#define FMT_MAKE_WSTR_VALUE(Type, TYPE) \
-  MakeValue(typename WCharHelper<Type, Char>::Supported value) { \
-    set_string(value); \
-  } \
-  static uint64_t type(Type) { return Arg::TYPE; }
-
-  FMT_MAKE_WSTR_VALUE(wchar_t *, WSTRING)
-  FMT_MAKE_WSTR_VALUE(const wchar_t *, WSTRING)
-  FMT_MAKE_WSTR_VALUE(const std::wstring &, WSTRING)
-  FMT_MAKE_WSTR_VALUE(WStringRef, WSTRING)
-
-  FMT_MAKE_VALUE(void *, pointer, POINTER)
-  FMT_MAKE_VALUE(const void *, pointer, POINTER)
-
-  template <typename T>
-  MakeValue(const T &value,
-            typename EnableIf<Not<
-              ConvertToInt<T>::value>::value, int>::type = 0) {
-    custom.value = &value;
-    custom.format = &format_custom_arg<T>;
-  }
-
-  template <typename T>
-  static typename EnableIf<Not<ConvertToInt<T>::value>::value, uint64_t>::type
-      type(const T &) {
-    return Arg::CUSTOM;
-  }
-
-  // Additional template param `Char_` is needed here because make_type always
-  // uses char.
-  template <typename Char_>
-  MakeValue(const NamedArg<Char_> &value) { pointer = &value; }
-  template <typename Char_, typename T>
-  MakeValue(const NamedArgWithType<Char_, T> &value) { pointer = &value; }
-
-  template <typename Char_>
-  static uint64_t type(const NamedArg<Char_> &) { return Arg::NAMED_ARG; }
-  template <typename Char_, typename T>
-  static uint64_t type(const NamedArgWithType<Char_, T> &) { return Arg::NAMED_ARG; }
-};
-
-template <typename Formatter>
-class MakeArg : public Arg {
-public:
-  MakeArg() {
-    type = Arg::NONE;
-  }
-
-  template <typename T>
-  MakeArg(const T &value)
-  : Arg(MakeValue<Formatter>(value)) {
-    type = static_cast<Arg::Type>(MakeValue<Formatter>::type(value));
-  }
-};
-
-template <typename Char>
-struct NamedArg : Arg {
-  BasicStringRef<Char> name;
-
-  template <typename T>
-  NamedArg(BasicStringRef<Char> argname, const T &value)
-  : Arg(MakeArg< BasicFormatter<Char> >(value)), name(argname) {}
-};
-
-template <typename Char, typename T>
-struct NamedArgWithType : NamedArg<Char> {
-  NamedArgWithType(BasicStringRef<Char> argname, const T &value)
-  : NamedArg<Char>(argname, value) {}
-};
-
-class RuntimeError : public std::runtime_error {
- protected:
-  RuntimeError() : std::runtime_error("") {}
-  RuntimeError(const RuntimeError &rerr) : std::runtime_error(rerr) {}
-  FMT_API ~RuntimeError() FMT_DTOR_NOEXCEPT;
-};
-
-template <typename Char>
-class ArgMap;
-}  // namespace internal
-
-/** An argument list. */
-class ArgList {
- private:
-  // To reduce compiled code size per formatting function call, types of first
-  // MAX_PACKED_ARGS arguments are passed in the types_ field.
-  uint64_t types_;
-  union {
-    // If the number of arguments is less than MAX_PACKED_ARGS, the argument
-    // values are stored in values_, otherwise they are stored in args_.
-    // This is done to reduce compiled code size as storing larger objects
-    // may require more code (at least on x86-64) even if the same amount of
-    // data is actually copied to stack. It saves ~10% on the bloat test.
-    const internal::Value *values_;
-    const internal::Arg *args_;
-  };
-
-  internal::Arg::Type type(unsigned index) const {
-    return type(types_, index);
-  }
-
-  template <typename Char>
-  friend class internal::ArgMap;
-
- public:
-  // Maximum number of arguments with packed types.
-  enum { MAX_PACKED_ARGS = 16 };
-
-  ArgList() : types_(0) {}
-
-  ArgList(ULongLong types, const internal::Value *values)
-  : types_(types), values_(values) {}
-  ArgList(ULongLong types, const internal::Arg *args)
-  : types_(types), args_(args) {}
-
-  uint64_t types() const { return types_; }
-
-  /** Returns the argument at specified index. */
-  internal::Arg operator[](unsigned index) const {
-    using internal::Arg;
-    Arg arg;
-    bool use_values = type(MAX_PACKED_ARGS - 1) == Arg::NONE;
-    if (index < MAX_PACKED_ARGS) {
-      Arg::Type arg_type = type(index);
-      internal::Value &val = arg;
-      if (arg_type != Arg::NONE)
-        val = use_values ? values_[index] : args_[index];
-      arg.type = arg_type;
-      return arg;
-    }
-    if (use_values) {
-      // The index is greater than the number of arguments that can be stored
-      // in values, so return a "none" argument.
-      arg.type = Arg::NONE;
-      return arg;
-    }
-    for (unsigned i = MAX_PACKED_ARGS; i <= index; ++i) {
-      if (args_[i].type == Arg::NONE)
-        return args_[i];
-    }
-    return args_[index];
-  }
-
-  static internal::Arg::Type type(uint64_t types, unsigned index) {
-    unsigned shift = index * 4;
-    uint64_t mask = 0xf;
-    return static_cast<internal::Arg::Type>(
-          (types & (mask << shift)) >> shift);
-  }
-};
-
-#define FMT_DISPATCH(call) static_cast<Impl*>(this)->call
-
-/**
-  \rst
-  An argument visitor based on the `curiously recurring template pattern
-  <http://en.wikipedia.org/wiki/Curiously_recurring_template_pattern>`_.
-
-  To use `~fmt::ArgVisitor` define a subclass that implements some or all of the
-  visit methods with the same signatures as the methods in `~fmt::ArgVisitor`,
-  for example, `~fmt::ArgVisitor::visit_int()`.
-  Pass the subclass as the *Impl* template parameter. Then calling
-  `~fmt::ArgVisitor::visit` for some argument will dispatch to a visit method
-  specific to the argument type. For example, if the argument type is
-  ``double`` then the `~fmt::ArgVisitor::visit_double()` method of a subclass
-  will be called. If the subclass doesn't contain a method with this signature,
-  then a corresponding method of `~fmt::ArgVisitor` will be called.
-
-  **Example**::
-
-    class MyArgVisitor : public fmt::ArgVisitor<MyArgVisitor, void> {
-     public:
-      void visit_int(int value) { fmt::print("{}", value); }
-      void visit_double(double value) { fmt::print("{}", value ); }
-    };
-  \endrst
- */
-template <typename Impl, typename Result>
-class ArgVisitor {
- private:
-  typedef internal::Arg Arg;
-
- public:
-  void report_unhandled_arg() {}
-
-  Result visit_unhandled_arg() {
-    FMT_DISPATCH(report_unhandled_arg());
-    return Result();
-  }
-
-  /** Visits an ``int`` argument. **/
-  Result visit_int(int value) {
-    return FMT_DISPATCH(visit_any_int(value));
-  }
-
-  /** Visits a ``long long`` argument. **/
-  Result visit_long_long(LongLong value) {
-    return FMT_DISPATCH(visit_any_int(value));
-  }
-
-  /** Visits an ``unsigned`` argument. **/
-  Result visit_uint(unsigned value) {
-    return FMT_DISPATCH(visit_any_int(value));
-  }
-
-  /** Visits an ``unsigned long long`` argument. **/
-  Result visit_ulong_long(ULongLong value) {
-    return FMT_DISPATCH(visit_any_int(value));
-  }
-
-  /** Visits a ``bool`` argument. **/
-  Result visit_bool(bool value) {
-    return FMT_DISPATCH(visit_any_int(value));
-  }
-
-  /** Visits a ``char`` or ``wchar_t`` argument. **/
-  Result visit_char(int value) {
-    return FMT_DISPATCH(visit_any_int(value));
-  }
-
-  /** Visits an argument of any integral type. **/
-  template <typename T>
-  Result visit_any_int(T) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /** Visits a ``double`` argument. **/
-  Result visit_double(double value) {
-    return FMT_DISPATCH(visit_any_double(value));
-  }
-
-  /** Visits a ``long double`` argument. **/
-  Result visit_long_double(long double value) {
-    return FMT_DISPATCH(visit_any_double(value));
-  }
-
-  /** Visits a ``double`` or ``long double`` argument. **/
-  template <typename T>
-  Result visit_any_double(T) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /** Visits a null-terminated C string (``const char *``) argument. **/
-  Result visit_cstring(const char *) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /** Visits a string argument. **/
-  Result visit_string(Arg::StringValue<char>) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /** Visits a wide string argument. **/
-  Result visit_wstring(Arg::StringValue<wchar_t>) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /** Visits a pointer argument. **/
-  Result visit_pointer(const void *) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /** Visits an argument of a custom (user-defined) type. **/
-  Result visit_custom(Arg::CustomValue) {
-    return FMT_DISPATCH(visit_unhandled_arg());
-  }
-
-  /**
-    \rst
-    Visits an argument dispatching to the appropriate visit method based on
-    the argument type. For example, if the argument type is ``double`` then
-    the `~fmt::ArgVisitor::visit_double()` method of the *Impl* class will be
-    called.
-    \endrst
-   */
-  Result visit(const Arg &arg) {
-    switch (arg.type) {
-    case Arg::NONE:
-    case Arg::NAMED_ARG:
-      FMT_ASSERT(false, "invalid argument type");
-      break;
-    case Arg::INT:
-      return FMT_DISPATCH(visit_int(arg.int_value));
-    case Arg::UINT:
-      return FMT_DISPATCH(visit_uint(arg.uint_value));
-    case Arg::LONG_LONG:
-      return FMT_DISPATCH(visit_long_long(arg.long_long_value));
-    case Arg::ULONG_LONG:
-      return FMT_DISPATCH(visit_ulong_long(arg.ulong_long_value));
-    case Arg::BOOL:
-      return FMT_DISPATCH(visit_bool(arg.int_value != 0));
-    case Arg::CHAR:
-      return FMT_DISPATCH(visit_char(arg.int_value));
-    case Arg::DOUBLE:
-      return FMT_DISPATCH(visit_double(arg.double_value));
-    case Arg::LONG_DOUBLE:
-      return FMT_DISPATCH(visit_long_double(arg.long_double_value));
-    case Arg::CSTRING:
-      return FMT_DISPATCH(visit_cstring(arg.string.value));
-    case Arg::STRING:
-      return FMT_DISPATCH(visit_string(arg.string));
-    case Arg::WSTRING:
-      return FMT_DISPATCH(visit_wstring(arg.wstring));
-    case Arg::POINTER:
-      return FMT_DISPATCH(visit_pointer(arg.pointer));
-    case Arg::CUSTOM:
-      return FMT_DISPATCH(visit_custom(arg.custom));
-    }
-    return Result();
-  }
-};
-
-enum Alignment {
-  ALIGN_DEFAULT, ALIGN_LEFT, ALIGN_RIGHT, ALIGN_CENTER, ALIGN_NUMERIC
-};
-
-// Flags.
-enum {
-  SIGN_FLAG = 1, PLUS_FLAG = 2, MINUS_FLAG = 4, HASH_FLAG = 8,
-  CHAR_FLAG = 0x10  // Argument has char type - used in error reporting.
-};
-
-// An empty format specifier.
-struct EmptySpec {};
-
-// A type specifier.
-template <char TYPE>
-struct TypeSpec : EmptySpec {
-  Alignment align() const { return ALIGN_DEFAULT; }
-  unsigned width() const { return 0; }
-  int precision() const { return -1; }
-  bool flag(unsigned) const { return false; }
-  char type() const { return TYPE; }
-  char type_prefix() const { return TYPE; }
-  char fill() const { return ' '; }
-};
-
-// A width specifier.
-struct WidthSpec {
-  unsigned width_;
-  // Fill is always wchar_t and cast to char if necessary to avoid having
-  // two specialization of WidthSpec and its subclasses.
-  wchar_t fill_;
-
-  WidthSpec(unsigned width, wchar_t fill) : width_(width), fill_(fill) {}
-
-  unsigned width() const { return width_; }
-  wchar_t fill() const { return fill_; }
-};
-
-// An alignment specifier.
-struct AlignSpec : WidthSpec {
-  Alignment align_;
-
-  AlignSpec(unsigned width, wchar_t fill, Alignment align = ALIGN_DEFAULT)
-  : WidthSpec(width, fill), align_(align) {}
-
-  Alignment align() const { return align_; }
-
-  int precision() const { return -1; }
-};
-
-// An alignment and type specifier.
-template <char TYPE>
-struct AlignTypeSpec : AlignSpec {
-  AlignTypeSpec(unsigned width, wchar_t fill) : AlignSpec(width, fill) {}
-
-  bool flag(unsigned) const { return false; }
-  char type() const { return TYPE; }
-  char type_prefix() const { return TYPE; }
-};
-
-// A full format specifier.
-struct FormatSpec : AlignSpec {
-  unsigned flags_;
-  int precision_;
-  char type_;
-
-  FormatSpec(
-    unsigned width = 0, char type = 0, wchar_t fill = ' ')
-  : AlignSpec(width, fill), flags_(0), precision_(-1), type_(type) {}
-
-  bool flag(unsigned f) const { return (flags_ & f) != 0; }
-  int precision() const { return precision_; }
-  char type() const { return type_; }
-  char type_prefix() const { return type_; }
-};
-
-// An integer format specifier.
-template <typename T, typename SpecT = TypeSpec<0>, typename Char = char>
-class IntFormatSpec : public SpecT {
- private:
-  T value_;
-
- public:
-  IntFormatSpec(T val, const SpecT &spec = SpecT())
-  : SpecT(spec), value_(val) {}
-
-  T value() const { return value_; }
-};
-
-// A string format specifier.
-template <typename Char>
-class StrFormatSpec : public AlignSpec {
- private:
-  const Char *str_;
-
- public:
-  template <typename FillChar>
-  StrFormatSpec(const Char *str, unsigned width, FillChar fill)
-  : AlignSpec(width, fill), str_(str) {
-    internal::CharTraits<Char>::convert(FillChar());
-  }
-
-  const Char *str() const { return str_; }
-};
-
-/**
-  Returns an integer format specifier to format the value in base 2.
- */
-IntFormatSpec<int, TypeSpec<'b'> > bin(int value);
-
-/**
-  Returns an integer format specifier to format the value in base 8.
- */
-IntFormatSpec<int, TypeSpec<'o'> > oct(int value);
-
-/**
-  Returns an integer format specifier to format the value in base 16 using
-  lower-case letters for the digits above 9.
- */
-IntFormatSpec<int, TypeSpec<'x'> > hex(int value);
-
-/**
-  Returns an integer formatter format specifier to format in base 16 using
-  upper-case letters for the digits above 9.
- */
-IntFormatSpec<int, TypeSpec<'X'> > hexu(int value);
-
-/**
-  \rst
-  Returns an integer format specifier to pad the formatted argument with the
-  fill character to the specified width using the default (right) numeric
-  alignment.
-
-  **Example**::
-
-    MemoryWriter out;
-    out << pad(hex(0xcafe), 8, '0');
-    // out.str() == "0000cafe"
-
-  \endrst
- */
-template <char TYPE_CODE, typename Char>
-IntFormatSpec<int, AlignTypeSpec<TYPE_CODE>, Char> pad(
-    int value, unsigned width, Char fill = ' ');
-
-#define FMT_DEFINE_INT_FORMATTERS(TYPE) \
-inline IntFormatSpec<TYPE, TypeSpec<'b'> > bin(TYPE value) { \
-  return IntFormatSpec<TYPE, TypeSpec<'b'> >(value, TypeSpec<'b'>()); \
-} \
- \
-inline IntFormatSpec<TYPE, TypeSpec<'o'> > oct(TYPE value) { \
-  return IntFormatSpec<TYPE, TypeSpec<'o'> >(value, TypeSpec<'o'>()); \
-} \
- \
-inline IntFormatSpec<TYPE, TypeSpec<'x'> > hex(TYPE value) { \
-  return IntFormatSpec<TYPE, TypeSpec<'x'> >(value, TypeSpec<'x'>()); \
-} \
- \
-inline IntFormatSpec<TYPE, TypeSpec<'X'> > hexu(TYPE value) { \
-  return IntFormatSpec<TYPE, TypeSpec<'X'> >(value, TypeSpec<'X'>()); \
-} \
- \
-template <char TYPE_CODE> \
-inline IntFormatSpec<TYPE, AlignTypeSpec<TYPE_CODE> > pad( \
-    IntFormatSpec<TYPE, TypeSpec<TYPE_CODE> > f, unsigned width) { \
-  return IntFormatSpec<TYPE, AlignTypeSpec<TYPE_CODE> >( \
-      f.value(), AlignTypeSpec<TYPE_CODE>(width, ' ')); \
-} \
- \
-/* For compatibility with older compilers we provide two overloads for pad, */ \
-/* one that takes a fill character and one that doesn't. In the future this */ \
-/* can be replaced with one overload making the template argument Char      */ \
-/* default to char (C++11). */ \
-template <char TYPE_CODE, typename Char> \
-inline IntFormatSpec<TYPE, AlignTypeSpec<TYPE_CODE>, Char> pad( \
-    IntFormatSpec<TYPE, TypeSpec<TYPE_CODE>, Char> f, \
-    unsigned width, Char fill) { \
-  return IntFormatSpec<TYPE, AlignTypeSpec<TYPE_CODE>, Char>( \
-      f.value(), AlignTypeSpec<TYPE_CODE>(width, fill)); \
-} \
- \
-inline IntFormatSpec<TYPE, AlignTypeSpec<0> > pad( \
-    TYPE value, unsigned width) { \
-  return IntFormatSpec<TYPE, AlignTypeSpec<0> >( \
-      value, AlignTypeSpec<0>(width, ' ')); \
-} \
- \
-template <typename Char> \
-inline IntFormatSpec<TYPE, AlignTypeSpec<0>, Char> pad( \
-   TYPE value, unsigned width, Char fill) { \
- return IntFormatSpec<TYPE, AlignTypeSpec<0>, Char>( \
-     value, AlignTypeSpec<0>(width, fill)); \
-}
-
-FMT_DEFINE_INT_FORMATTERS(int)
-FMT_DEFINE_INT_FORMATTERS(long)
-FMT_DEFINE_INT_FORMATTERS(unsigned)
-FMT_DEFINE_INT_FORMATTERS(unsigned long)
-FMT_DEFINE_INT_FORMATTERS(LongLong)
-FMT_DEFINE_INT_FORMATTERS(ULongLong)
-
-/**
-  \rst
-  Returns a string formatter that pads the formatted argument with the fill
-  character to the specified width using the default (left) string alignment.
-
-  **Example**::
-
-    std::string s = str(MemoryWriter() << pad("abc", 8));
-    // s == "abc     "
-
-  \endrst
- */
-template <typename Char>
-inline StrFormatSpec<Char> pad(
-    const Char *str, unsigned width, Char fill = ' ') {
-  return StrFormatSpec<Char>(str, width, fill);
-}
-
-inline StrFormatSpec<wchar_t> pad(
-    const wchar_t *str, unsigned width, char fill = ' ') {
-  return StrFormatSpec<wchar_t>(str, width, fill);
-}
-
-namespace internal {
-
-template <typename Char>
-class ArgMap {
- private:
-  typedef std::vector<
-    std::pair<fmt::BasicStringRef<Char>, internal::Arg> > MapType;
-  typedef typename MapType::value_type Pair;
-
-  MapType map_;
-
- public:
-  FMT_API void init(const ArgList &args);
-
-  const internal::Arg *find(const fmt::BasicStringRef<Char> &name) const {
-    // The list is unsorted, so just return the first matching name.
-    for (typename MapType::const_iterator it = map_.begin(), end = map_.end();
-         it != end; ++it) {
-      if (it->first == name)
-        return &it->second;
-    }
-    return FMT_NULL;
-  }
-};
-
-template <typename Impl, typename Char, typename Spec = fmt::FormatSpec>
-class ArgFormatterBase : public ArgVisitor<Impl, void> {
- private:
-  BasicWriter<Char> &writer_;
-  Spec &spec_;
-
-  FMT_DISALLOW_COPY_AND_ASSIGN(ArgFormatterBase);
-
-  void write_pointer(const void *p) {
-    spec_.flags_ = HASH_FLAG;
-    spec_.type_ = 'x';
-    writer_.write_int(reinterpret_cast<uintptr_t>(p), spec_);
-  }
-
-  // workaround MSVC two-phase lookup issue
-  typedef internal::Arg Arg;
-
- protected:
-  BasicWriter<Char> &writer() { return writer_; }
-  Spec &spec() { return spec_; }
-
-  void write(bool value) {
-    const char *str_value = value ? "true" : "false";
-    Arg::StringValue<char> str = { str_value, std::strlen(str_value) };
-    writer_.write_str(str, spec_);
-  }
-
-  void write(const char *value) {
-    Arg::StringValue<char> str = {value, value ? std::strlen(value) : 0};
-    writer_.write_str(str, spec_);
-  }
-
- public:
-  typedef Spec SpecType;
-
-  ArgFormatterBase(BasicWriter<Char> &w, Spec &s)
-  : writer_(w), spec_(s) {}
-
-  template <typename T>
-  void visit_any_int(T value) { writer_.write_int(value, spec_); }
-
-  template <typename T>
-  void visit_any_double(T value) { writer_.write_double(value, spec_); }
-
-  void visit_bool(bool value) {
-    if (spec_.type_) {
-      visit_any_int(value);
-      return;
-    }
-    write(value);
-  }
-
-  void visit_char(int value) {
-    if (spec_.type_ && spec_.type_ != 'c') {
-      spec_.flags_ |= CHAR_FLAG;
-      writer_.write_int(value, spec_);
-      return;
-    }
-    if (spec_.align_ == ALIGN_NUMERIC || spec_.flags_ != 0)
-      FMT_THROW(FormatError("invalid format specifier for char"));
-    typedef typename BasicWriter<Char>::CharPtr CharPtr;
-    Char fill = internal::CharTraits<Char>::cast(spec_.fill());
-    CharPtr out = CharPtr();
-    const unsigned CHAR_SIZE = 1;
-    if (spec_.width_ > CHAR_SIZE) {
-      out = writer_.grow_buffer(spec_.width_);
-      if (spec_.align_ == ALIGN_RIGHT) {
-        std::uninitialized_fill_n(out, spec_.width_ - CHAR_SIZE, fill);
-        out += spec_.width_ - CHAR_SIZE;
-      } else if (spec_.align_ == ALIGN_CENTER) {
-        out = writer_.fill_padding(out, spec_.width_,
-                                   internal::const_check(CHAR_SIZE), fill);
-      } else {
-        std::uninitialized_fill_n(out + CHAR_SIZE,
-                                  spec_.width_ - CHAR_SIZE, fill);
-      }
-    } else {
-      out = writer_.grow_buffer(CHAR_SIZE);
-    }
-    *out = internal::CharTraits<Char>::cast(value);
-  }
-
-  void visit_cstring(const char *value) {
-    if (spec_.type_ == 'p')
-      return write_pointer(value);
-    write(value);
-  }
-
-  // Qualification with "internal" here and below is a workaround for nvcc.
-  void visit_string(internal::Arg::StringValue<char> value) {
-    writer_.write_str(value, spec_);
-  }
-
-  using ArgVisitor<Impl, void>::visit_wstring;
-
-  void visit_wstring(internal::Arg::StringValue<Char> value) {
-    writer_.write_str(value, spec_);
-  }
-
-  void visit_pointer(const void *value) {
-    if (spec_.type_ && spec_.type_ != 'p')
-      report_unknown_type(spec_.type_, "pointer");
-    write_pointer(value);
-  }
-};
-
-class FormatterBase {
- private:
-  ArgList args_;
-  int next_arg_index_;
-
-  // Returns the argument with specified index.
-  FMT_API Arg do_get_arg(unsigned arg_index, const char *&error);
-
- protected:
-  const ArgList &args() const { return args_; }
-
-  explicit FormatterBase(const ArgList &args) {
-    args_ = args;
-    next_arg_index_ = 0;
-  }
-
-  // Returns the next argument.
-  Arg next_arg(const char *&error) {
-    if (next_arg_index_ >= 0)
-      return do_get_arg(internal::to_unsigned(next_arg_index_++), error);
-    error = "cannot switch from manual to automatic argument indexing";
-    return Arg();
-  }
-
-  // Checks if manual indexing is used and returns the argument with
-  // specified index.
-  Arg get_arg(unsigned arg_index, const char *&error) {
-    return check_no_auto_index(error) ? do_get_arg(arg_index, error) : Arg();
-  }
-
-  bool check_no_auto_index(const char *&error) {
-    if (next_arg_index_ > 0) {
-      error = "cannot switch from automatic to manual argument indexing";
-      return false;
-    }
-    next_arg_index_ = -1;
-    return true;
-  }
-
-  template <typename Char>
-  void write(BasicWriter<Char> &w, const Char *start, const Char *end) {
-    if (start != end)
-      w << BasicStringRef<Char>(start, internal::to_unsigned(end - start));
-  }
-};
-}  // namespace internal
-
-/**
-  \rst
-  An argument formatter based on the `curiously recurring template pattern
-  <http://en.wikipedia.org/wiki/Curiously_recurring_template_pattern>`_.
-
-  To use `~fmt::BasicArgFormatter` define a subclass that implements some or
-  all of the visit methods with the same signatures as the methods in
-  `~fmt::ArgVisitor`, for example, `~fmt::ArgVisitor::visit_int()`.
-  Pass the subclass as the *Impl* template parameter. When a formatting
-  function processes an argument, it will dispatch to a visit method
-  specific to the argument type. For example, if the argument type is
-  ``double`` then the `~fmt::ArgVisitor::visit_double()` method of a subclass
-  will be called. If the subclass doesn't contain a method with this signature,
-  then a corresponding method of `~fmt::BasicArgFormatter` or its superclass
-  will be called.
-  \endrst
- */
-template <typename Impl, typename Char, typename Spec = fmt::FormatSpec>
-class BasicArgFormatter : public internal::ArgFormatterBase<Impl, Char, Spec> {
- private:
-  BasicFormatter<Char, Impl> &formatter_;
-  const Char *format_;
-
- public:
-  /**
-    \rst
-    Constructs an argument formatter object.
-    *formatter* is a reference to the main formatter object, *spec* contains
-    format specifier information for standard argument types, and *fmt* points
-    to the part of the format string being parsed for custom argument types.
-    \endrst
-   */
-  BasicArgFormatter(BasicFormatter<Char, Impl> &formatter,
-                    Spec &spec, const Char *fmt)
-  : internal::ArgFormatterBase<Impl, Char, Spec>(formatter.writer(), spec),
-    formatter_(formatter), format_(fmt) {}
-
-  /** Formats an argument of a custom (user-defined) type. */
-  void visit_custom(internal::Arg::CustomValue c) {
-    c.format(&formatter_, c.value, &format_);
-  }
-};
-
-/** The default argument formatter. */
-template <typename Char>
-class ArgFormatter :
-    public BasicArgFormatter<ArgFormatter<Char>, Char, FormatSpec> {
- public:
-  /** Constructs an argument formatter object. */
-  ArgFormatter(BasicFormatter<Char> &formatter,
-               FormatSpec &spec, const Char *fmt)
-  : BasicArgFormatter<ArgFormatter<Char>,
-                      Char, FormatSpec>(formatter, spec, fmt) {}
-};
-
-/** This template formats data and writes the output to a writer. */
-template <typename CharType, typename ArgFormatter>
-class BasicFormatter : private internal::FormatterBase {
- public:
-  /** The character type for the output. */
-  typedef CharType Char;
-
- private:
-  BasicWriter<Char> &writer_;
-  internal::ArgMap<Char> map_;
-
-  FMT_DISALLOW_COPY_AND_ASSIGN(BasicFormatter);
-
-  using internal::FormatterBase::get_arg;
-
-  // Checks if manual indexing is used and returns the argument with
-  // specified name.
-  internal::Arg get_arg(BasicStringRef<Char> arg_name, const char *&error);
-
-  // Parses argument index and returns corresponding argument.
-  internal::Arg parse_arg_index(const Char *&s);
-
-  // Parses argument name and returns corresponding argument.
-  internal::Arg parse_arg_name(const Char *&s);
-
- public:
-  /**
-   \rst
-   Constructs a ``BasicFormatter`` object. References to the arguments and
-   the writer are stored in the formatter object so make sure they have
-   appropriate lifetimes.
-   \endrst
-   */
-  BasicFormatter(const ArgList &args, BasicWriter<Char> &w)
-    : internal::FormatterBase(args), writer_(w) {}
-
-  /** Returns a reference to the writer associated with this formatter. */
-  BasicWriter<Char> &writer() { return writer_; }
-
-  /** Formats stored arguments and writes the output to the writer. */
-  void format(BasicCStringRef<Char> format_str);
-
-  // Formats a single argument and advances format_str, a format string pointer.
-  const Char *format(const Char *&format_str, const internal::Arg &arg);
-};
-
-// Generates a comma-separated list with results of applying f to
-// numbers 0..n-1.
-# define FMT_GEN(n, f) FMT_GEN##n(f)
-# define FMT_GEN1(f)  f(0)
-# define FMT_GEN2(f)  FMT_GEN1(f),  f(1)
-# define FMT_GEN3(f)  FMT_GEN2(f),  f(2)
-# define FMT_GEN4(f)  FMT_GEN3(f),  f(3)
-# define FMT_GEN5(f)  FMT_GEN4(f),  f(4)
-# define FMT_GEN6(f)  FMT_GEN5(f),  f(5)
-# define FMT_GEN7(f)  FMT_GEN6(f),  f(6)
-# define FMT_GEN8(f)  FMT_GEN7(f),  f(7)
-# define FMT_GEN9(f)  FMT_GEN8(f),  f(8)
-# define FMT_GEN10(f) FMT_GEN9(f),  f(9)
-# define FMT_GEN11(f) FMT_GEN10(f), f(10)
-# define FMT_GEN12(f) FMT_GEN11(f), f(11)
-# define FMT_GEN13(f) FMT_GEN12(f), f(12)
-# define FMT_GEN14(f) FMT_GEN13(f), f(13)
-# define FMT_GEN15(f) FMT_GEN14(f), f(14)
-
-namespace internal {
-inline uint64_t make_type() { return 0; }
-
-template <typename T>
-inline uint64_t make_type(const T &arg) {
-  return MakeValue< BasicFormatter<char> >::type(arg);
-}
-
-template <std::size_t N, bool/*IsPacked*/= (N < ArgList::MAX_PACKED_ARGS)>
-struct ArgArray;
-
-template <std::size_t N>
-struct ArgArray<N, true/*IsPacked*/> {
-  typedef Value Type[N > 0 ? N : 1];
-
-  template <typename Formatter, typename T>
-  static Value make(const T &value) {
-#ifdef __clang__
-    Value result = MakeValue<Formatter>(value);
-    // Workaround a bug in Apple LLVM version 4.2 (clang-425.0.28) of clang:
-    // https://github.com/fmtlib/fmt/issues/276
-    (void)result.custom.format;
-    return result;
-#else
-    return MakeValue<Formatter>(value);
-#endif
-  }
-};
-
-template <std::size_t N>
-struct ArgArray<N, false/*IsPacked*/> {
-  typedef Arg Type[N + 1]; // +1 for the list end Arg::NONE
-
-  template <typename Formatter, typename T>
-  static Arg make(const T &value) { return MakeArg<Formatter>(value); }
-};
-
-#if FMT_USE_VARIADIC_TEMPLATES
-template <typename Arg, typename... Args>
-inline uint64_t make_type(const Arg &first, const Args & ... tail) {
-  return make_type(first) | (make_type(tail...) << 4);
-}
-
-#else
-
-struct ArgType {
-  uint64_t type;
-
-  ArgType() : type(0) {}
-
-  template <typename T>
-  ArgType(const T &arg) : type(make_type(arg)) {}
-};
-
-# define FMT_ARG_TYPE_DEFAULT(n) ArgType t##n = ArgType()
-
-inline uint64_t make_type(FMT_GEN15(FMT_ARG_TYPE_DEFAULT)) {
-  return t0.type | (t1.type << 4) | (t2.type << 8) | (t3.type << 12) |
-      (t4.type << 16) | (t5.type << 20) | (t6.type << 24) | (t7.type << 28) |
-      (t8.type << 32) | (t9.type << 36) | (t10.type << 40) | (t11.type << 44) |
-      (t12.type << 48) | (t13.type << 52) | (t14.type << 56);
-}
-#endif
-}  // namespace internal
-
-# define FMT_MAKE_TEMPLATE_ARG(n) typename T##n
-# define FMT_MAKE_ARG_TYPE(n) T##n
-# define FMT_MAKE_ARG(n) const T##n &v##n
-# define FMT_ASSIGN_char(n) \
-  arr[n] = fmt::internal::MakeValue< fmt::BasicFormatter<char> >(v##n)
-# define FMT_ASSIGN_wchar_t(n) \
-  arr[n] = fmt::internal::MakeValue< fmt::BasicFormatter<wchar_t> >(v##n)
-
-#if FMT_USE_VARIADIC_TEMPLATES
-// Defines a variadic function returning void.
-# define FMT_VARIADIC_VOID(func, arg_type) \
-  template <typename... Args> \
-  void func(arg_type arg0, const Args & ... args) { \
-    typedef fmt::internal::ArgArray<sizeof...(Args)> ArgArray; \
-    typename ArgArray::Type array{ \
-      ArgArray::template make<fmt::BasicFormatter<Char> >(args)...}; \
-    func(arg0, fmt::ArgList(fmt::internal::make_type(args...), array)); \
-  }
-
-// Defines a variadic constructor.
-# define FMT_VARIADIC_CTOR(ctor, func, arg0_type, arg1_type) \
-  template <typename... Args> \
-  ctor(arg0_type arg0, arg1_type arg1, const Args & ... args) { \
-    typedef fmt::internal::ArgArray<sizeof...(Args)> ArgArray; \
-    typename ArgArray::Type array{ \
-      ArgArray::template make<fmt::BasicFormatter<Char> >(args)...}; \
-    func(arg0, arg1, fmt::ArgList(fmt::internal::make_type(args...), array)); \
-  }
-
-#else
-
-# define FMT_MAKE_REF(n) \
-  fmt::internal::MakeValue< fmt::BasicFormatter<Char> >(v##n)
-# define FMT_MAKE_REF2(n) v##n
-
-// Defines a wrapper for a function taking one argument of type arg_type
-// and n additional arguments of arbitrary types.
-# define FMT_WRAP1(func, arg_type, n) \
-  template <FMT_GEN(n, FMT_MAKE_TEMPLATE_ARG)> \
-  inline void func(arg_type arg1, FMT_GEN(n, FMT_MAKE_ARG)) { \
-    const fmt::internal::ArgArray<n>::Type array = {FMT_GEN(n, FMT_MAKE_REF)}; \
-    func(arg1, fmt::ArgList( \
-      fmt::internal::make_type(FMT_GEN(n, FMT_MAKE_REF2)), array)); \
-  }
-
-// Emulates a variadic function returning void on a pre-C++11 compiler.
-# define FMT_VARIADIC_VOID(func, arg_type) \
-  inline void func(arg_type arg) { func(arg, fmt::ArgList()); } \
-  FMT_WRAP1(func, arg_type, 1) FMT_WRAP1(func, arg_type, 2) \
-  FMT_WRAP1(func, arg_type, 3) FMT_WRAP1(func, arg_type, 4) \
-  FMT_WRAP1(func, arg_type, 5) FMT_WRAP1(func, arg_type, 6) \
-  FMT_WRAP1(func, arg_type, 7) FMT_WRAP1(func, arg_type, 8) \
-  FMT_WRAP1(func, arg_type, 9) FMT_WRAP1(func, arg_type, 10)
-
-# define FMT_CTOR(ctor, func, arg0_type, arg1_type, n) \
-  template <FMT_GEN(n, FMT_MAKE_TEMPLATE_ARG)> \
-  ctor(arg0_type arg0, arg1_type arg1, FMT_GEN(n, FMT_MAKE_ARG)) { \
-    const fmt::internal::ArgArray<n>::Type array = {FMT_GEN(n, FMT_MAKE_REF)}; \
-    func(arg0, arg1, fmt::ArgList( \
-      fmt::internal::make_type(FMT_GEN(n, FMT_MAKE_REF2)), array)); \
-  }
-
-// Emulates a variadic constructor on a pre-C++11 compiler.
-# define FMT_VARIADIC_CTOR(ctor, func, arg0_type, arg1_type) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 1) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 2) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 3) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 4) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 5) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 6) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 7) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 8) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 9) \
-  FMT_CTOR(ctor, func, arg0_type, arg1_type, 10)
-#endif
-
-// Generates a comma-separated list with results of applying f to pairs
-// (argument, index).
-#define FMT_FOR_EACH1(f, x0) f(x0, 0)
-#define FMT_FOR_EACH2(f, x0, x1) \
-  FMT_FOR_EACH1(f, x0), f(x1, 1)
-#define FMT_FOR_EACH3(f, x0, x1, x2) \
-  FMT_FOR_EACH2(f, x0 ,x1), f(x2, 2)
-#define FMT_FOR_EACH4(f, x0, x1, x2, x3) \
-  FMT_FOR_EACH3(f, x0, x1, x2), f(x3, 3)
-#define FMT_FOR_EACH5(f, x0, x1, x2, x3, x4) \
-  FMT_FOR_EACH4(f, x0, x1, x2, x3), f(x4, 4)
-#define FMT_FOR_EACH6(f, x0, x1, x2, x3, x4, x5) \
-  FMT_FOR_EACH5(f, x0, x1, x2, x3, x4), f(x5, 5)
-#define FMT_FOR_EACH7(f, x0, x1, x2, x3, x4, x5, x6) \
-  FMT_FOR_EACH6(f, x0, x1, x2, x3, x4, x5), f(x6, 6)
-#define FMT_FOR_EACH8(f, x0, x1, x2, x3, x4, x5, x6, x7) \
-  FMT_FOR_EACH7(f, x0, x1, x2, x3, x4, x5, x6), f(x7, 7)
-#define FMT_FOR_EACH9(f, x0, x1, x2, x3, x4, x5, x6, x7, x8) \
-  FMT_FOR_EACH8(f, x0, x1, x2, x3, x4, x5, x6, x7), f(x8, 8)
-#define FMT_FOR_EACH10(f, x0, x1, x2, x3, x4, x5, x6, x7, x8, x9) \
-  FMT_FOR_EACH9(f, x0, x1, x2, x3, x4, x5, x6, x7, x8), f(x9, 9)
-
-/**
- An error returned by an operating system or a language runtime,
- for example a file opening error.
-*/
-class SystemError : public internal::RuntimeError {
- private:
-  FMT_API void init(int err_code, CStringRef format_str, ArgList args);
-
- protected:
-  int error_code_;
-
-  typedef char Char;  // For FMT_VARIADIC_CTOR.
-
-  SystemError() {}
-
- public:
-  /**
-   \rst
-   Constructs a :class:`fmt::SystemError` object with a description
-   formatted with `fmt::format_system_error`. *message* and additional
-   arguments passed into the constructor are formatted similarly to
-   `fmt::format`.
-
-   **Example**::
-
-     // This throws a SystemError with the description
-     //   cannot open file 'madeup': No such file or directory
-     // or similar (system message may vary).
-     const char *filename = "madeup";
-     std::FILE *file = std::fopen(filename, "r");
-     if (!file)
-       throw fmt::SystemError(errno, "cannot open file '{}'", filename);
-   \endrst
-  */
-  SystemError(int error_code, CStringRef message) {
-    init(error_code, message, ArgList());
-  }
-  FMT_DEFAULTED_COPY_CTOR(SystemError)
-  FMT_VARIADIC_CTOR(SystemError, init, int, CStringRef)
-
-  FMT_API ~SystemError() FMT_DTOR_NOEXCEPT;
-
-  int error_code() const { return error_code_; }
-};
-
-/**
-  \rst
-  Formats an error returned by an operating system or a language runtime,
-  for example a file opening error, and writes it to *out* in the following
-  form:
-
-  .. parsed-literal::
-     *<message>*: *<system-message>*
-
-  where *<message>* is the passed message and *<system-message>* is
-  the system message corresponding to the error code.
-  *error_code* is a system error code as given by ``errno``.
-  If *error_code* is not a valid error code such as -1, the system message
-  may look like "Unknown error -1" and is platform-dependent.
-  \endrst
- */
-FMT_API void format_system_error(fmt::Writer &out, int error_code,
-                                 fmt::StringRef message) FMT_NOEXCEPT;
-
-/**
-  \rst
-  This template provides operations for formatting and writing data into
-  a character stream. The output is stored in a buffer provided by a subclass
-  such as :class:`fmt::BasicMemoryWriter`.
-
-  You can use one of the following typedefs for common character types:
-
-  +---------+----------------------+
-  | Type    | Definition           |
-  +=========+======================+
-  | Writer  | BasicWriter<char>    |
-  +---------+----------------------+
-  | WWriter | BasicWriter<wchar_t> |
-  +---------+----------------------+
-
-  \endrst
- */
-template <typename Char>
-class BasicWriter {
- private:
-  // Output buffer.
-  Buffer<Char> &buffer_;
-
-  FMT_DISALLOW_COPY_AND_ASSIGN(BasicWriter);
-
-  typedef typename internal::CharTraits<Char>::CharPtr CharPtr;
-
-#if FMT_SECURE_SCL
-  // Returns pointer value.
-  static Char *get(CharPtr p) { return p.base(); }
-#else
-  static Char *get(Char *p) { return p; }
-#endif
-
-  // Fills the padding around the content and returns the pointer to the
-  // content area.
-  static CharPtr fill_padding(CharPtr buffer,
-      unsigned total_size, std::size_t content_size, wchar_t fill);
-
-  // Grows the buffer by n characters and returns a pointer to the newly
-  // allocated area.
-  CharPtr grow_buffer(std::size_t n) {
-    std::size_t size = buffer_.size();
-    buffer_.resize(size + n);
-    return internal::make_ptr(&buffer_[size], n);
-  }
-
-  // Writes an unsigned decimal integer.
-  template <typename UInt>
-  Char *write_unsigned_decimal(UInt value, unsigned prefix_size = 0) {
-    unsigned num_digits = internal::count_digits(value);
-    Char *ptr = get(grow_buffer(prefix_size + num_digits));
-    internal::format_decimal(ptr + prefix_size, value, num_digits);
-    return ptr;
-  }
-
-  // Writes a decimal integer.
-  template <typename Int>
-  void write_decimal(Int value) {
-    typedef typename internal::IntTraits<Int>::MainType MainType;
-    MainType abs_value = static_cast<MainType>(value);
-    if (internal::is_negative(value)) {
-      abs_value = 0 - abs_value;
-      *write_unsigned_decimal(abs_value, 1) = '-';
-    } else {
-      write_unsigned_decimal(abs_value, 0);
-    }
-  }
-
-  // Prepare a buffer for integer formatting.
-  CharPtr prepare_int_buffer(unsigned num_digits,
-      const EmptySpec &, const char *prefix, unsigned prefix_size) {
-    unsigned size = prefix_size + num_digits;
-    CharPtr p = grow_buffer(size);
-    std::uninitialized_copy(prefix, prefix + prefix_size, p);
-    return p + size - 1;
-  }
-
-  template <typename Spec>
-  CharPtr prepare_int_buffer(unsigned num_digits,
-    const Spec &spec, const char *prefix, unsigned prefix_size);
-
-  // Formats an integer.
-  template <typename T, typename Spec>
-  void write_int(T value, Spec spec);
-
-  // Formats a floating-point number (double or long double).
-  template <typename T, typename Spec>
-  void write_double(T value, const Spec &spec);
-
-  // Writes a formatted string.
-  template <typename StrChar>
-  CharPtr write_str(const StrChar *s, std::size_t size, const AlignSpec &spec);
-
-  template <typename StrChar, typename Spec>
-  void write_str(const internal::Arg::StringValue<StrChar> &str,
-                 const Spec &spec);
-
-  // This following methods are private to disallow writing wide characters
-  // and strings to a char stream. If you want to print a wide string as a
-  // pointer as std::ostream does, cast it to const void*.
-  // Do not implement!
-  void operator<<(typename internal::WCharHelper<wchar_t, Char>::Unsupported);
-  void operator<<(
-      typename internal::WCharHelper<const wchar_t *, Char>::Unsupported);
-
-  // Appends floating-point length specifier to the format string.
-  // The second argument is only used for overload resolution.
-  void append_float_length(Char *&format_ptr, long double) {
-    *format_ptr++ = 'L';
-  }
-
-  template<typename T>
-  void append_float_length(Char *&, T) {}
-
-  template <typename Impl, typename Char_, typename Spec_>
-  friend class internal::ArgFormatterBase;
-
-  template <typename Impl, typename Char_, typename Spec_>
-  friend class BasicPrintfArgFormatter;
-
- protected:
-  /**
-    Constructs a ``BasicWriter`` object.
-   */
-  explicit BasicWriter(Buffer<Char> &b) : buffer_(b) {}
-
- public:
-  /**
-    \rst
-    Destroys a ``BasicWriter`` object.
-    \endrst
-   */
-  virtual ~BasicWriter() {}
-
-  /**
-    Returns the total number of characters written.
-   */
-  std::size_t size() const { return buffer_.size(); }
-
-  /**
-    Returns a pointer to the output buffer content. No terminating null
-    character is appended.
-   */
-  const Char *data() const FMT_NOEXCEPT { return &buffer_[0]; }
-
-  /**
-    Returns a pointer to the output buffer content with terminating null
-    character appended.
-   */
-  const Char *c_str() const {
-    std::size_t size = buffer_.size();
-    buffer_.reserve(size + 1);
-    buffer_[size] = '\0';
-    return &buffer_[0];
-  }
-
-  /**
-    \rst
-    Returns the content of the output buffer as an `std::string`.
-    \endrst
-   */
-  std::basic_string<Char> str() const {
-    return std::basic_string<Char>(&buffer_[0], buffer_.size());
-  }
-
-  /**
-    \rst
-    Writes formatted data.
-
-    *args* is an argument list representing arbitrary arguments.
-
-    **Example**::
-
-       MemoryWriter out;
-       out.write("Current point:\n");
-       out.write("({:+f}, {:+f})", -3.14, 3.14);
-
-    This will write the following output to the ``out`` object:
-
-    .. code-block:: none
-
-       Current point:
-       (-3.140000, +3.140000)
-
-    The output can be accessed using :func:`data()`, :func:`c_str` or
-    :func:`str` methods.
-
-    See also :ref:`syntax`.
-    \endrst
-   */
-  void write(BasicCStringRef<Char> format, ArgList args) {
-    BasicFormatter<Char>(args, *this).format(format);
-  }
-  FMT_VARIADIC_VOID(write, BasicCStringRef<Char>)
-
-  BasicWriter &operator<<(int value) {
-    write_decimal(value);
-    return *this;
-  }
-  BasicWriter &operator<<(unsigned value) {
-    return *this << IntFormatSpec<unsigned>(value);
-  }
-  BasicWriter &operator<<(long value) {
-    write_decimal(value);
-    return *this;
-  }
-  BasicWriter &operator<<(unsigned long value) {
-    return *this << IntFormatSpec<unsigned long>(value);
-  }
-  BasicWriter &operator<<(LongLong value) {
-    write_decimal(value);
-    return *this;
-  }
-
-  /**
-    \rst
-    Formats *value* and writes it to the stream.
-    \endrst
-   */
-  BasicWriter &operator<<(ULongLong value) {
-    return *this << IntFormatSpec<ULongLong>(value);
-  }
-
-  BasicWriter &operator<<(double value) {
-    write_double(value, FormatSpec());
-    return *this;
-  }
-
-  /**
-    \rst
-    Formats *value* using the general format for floating-point numbers
-    (``'g'``) and writes it to the stream.
-    \endrst
-   */
-  BasicWriter &operator<<(long double value) {
-    write_double(value, FormatSpec());
-    return *this;
-  }
-
-  /**
-    Writes a character to the stream.
-   */
-  BasicWriter &operator<<(char value) {
-    buffer_.push_back(value);
-    return *this;
-  }
-
-  BasicWriter &operator<<(
-      typename internal::WCharHelper<wchar_t, Char>::Supported value) {
-    buffer_.push_back(value);
-    return *this;
-  }
-
-  /**
-    \rst
-    Writes *value* to the stream.
-    \endrst
-   */
-  BasicWriter &operator<<(fmt::BasicStringRef<Char> value) {
-    const Char *str = value.data();
-    buffer_.append(str, str + value.size());
-    return *this;
-  }
-
-  BasicWriter &operator<<(
-      typename internal::WCharHelper<StringRef, Char>::Supported value) {
-    const char *str = value.data();
-    buffer_.append(str, str + value.size());
-    return *this;
-  }
-
-  template <typename T, typename Spec, typename FillChar>
-  BasicWriter &operator<<(IntFormatSpec<T, Spec, FillChar> spec) {
-    internal::CharTraits<Char>::convert(FillChar());
-    write_int(spec.value(), spec);
-    return *this;
-  }
-
-  template <typename StrChar>
-  BasicWriter &operator<<(const StrFormatSpec<StrChar> &spec) {
-    const StrChar *s = spec.str();
-    write_str(s, std::char_traits<Char>::length(s), spec);
-    return *this;
-  }
-
-  void clear() FMT_NOEXCEPT { buffer_.clear(); }
-
-  Buffer<Char> &buffer() FMT_NOEXCEPT { return buffer_; }
-};
-
-template <typename Char>
-template <typename StrChar>
-typename BasicWriter<Char>::CharPtr BasicWriter<Char>::write_str(
-      const StrChar *s, std::size_t size, const AlignSpec &spec) {
-  CharPtr out = CharPtr();
-  if (spec.width() > size) {
-    out = grow_buffer(spec.width());
-    Char fill = internal::CharTraits<Char>::cast(spec.fill());
-    if (spec.align() == ALIGN_RIGHT) {
-      std::uninitialized_fill_n(out, spec.width() - size, fill);
-      out += spec.width() - size;
-    } else if (spec.align() == ALIGN_CENTER) {
-      out = fill_padding(out, spec.width(), size, fill);
-    } else {
-      std::uninitialized_fill_n(out + size, spec.width() - size, fill);
-    }
-  } else {
-    out = grow_buffer(size);
-  }
-  std::uninitialized_copy(s, s + size, out);
-  return out;
-}
-
-template <typename Char>
-template <typename StrChar, typename Spec>
-void BasicWriter<Char>::write_str(
-    const internal::Arg::StringValue<StrChar> &s, const Spec &spec) {
-  // Check if StrChar is convertible to Char.
-  internal::CharTraits<Char>::convert(StrChar());
-  if (spec.type_ && spec.type_ != 's')
-    internal::report_unknown_type(spec.type_, "string");
-  const StrChar *str_value = s.value;
-  std::size_t str_size = s.size;
-  if (str_size == 0) {
-    if (!str_value) {
-      FMT_THROW(FormatError("string pointer is null"));
-    }
-  }
-  std::size_t precision = static_cast<std::size_t>(spec.precision_);
-  if (spec.precision_ >= 0 && precision < str_size)
-    str_size = precision;
-  write_str(str_value, str_size, spec);
-}
-
-template <typename Char>
-typename BasicWriter<Char>::CharPtr
-  BasicWriter<Char>::fill_padding(
-    CharPtr buffer, unsigned total_size,
-    std::size_t content_size, wchar_t fill) {
-  std::size_t padding = total_size - content_size;
-  std::size_t left_padding = padding / 2;
-  Char fill_char = internal::CharTraits<Char>::cast(fill);
-  std::uninitialized_fill_n(buffer, left_padding, fill_char);
-  buffer += left_padding;
-  CharPtr content = buffer;
-  std::uninitialized_fill_n(buffer + content_size,
-                            padding - left_padding, fill_char);
-  return content;
-}
-
-template <typename Char>
-template <typename Spec>
-typename BasicWriter<Char>::CharPtr
-  BasicWriter<Char>::prepare_int_buffer(
-    unsigned num_digits, const Spec &spec,
-    const char *prefix, unsigned prefix_size) {
-  unsigned width = spec.width();
-  Alignment align = spec.align();
-  Char fill = internal::CharTraits<Char>::cast(spec.fill());
-  if (spec.precision() > static_cast<int>(num_digits)) {
-    // Octal prefix '0' is counted as a digit, so ignore it if precision
-    // is specified.
-    if (prefix_size > 0 && prefix[prefix_size - 1] == '0')
-      --prefix_size;
-    unsigned number_size =
-        prefix_size + internal::to_unsigned(spec.precision());
-    AlignSpec subspec(number_size, '0', ALIGN_NUMERIC);
-    if (number_size >= width)
-      return prepare_int_buffer(num_digits, subspec, prefix, prefix_size);
-    buffer_.reserve(width);
-    unsigned fill_size = width - number_size;
-    if (align != ALIGN_LEFT) {
-      CharPtr p = grow_buffer(fill_size);
-      std::uninitialized_fill(p, p + fill_size, fill);
-    }
-    CharPtr result = prepare_int_buffer(
-        num_digits, subspec, prefix, prefix_size);
-    if (align == ALIGN_LEFT) {
-      CharPtr p = grow_buffer(fill_size);
-      std::uninitialized_fill(p, p + fill_size, fill);
-    }
-    return result;
-  }
-  unsigned size = prefix_size + num_digits;
-  if (width <= size) {
-    CharPtr p = grow_buffer(size);
-    std::uninitialized_copy(prefix, prefix + prefix_size, p);
-    return p + size - 1;
-  }
-  CharPtr p = grow_buffer(width);
-  CharPtr end = p + width;
-  if (align == ALIGN_LEFT) {
-    std::uninitialized_copy(prefix, prefix + prefix_size, p);
-    p += size;
-    std::uninitialized_fill(p, end, fill);
-  } else if (align == ALIGN_CENTER) {
-    p = fill_padding(p, width, size, fill);
-    std::uninitialized_copy(prefix, prefix + prefix_size, p);
-    p += size;
-  } else {
-    if (align == ALIGN_NUMERIC) {
-      if (prefix_size != 0) {
-        p = std::uninitialized_copy(prefix, prefix + prefix_size, p);
-        size -= prefix_size;
-      }
-    } else {
-      std::uninitialized_copy(prefix, prefix + prefix_size, end - size);
-    }
-    std::uninitialized_fill(p, end - size, fill);
-    p = end;
-  }
-  return p - 1;
-}
-
-template <typename Char>
-template <typename T, typename Spec>
-void BasicWriter<Char>::write_int(T value, Spec spec) {
-  unsigned prefix_size = 0;
-  typedef typename internal::IntTraits<T>::MainType UnsignedType;
-  UnsignedType abs_value = static_cast<UnsignedType>(value);
-  char prefix[4] = "";
-  if (internal::is_negative(value)) {
-    prefix[0] = '-';
-    ++prefix_size;
-    abs_value = 0 - abs_value;
-  } else if (spec.flag(SIGN_FLAG)) {
-    prefix[0] = spec.flag(PLUS_FLAG) ? '+' : ' ';
-    ++prefix_size;
-  }
-  switch (spec.type()) {
-  case 0: case 'd': {
-    unsigned num_digits = internal::count_digits(abs_value);
-    CharPtr p = prepare_int_buffer(num_digits, spec, prefix, prefix_size) + 1;
-    internal::format_decimal(get(p), abs_value, 0);
-    break;
-  }
-  case 'x': case 'X': {
-    UnsignedType n = abs_value;
-    if (spec.flag(HASH_FLAG)) {
-      prefix[prefix_size++] = '0';
-      prefix[prefix_size++] = spec.type_prefix();
-    }
-    unsigned num_digits = 0;
-    do {
-      ++num_digits;
-    } while ((n >>= 4) != 0);
-    Char *p = get(prepare_int_buffer(
-      num_digits, spec, prefix, prefix_size));
-    n = abs_value;
-    const char *digits = spec.type() == 'x' ?
-        "0123456789abcdef" : "0123456789ABCDEF";
-    do {
-      *p-- = digits[n & 0xf];
-    } while ((n >>= 4) != 0);
-    break;
-  }
-  case 'b': case 'B': {
-    UnsignedType n = abs_value;
-    if (spec.flag(HASH_FLAG)) {
-      prefix[prefix_size++] = '0';
-      prefix[prefix_size++] = spec.type_prefix();
-    }
-    unsigned num_digits = 0;
-    do {
-      ++num_digits;
-    } while ((n >>= 1) != 0);
-    Char *p = get(prepare_int_buffer(num_digits, spec, prefix, prefix_size));
-    n = abs_value;
-    do {
-      *p-- = static_cast<Char>('0' + (n & 1));
-    } while ((n >>= 1) != 0);
-    break;
-  }
-  case 'o': {
-    UnsignedType n = abs_value;
-    if (spec.flag(HASH_FLAG))
-      prefix[prefix_size++] = '0';
-    unsigned num_digits = 0;
-    do {
-      ++num_digits;
-    } while ((n >>= 3) != 0);
-    Char *p = get(prepare_int_buffer(num_digits, spec, prefix, prefix_size));
-    n = abs_value;
-    do {
-      *p-- = static_cast<Char>('0' + (n & 7));
-    } while ((n >>= 3) != 0);
-    break;
-  }
-  case 'n': {
-    unsigned num_digits = internal::count_digits(abs_value);
-    fmt::StringRef sep = "";
-#if !(defined(ANDROID) || defined(__ANDROID__))
-    sep = internal::thousands_sep(std::localeconv());
-#endif
-    unsigned size = static_cast<unsigned>(
-          num_digits + sep.size() * ((num_digits - 1) / 3));
-    CharPtr p = prepare_int_buffer(size, spec, prefix, prefix_size) + 1;
-    internal::format_decimal(get(p), abs_value, 0, internal::ThousandsSep(sep));
-    break;
-  }
-  default:
-    internal::report_unknown_type(
-      spec.type(), spec.flag(CHAR_FLAG) ? "char" : "integer");
-    break;
-  }
-}
-
-template <typename Char>
-template <typename T, typename Spec>
-void BasicWriter<Char>::write_double(T value, const Spec &spec) {
-  // Check type.
-  char type = spec.type();
-  bool upper = false;
-  switch (type) {
-  case 0:
-    type = 'g';
-    break;
-  case 'e': case 'f': case 'g': case 'a':
-    break;
-  case 'F':
-#if FMT_MSC_VER
-    // MSVC's printf doesn't support 'F'.
-    type = 'f';
-#endif
-    // Fall through.
-  case 'E': case 'G': case 'A':
-    upper = true;
-    break;
-  default:
-    internal::report_unknown_type(type, "double");
-    break;
-  }
-
-  char sign = 0;
-  // Use isnegative instead of value < 0 because the latter is always
-  // false for NaN.
-  if (internal::FPUtil::isnegative(static_cast<double>(value))) {
-    sign = '-';
-    value = -value;
-  } else if (spec.flag(SIGN_FLAG)) {
-    sign = spec.flag(PLUS_FLAG) ? '+' : ' ';
-  }
-
-  if (internal::FPUtil::isnotanumber(value)) {
-    // Format NaN ourselves because sprintf's output is not consistent
-    // across platforms.
-    std::size_t nan_size = 4;
-    const char *nan = upper ? " NAN" : " nan";
-    if (!sign) {
-      --nan_size;
-      ++nan;
-    }
-    CharPtr out = write_str(nan, nan_size, spec);
-    if (sign)
-      *out = sign;
-    return;
-  }
-
-  if (internal::FPUtil::isinfinity(value)) {
-    // Format infinity ourselves because sprintf's output is not consistent
-    // across platforms.
-    std::size_t inf_size = 4;
-    const char *inf = upper ? " INF" : " inf";
-    if (!sign) {
-      --inf_size;
-      ++inf;
-    }
-    CharPtr out = write_str(inf, inf_size, spec);
-    if (sign)
-      *out = sign;
-    return;
-  }
-
-  std::size_t offset = buffer_.size();
-  unsigned width = spec.width();
-  if (sign) {
-    buffer_.reserve(buffer_.size() + (width > 1u ? width : 1u));
-    if (width > 0)
-      --width;
-    ++offset;
-  }
-
-  // Build format string.
-  enum { MAX_FORMAT_SIZE = 10}; // longest format: %#-*.*Lg
-  Char format[MAX_FORMAT_SIZE];
-  Char *format_ptr = format;
-  *format_ptr++ = '%';
-  unsigned width_for_sprintf = width;
-  if (spec.flag(HASH_FLAG))
-    *format_ptr++ = '#';
-  if (spec.align() == ALIGN_CENTER) {
-    width_for_sprintf = 0;
-  } else {
-    if (spec.align() == ALIGN_LEFT)
-      *format_ptr++ = '-';
-    if (width != 0)
-      *format_ptr++ = '*';
-  }
-  if (spec.precision() >= 0) {
-    *format_ptr++ = '.';
-    *format_ptr++ = '*';
-  }
-
-  append_float_length(format_ptr, value);
-  *format_ptr++ = type;
-  *format_ptr = '\0';
-
-  // Format using snprintf.
-  Char fill = internal::CharTraits<Char>::cast(spec.fill());
-  unsigned n = 0;
-  Char *start = FMT_NULL;
-  for (;;) {
-    std::size_t buffer_size = buffer_.capacity() - offset;
-#if FMT_MSC_VER
-    // MSVC's vsnprintf_s doesn't work with zero size, so reserve
-    // space for at least one extra character to make the size non-zero.
-    // Note that the buffer's capacity will increase by more than 1.
-    if (buffer_size == 0) {
-      buffer_.reserve(offset + 1);
-      buffer_size = buffer_.capacity() - offset;
-    }
-#endif
-    start = &buffer_[offset];
-    int result = internal::CharTraits<Char>::format_float(
-        start, buffer_size, format, width_for_sprintf, spec.precision(), value);
-    if (result >= 0) {
-      n = internal::to_unsigned(result);
-      if (offset + n < buffer_.capacity())
-        break;  // The buffer is large enough - continue with formatting.
-      buffer_.reserve(offset + n + 1);
-    } else {
-      // If result is negative we ask to increase the capacity by at least 1,
-      // but as std::vector, the buffer grows exponentially.
-      buffer_.reserve(buffer_.capacity() + 1);
-    }
-  }
-  if (sign) {
-    if ((spec.align() != ALIGN_RIGHT && spec.align() != ALIGN_DEFAULT) ||
-        *start != ' ') {
-      *(start - 1) = sign;
-      sign = 0;
-    } else {
-      *(start - 1) = fill;
-    }
-    ++n;
-  }
-  if (spec.align() == ALIGN_CENTER && spec.width() > n) {
-    width = spec.width();
-    CharPtr p = grow_buffer(width);
-    std::memmove(get(p) + (width - n) / 2, get(p), n * sizeof(Char));
-    fill_padding(p, spec.width(), n, fill);
-    return;
-  }
-  if (spec.fill() != ' ' || sign) {
-    while (*start == ' ')
-      *start++ = fill;
-    if (sign)
-      *(start - 1) = sign;
-  }
-  grow_buffer(n);
-}
-
-/**
-  \rst
-  This class template provides operations for formatting and writing data
-  into a character stream. The output is stored in a memory buffer that grows
-  dynamically.
-
-  You can use one of the following typedefs for common character types
-  and the standard allocator:
-
-  +---------------+-----------------------------------------------------+
-  | Type          | Definition                                          |
-  +===============+=====================================================+
-  | MemoryWriter  | BasicMemoryWriter<char, std::allocator<char>>       |
-  +---------------+-----------------------------------------------------+
-  | WMemoryWriter | BasicMemoryWriter<wchar_t, std::allocator<wchar_t>> |
-  +---------------+-----------------------------------------------------+
-
-  **Example**::
-
-     MemoryWriter out;
-     out << "The answer is " << 42 << "\n";
-     out.write("({:+f}, {:+f})", -3.14, 3.14);
-
-  This will write the following output to the ``out`` object:
-
-  .. code-block:: none
-
-     The answer is 42
-     (-3.140000, +3.140000)
-
-  The output can be converted to an ``std::string`` with ``out.str()`` or
-  accessed as a C string with ``out.c_str()``.
-  \endrst
- */
-template <typename Char, typename Allocator = std::allocator<Char> >
-class BasicMemoryWriter : public BasicWriter<Char> {
- private:
-  internal::MemoryBuffer<Char, internal::INLINE_BUFFER_SIZE, Allocator> buffer_;
-
- public:
-  explicit BasicMemoryWriter(const Allocator& alloc = Allocator())
-    : BasicWriter<Char>(buffer_), buffer_(alloc) {}
-
-#if FMT_USE_RVALUE_REFERENCES
-  /**
-    \rst
-    Constructs a :class:`fmt::BasicMemoryWriter` object moving the content
-    of the other object to it.
-    \endrst
-   */
-  BasicMemoryWriter(BasicMemoryWriter &&other)
-    : BasicWriter<Char>(buffer_), buffer_(std::move(other.buffer_)) {
-  }
-
-  /**
-    \rst
-    Moves the content of the other ``BasicMemoryWriter`` object to this one.
-    \endrst
-   */
-  BasicMemoryWriter &operator=(BasicMemoryWriter &&other) {
-    buffer_ = std::move(other.buffer_);
-    return *this;
-  }
-#endif
-};
-
-typedef BasicMemoryWriter<char> MemoryWriter;
-typedef BasicMemoryWriter<wchar_t> WMemoryWriter;
-
-/**
-  \rst
-  This class template provides operations for formatting and writing data
-  into a fixed-size array. For writing into a dynamically growing buffer
-  use :class:`fmt::BasicMemoryWriter`.
-
-  Any write method will throw ``std::runtime_error`` if the output doesn't fit
-  into the array.
-
-  You can use one of the following typedefs for common character types:
-
-  +--------------+---------------------------+
-  | Type         | Definition                |
-  +==============+===========================+
-  | ArrayWriter  | BasicArrayWriter<char>    |
-  +--------------+---------------------------+
-  | WArrayWriter | BasicArrayWriter<wchar_t> |
-  +--------------+---------------------------+
-  \endrst
- */
-template <typename Char>
-class BasicArrayWriter : public BasicWriter<Char> {
- private:
-  internal::FixedBuffer<Char> buffer_;
-
- public:
-  /**
-   \rst
-   Constructs a :class:`fmt::BasicArrayWriter` object for *array* of the
-   given size.
-   \endrst
-   */
-  BasicArrayWriter(Char *array, std::size_t size)
-    : BasicWriter<Char>(buffer_), buffer_(array, size) {}
-
-  /**
-   \rst
-   Constructs a :class:`fmt::BasicArrayWriter` object for *array* of the
-   size known at compile time.
-   \endrst
-   */
-  template <std::size_t SIZE>
-  explicit BasicArrayWriter(Char (&array)[SIZE])
-    : BasicWriter<Char>(buffer_), buffer_(array, SIZE) {}
-};
-
-typedef BasicArrayWriter<char> ArrayWriter;
-typedef BasicArrayWriter<wchar_t> WArrayWriter;
-
-// Reports a system error without throwing an exception.
-// Can be used to report errors from destructors.
-FMT_API void report_system_error(int error_code,
-                                 StringRef message) FMT_NOEXCEPT;
-
-#if FMT_USE_WINDOWS_H
-
-/** A Windows error. */
-class WindowsError : public SystemError {
- private:
-  FMT_API void init(int error_code, CStringRef format_str, ArgList args);
-
- public:
-  /**
-   \rst
-   Constructs a :class:`fmt::WindowsError` object with the description
-   of the form
-
-   .. parsed-literal::
-     *<message>*: *<system-message>*
-
-   where *<message>* is the formatted message and *<system-message>* is the
-   system message corresponding to the error code.
-   *error_code* is a Windows error code as given by ``GetLastError``.
-   If *error_code* is not a valid error code such as -1, the system message
-   will look like "error -1".
-
-   **Example**::
-
-     // This throws a WindowsError with the description
-     //   cannot open file 'madeup': The system cannot find the file specified.
-     // or similar (system message may vary).
-     const char *filename = "madeup";
-     LPOFSTRUCT of = LPOFSTRUCT();
-     HFILE file = OpenFile(filename, &of, OF_READ);
-     if (file == HFILE_ERROR) {
-       throw fmt::WindowsError(GetLastError(),
-                               "cannot open file '{}'", filename);
-     }
-   \endrst
-  */
-  WindowsError(int error_code, CStringRef message) {
-    init(error_code, message, ArgList());
-  }
-  FMT_VARIADIC_CTOR(WindowsError, init, int, CStringRef)
-};
-
-// Reports a Windows error without throwing an exception.
-// Can be used to report errors from destructors.
-FMT_API void report_windows_error(int error_code,
-                                  StringRef message) FMT_NOEXCEPT;
-
-#endif
-
-enum Color { BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE };
-
-/**
-  Formats a string and prints it to stdout using ANSI escape sequences
-  to specify color (experimental).
-  Example:
-    print_colored(fmt::RED, "Elapsed time: {0:.2f} seconds", 1.23);
- */
-FMT_API void print_colored(Color c, CStringRef format, ArgList args);
-
-/**
-  \rst
-  Formats arguments and returns the result as a string.
-
-  **Example**::
-
-    std::string message = format("The answer is {}", 42);
-  \endrst
-*/
-inline std::string format(CStringRef format_str, ArgList args) {
-  MemoryWriter w;
-  w.write(format_str, args);
-  return w.str();
-}
-
-inline std::wstring format(WCStringRef format_str, ArgList args) {
-  WMemoryWriter w;
-  w.write(format_str, args);
-  return w.str();
-}
-
-/**
-  \rst
-  Prints formatted data to the file *f*.
-
-  **Example**::
-
-    print(stderr, "Don't {}!", "panic");
-  \endrst
- */
-FMT_API void print(std::FILE *f, CStringRef format_str, ArgList args);
-
-/**
-  \rst
-  Prints formatted data to ``stdout``.
-
-  **Example**::
-
-    print("Elapsed time: {0:.2f} seconds", 1.23);
-  \endrst
- */
-FMT_API void print(CStringRef format_str, ArgList args);
-
-/**
-  Fast integer formatter.
- */
-class FormatInt {
- private:
-  // Buffer should be large enough to hold all digits (digits10 + 1),
-  // a sign and a null character.
-  enum {BUFFER_SIZE = std::numeric_limits<ULongLong>::digits10 + 3};
-  mutable char buffer_[BUFFER_SIZE];
-  char *str_;
-
-  // Formats value in reverse and returns the number of digits.
-  char *format_decimal(ULongLong value) {
-    char *buffer_end = buffer_ + BUFFER_SIZE - 1;
-    while (value >= 100) {
-      // Integer division is slow so do it for a group of two digits instead
-      // of for every digit. The idea comes from the talk by Alexandrescu
-      // "Three Optimization Tips for C++". See speed-test for a comparison.
-      unsigned index = static_cast<unsigned>((value % 100) * 2);
-      value /= 100;
-      *--buffer_end = internal::Data::DIGITS[index + 1];
-      *--buffer_end = internal::Data::DIGITS[index];
-    }
-    if (value < 10) {
-      *--buffer_end = static_cast<char>('0' + value);
-      return buffer_end;
-    }
-    unsigned index = static_cast<unsigned>(value * 2);
-    *--buffer_end = internal::Data::DIGITS[index + 1];
-    *--buffer_end = internal::Data::DIGITS[index];
-    return buffer_end;
-  }
-
-  void FormatSigned(LongLong value) {
-    ULongLong abs_value = static_cast<ULongLong>(value);
-    bool negative = value < 0;
-    if (negative)
-      abs_value = 0 - abs_value;
-    str_ = format_decimal(abs_value);
-    if (negative)
-      *--str_ = '-';
-  }
-
- public:
-  explicit FormatInt(int value) { FormatSigned(value); }
-  explicit FormatInt(long value) { FormatSigned(value); }
-  explicit FormatInt(LongLong value) { FormatSigned(value); }
-  explicit FormatInt(unsigned value) : str_(format_decimal(value)) {}
-  explicit FormatInt(unsigned long value) : str_(format_decimal(value)) {}
-  explicit FormatInt(ULongLong value) : str_(format_decimal(value)) {}
-
-  /** Returns the number of characters written to the output buffer. */
-  std::size_t size() const {
-    return internal::to_unsigned(buffer_ - str_ + BUFFER_SIZE - 1);
-  }
-
-  /**
-    Returns a pointer to the output buffer content. No terminating null
-    character is appended.
-   */
-  const char *data() const { return str_; }
-
-  /**
-    Returns a pointer to the output buffer content with terminating null
-    character appended.
-   */
-  const char *c_str() const {
-    buffer_[BUFFER_SIZE - 1] = '\0';
-    return str_;
-  }
-
-  /**
-    \rst
-    Returns the content of the output buffer as an ``std::string``.
-    \endrst
-   */
-  std::string str() const { return std::string(str_, size()); }
-};
-
-// Formats a decimal integer value writing into buffer and returns
-// a pointer to the end of the formatted string. This function doesn't
-// write a terminating null character.
-template <typename T>
-inline void format_decimal(char *&buffer, T value) {
-  typedef typename internal::IntTraits<T>::MainType MainType;
-  MainType abs_value = static_cast<MainType>(value);
-  if (internal::is_negative(value)) {
-    *buffer++ = '-';
-    abs_value = 0 - abs_value;
-  }
-  if (abs_value < 100) {
-    if (abs_value < 10) {
-      *buffer++ = static_cast<char>('0' + abs_value);
-      return;
-    }
-    unsigned index = static_cast<unsigned>(abs_value * 2);
-    *buffer++ = internal::Data::DIGITS[index];
-    *buffer++ = internal::Data::DIGITS[index + 1];
-    return;
-  }
-  unsigned num_digits = internal::count_digits(abs_value);
-  internal::format_decimal(buffer, abs_value, num_digits);
-  buffer += num_digits;
-}
-
-/**
-  \rst
-  Returns a named argument for formatting functions.
-
-  **Example**::
-
-    print("Elapsed time: {s:.2f} seconds", arg("s", 1.23));
-
-  \endrst
- */
-template <typename T>
-inline internal::NamedArgWithType<char, T> arg(StringRef name, const T &arg) {
-  return internal::NamedArgWithType<char, T>(name, arg);
-}
-
-template <typename T>
-inline internal::NamedArgWithType<wchar_t, T> arg(WStringRef name, const T &arg) {
-  return internal::NamedArgWithType<wchar_t, T>(name, arg);
-}
-
-// The following two functions are deleted intentionally to disable
-// nested named arguments as in ``format("{}", arg("a", arg("b", 42)))``.
-template <typename Char>
-void arg(StringRef, const internal::NamedArg<Char>&) FMT_DELETED_OR_UNDEFINED;
-template <typename Char>
-void arg(WStringRef, const internal::NamedArg<Char>&) FMT_DELETED_OR_UNDEFINED;
-}
-
-#if FMT_GCC_VERSION
-// Use the system_header pragma to suppress warnings about variadic macros
-// because suppressing -Wvariadic-macros with the diagnostic pragma doesn't
-// work. It is used at the end because we want to suppress as little warnings
-// as possible.
-# pragma GCC system_header
-#endif
-
-// This is used to work around VC++ bugs in handling variadic macros.
-#define FMT_EXPAND(args) args
-
-// Returns the number of arguments.
-// Based on https://groups.google.com/forum/#!topic/comp.std.c/d-6Mj5Lko_s.
-#define FMT_NARG(...) FMT_NARG_(__VA_ARGS__, FMT_RSEQ_N())
-#define FMT_NARG_(...) FMT_EXPAND(FMT_ARG_N(__VA_ARGS__))
-#define FMT_ARG_N(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, N, ...) N
-#define FMT_RSEQ_N() 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-
-#define FMT_FOR_EACH_(N, f, ...) \
-  FMT_EXPAND(FMT_CONCAT(FMT_FOR_EACH, N)(f, __VA_ARGS__))
-#define FMT_FOR_EACH(f, ...) \
-  FMT_EXPAND(FMT_FOR_EACH_(FMT_NARG(__VA_ARGS__), f, __VA_ARGS__))
-
-#define FMT_ADD_ARG_NAME(type, index) type arg##index
-#define FMT_GET_ARG_NAME(type, index) arg##index
-
-#if FMT_USE_VARIADIC_TEMPLATES
-# define FMT_VARIADIC_(Char, ReturnType, func, call, ...) \
-  template <typename... Args> \
-  ReturnType func(FMT_FOR_EACH(FMT_ADD_ARG_NAME, __VA_ARGS__), \
-      const Args & ... args) { \
-    typedef fmt::internal::ArgArray<sizeof...(Args)> ArgArray; \
-    typename ArgArray::Type array{ \
-      ArgArray::template make<fmt::BasicFormatter<Char> >(args)...}; \
-    call(FMT_FOR_EACH(FMT_GET_ARG_NAME, __VA_ARGS__), \
-      fmt::ArgList(fmt::internal::make_type(args...), array)); \
-  }
-#else
-// Defines a wrapper for a function taking __VA_ARGS__ arguments
-// and n additional arguments of arbitrary types.
-# define FMT_WRAP(Char, ReturnType, func, call, n, ...) \
-  template <FMT_GEN(n, FMT_MAKE_TEMPLATE_ARG)> \
-  inline ReturnType func(FMT_FOR_EACH(FMT_ADD_ARG_NAME, __VA_ARGS__), \
-      FMT_GEN(n, FMT_MAKE_ARG)) { \
-    fmt::internal::ArgArray<n>::Type arr; \
-    FMT_GEN(n, FMT_ASSIGN_##Char); \
-    call(FMT_FOR_EACH(FMT_GET_ARG_NAME, __VA_ARGS__), fmt::ArgList( \
-      fmt::internal::make_type(FMT_GEN(n, FMT_MAKE_REF2)), arr)); \
-  }
-
-# define FMT_VARIADIC_(Char, ReturnType, func, call, ...) \
-  inline ReturnType func(FMT_FOR_EACH(FMT_ADD_ARG_NAME, __VA_ARGS__)) { \
-    call(FMT_FOR_EACH(FMT_GET_ARG_NAME, __VA_ARGS__), fmt::ArgList()); \
-  } \
-  FMT_WRAP(Char, ReturnType, func, call, 1, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 2, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 3, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 4, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 5, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 6, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 7, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 8, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 9, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 10, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 11, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 12, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 13, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 14, __VA_ARGS__) \
-  FMT_WRAP(Char, ReturnType, func, call, 15, __VA_ARGS__)
-#endif  // FMT_USE_VARIADIC_TEMPLATES
-
-/**
-  \rst
-  Defines a variadic function with the specified return type, function name
-  and argument types passed as variable arguments to this macro.
-
-  **Example**::
-
-    void print_error(const char *file, int line, const char *format,
-                     fmt::ArgList args) {
-      fmt::print("{}: {}: ", file, line);
-      fmt::print(format, args);
-    }
-    FMT_VARIADIC(void, print_error, const char *, int, const char *)
-
-  ``FMT_VARIADIC`` is used for compatibility with legacy C++ compilers that
-  don't implement variadic templates. You don't have to use this macro if
-  you don't need legacy compiler support and can use variadic templates
-  directly::
-
-    template <typename... Args>
-    void print_error(const char *file, int line, const char *format,
-                     const Args & ... args) {
-      fmt::print("{}: {}: ", file, line);
-      fmt::print(format, args...);
-    }
-  \endrst
- */
-#define FMT_VARIADIC(ReturnType, func, ...) \
-  FMT_VARIADIC_(char, ReturnType, func, return func, __VA_ARGS__)
-
-#define FMT_VARIADIC_W(ReturnType, func, ...) \
-  FMT_VARIADIC_(wchar_t, ReturnType, func, return func, __VA_ARGS__)
-
-#define FMT_CAPTURE_ARG_(id, index) ::fmt::arg(#id, id)
-
-#define FMT_CAPTURE_ARG_W_(id, index) ::fmt::arg(L###id, id)
-
-/**
-  \rst
-  Convenient macro to capture the arguments' names and values into several
-  ``fmt::arg(name, value)``.
-
-  **Example**::
-
-    int x = 1, y = 2;
-    print("point: ({x}, {y})", FMT_CAPTURE(x, y));
-    // same as:
-    // print("point: ({x}, {y})", arg("x", x), arg("y", y));
-
-  \endrst
- */
-#define FMT_CAPTURE(...) FMT_FOR_EACH(FMT_CAPTURE_ARG_, __VA_ARGS__)
-
-#define FMT_CAPTURE_W(...) FMT_FOR_EACH(FMT_CAPTURE_ARG_W_, __VA_ARGS__)
-
-namespace fmt {
-FMT_VARIADIC(std::string, format, CStringRef)
-FMT_VARIADIC_W(std::wstring, format, WCStringRef)
-FMT_VARIADIC(void, print, CStringRef)
-FMT_VARIADIC(void, print, std::FILE *, CStringRef)
-FMT_VARIADIC(void, print_colored, Color, CStringRef)
-
-namespace internal {
-template <typename Char>
-inline bool is_name_start(Char c) {
-  return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c;
-}
-
-// Parses an unsigned integer advancing s to the end of the parsed input.
-// This function assumes that the first character of s is a digit.
-template <typename Char>
-unsigned parse_nonnegative_int(const Char *&s) {
-  assert('0' <= *s && *s <= '9');
-  unsigned value = 0;
-  do {
-    unsigned new_value = value * 10 + (*s++ - '0');
-    // Check if value wrapped around.
-    if (new_value < value) {
-      value = (std::numeric_limits<unsigned>::max)();
-      break;
-    }
-    value = new_value;
-  } while ('0' <= *s && *s <= '9');
-  // Convert to unsigned to prevent a warning.
-  unsigned max_int = (std::numeric_limits<int>::max)();
-  if (value > max_int)
-    FMT_THROW(FormatError("number is too big"));
-  return value;
-}
-
-inline void require_numeric_argument(const Arg &arg, char spec) {
-  if (arg.type > Arg::LAST_NUMERIC_TYPE) {
-    std::string message =
-        fmt::format("format specifier '{}' requires numeric argument", spec);
-    FMT_THROW(fmt::FormatError(message));
-  }
-}
-
-template <typename Char>
-void check_sign(const Char *&s, const Arg &arg) {
-  char sign = static_cast<char>(*s);
-  require_numeric_argument(arg, sign);
-  if (arg.type == Arg::UINT || arg.type == Arg::ULONG_LONG) {
-    FMT_THROW(FormatError(fmt::format(
-      "format specifier '{}' requires signed argument", sign)));
-  }
-  ++s;
-}
-}  // namespace internal
-
-template <typename Char, typename AF>
-inline internal::Arg BasicFormatter<Char, AF>::get_arg(
-    BasicStringRef<Char> arg_name, const char *&error) {
-  if (check_no_auto_index(error)) {
-    map_.init(args());
-    const internal::Arg *arg = map_.find(arg_name);
-    if (arg)
-      return *arg;
-    error = "argument not found";
-  }
-  return internal::Arg();
-}
-
-template <typename Char, typename AF>
-inline internal::Arg BasicFormatter<Char, AF>::parse_arg_index(const Char *&s) {
-  const char *error = FMT_NULL;
-  internal::Arg arg = *s < '0' || *s > '9' ?
-        next_arg(error) : get_arg(internal::parse_nonnegative_int(s), error);
-  if (error) {
-    FMT_THROW(FormatError(
-                *s != '}' && *s != ':' ? "invalid format string" : error));
-  }
-  return arg;
-}
-
-template <typename Char, typename AF>
-inline internal::Arg BasicFormatter<Char, AF>::parse_arg_name(const Char *&s) {
-  assert(internal::is_name_start(*s));
-  const Char *start = s;
-  Char c;
-  do {
-    c = *++s;
-  } while (internal::is_name_start(c) || ('0' <= c && c <= '9'));
-  const char *error = FMT_NULL;
-  internal::Arg arg = get_arg(BasicStringRef<Char>(start, s - start), error);
-  if (error)
-    FMT_THROW(FormatError(error));
-  return arg;
-}
-
-template <typename Char, typename ArgFormatter>
-const Char *BasicFormatter<Char, ArgFormatter>::format(
-    const Char *&format_str, const internal::Arg &arg) {
-  using internal::Arg;
-  const Char *s = format_str;
-  typename ArgFormatter::SpecType spec;
-  if (*s == ':') {
-    if (arg.type == Arg::CUSTOM) {
-      arg.custom.format(this, arg.custom.value, &s);
-      return s;
-    }
-    ++s;
-    // Parse fill and alignment.
-    if (Char c = *s) {
-      const Char *p = s + 1;
-      spec.align_ = ALIGN_DEFAULT;
-      do {
-        switch (*p) {
-          case '<':
-            spec.align_ = ALIGN_LEFT;
-            break;
-          case '>':
-            spec.align_ = ALIGN_RIGHT;
-            break;
-          case '=':
-            spec.align_ = ALIGN_NUMERIC;
-            break;
-          case '^':
-            spec.align_ = ALIGN_CENTER;
-            break;
-        }
-        if (spec.align_ != ALIGN_DEFAULT) {
-          if (p != s) {
-            if (c == '}') break;
-            if (c == '{')
-              FMT_THROW(FormatError("invalid fill character '{'"));
-            s += 2;
-            spec.fill_ = c;
-          } else ++s;
-          if (spec.align_ == ALIGN_NUMERIC)
-            require_numeric_argument(arg, '=');
-          break;
-        }
-      } while (--p >= s);
-    }
-
-    // Parse sign.
-    switch (*s) {
-      case '+':
-        check_sign(s, arg);
-        spec.flags_ |= SIGN_FLAG | PLUS_FLAG;
-        break;
-      case '-':
-        check_sign(s, arg);
-        spec.flags_ |= MINUS_FLAG;
-        break;
-      case ' ':
-        check_sign(s, arg);
-        spec.flags_ |= SIGN_FLAG;
-        break;
-    }
-
-    if (*s == '#') {
-      require_numeric_argument(arg, '#');
-      spec.flags_ |= HASH_FLAG;
-      ++s;
-    }
-
-    // Parse zero flag.
-    if (*s == '0') {
-      require_numeric_argument(arg, '0');
-      spec.align_ = ALIGN_NUMERIC;
-      spec.fill_ = '0';
-      ++s;
-    }
-
-    // Parse width.
-    if ('0' <= *s && *s <= '9') {
-      spec.width_ = internal::parse_nonnegative_int(s);
-    } else if (*s == '{') {
-      ++s;
-      Arg width_arg = internal::is_name_start(*s) ?
-            parse_arg_name(s) : parse_arg_index(s);
-      if (*s++ != '}')
-        FMT_THROW(FormatError("invalid format string"));
-      ULongLong value = 0;
-      switch (width_arg.type) {
-      case Arg::INT:
-        if (width_arg.int_value < 0)
-          FMT_THROW(FormatError("negative width"));
-        value = width_arg.int_value;
-        break;
-      case Arg::UINT:
-        value = width_arg.uint_value;
-        break;
-      case Arg::LONG_LONG:
-        if (width_arg.long_long_value < 0)
-          FMT_THROW(FormatError("negative width"));
-        value = width_arg.long_long_value;
-        break;
-      case Arg::ULONG_LONG:
-        value = width_arg.ulong_long_value;
-        break;
-      default:
-        FMT_THROW(FormatError("width is not integer"));
-      }
-      if (value > (std::numeric_limits<int>::max)())
-        FMT_THROW(FormatError("number is too big"));
-      spec.width_ = static_cast<int>(value);
-    }
-
-    // Parse precision.
-    if (*s == '.') {
-      ++s;
-      spec.precision_ = 0;
-      if ('0' <= *s && *s <= '9') {
-        spec.precision_ = internal::parse_nonnegative_int(s);
-      } else if (*s == '{') {
-        ++s;
-        Arg precision_arg = internal::is_name_start(*s) ?
-              parse_arg_name(s) : parse_arg_index(s);
-        if (*s++ != '}')
-          FMT_THROW(FormatError("invalid format string"));
-        ULongLong value = 0;
-        switch (precision_arg.type) {
-          case Arg::INT:
-            if (precision_arg.int_value < 0)
-              FMT_THROW(FormatError("negative precision"));
-            value = precision_arg.int_value;
-            break;
-          case Arg::UINT:
-            value = precision_arg.uint_value;
-            break;
-          case Arg::LONG_LONG:
-            if (precision_arg.long_long_value < 0)
-              FMT_THROW(FormatError("negative precision"));
-            value = precision_arg.long_long_value;
-            break;
-          case Arg::ULONG_LONG:
-            value = precision_arg.ulong_long_value;
-            break;
-          default:
-            FMT_THROW(FormatError("precision is not integer"));
-        }
-        if (value > (std::numeric_limits<int>::max)())
-          FMT_THROW(FormatError("number is too big"));
-        spec.precision_ = static_cast<int>(value);
-      } else {
-        FMT_THROW(FormatError("missing precision specifier"));
-      }
-      if (arg.type <= Arg::LAST_INTEGER_TYPE || arg.type == Arg::POINTER) {
-        FMT_THROW(FormatError(
-            fmt::format("precision not allowed in {} format specifier",
-            arg.type == Arg::POINTER ? "pointer" : "integer")));
-      }
-    }
-
-    // Parse type.
-    if (*s != '}' && *s)
-      spec.type_ = static_cast<char>(*s++);
-  }
-
-  if (*s++ != '}')
-    FMT_THROW(FormatError("missing '}' in format string"));
-
-  // Format argument.
-  ArgFormatter(*this, spec, s - 1).visit(arg);
-  return s;
-}
-
-template <typename Char, typename AF>
-void BasicFormatter<Char, AF>::format(BasicCStringRef<Char> format_str) {
-  const Char *s = format_str.c_str();
-  const Char *start = s;
-  while (*s) {
-    Char c = *s++;
-    if (c != '{' && c != '}') continue;
-    if (*s == c) {
-      write(writer_, start, s);
-      start = ++s;
-      continue;
-    }
-    if (c == '}')
-      FMT_THROW(FormatError("unmatched '}' in format string"));
-    write(writer_, start, s - 1);
-    internal::Arg arg = internal::is_name_start(*s) ?
-          parse_arg_name(s) : parse_arg_index(s);
-    start = s = format(s, arg);
-  }
-  write(writer_, start, s);
-}
-
-template <typename Char, typename It>
-struct ArgJoin {
-  It first;
-  It last;
-  BasicCStringRef<Char> sep;
-
-  ArgJoin(It first, It last, const BasicCStringRef<Char>& sep) :
-    first(first),
-    last(last),
-    sep(sep) {}
-};
-
-template <typename It>
-ArgJoin<char, It> join(It first, It last, const BasicCStringRef<char>& sep) {
-  return ArgJoin<char, It>(first, last, sep);
-}
-
-template <typename It>
-ArgJoin<wchar_t, It> join(It first, It last, const BasicCStringRef<wchar_t>& sep) {
-  return ArgJoin<wchar_t, It>(first, last, sep);
-}
-
-#if FMT_HAS_GXX_CXX11
-template <typename Range>
-auto join(const Range& range, const BasicCStringRef<char>& sep)
-    -> ArgJoin<char, decltype(std::begin(range))> {
-  return join(std::begin(range), std::end(range), sep);
-}
-
-template <typename Range>
-auto join(const Range& range, const BasicCStringRef<wchar_t>& sep)
-    -> ArgJoin<wchar_t, decltype(std::begin(range))> {
-  return join(std::begin(range), std::end(range), sep);
-}
-#endif
-
-template <typename ArgFormatter, typename Char, typename It>
-void format_arg(fmt::BasicFormatter<Char, ArgFormatter> &f,
-    const Char *&format_str, const ArgJoin<Char, It>& e) {
-  const Char* end = format_str;
-  if (*end == ':')
-    ++end;
-  while (*end && *end != '}')
-    ++end;
-  if (*end != '}')
-    FMT_THROW(FormatError("missing '}' in format string"));
-
-  It it = e.first;
-  if (it != e.last) {
-    const Char* save = format_str;
-    f.format(format_str, internal::MakeArg<fmt::BasicFormatter<Char, ArgFormatter> >(*it++));
-    while (it != e.last) {
-      f.writer().write(e.sep);
-      format_str = save;
-      f.format(format_str, internal::MakeArg<fmt::BasicFormatter<Char, ArgFormatter> >(*it++));
-    }
-  }
-  format_str = end + 1;
-}
-}  // namespace fmt
-
-#if FMT_USE_USER_DEFINED_LITERALS
-namespace fmt {
-namespace internal {
-
-template <typename Char>
-struct UdlFormat {
-  const Char *str;
-
-  template <typename... Args>
-  auto operator()(Args && ... args) const
-                  -> decltype(format(str, std::forward<Args>(args)...)) {
-    return format(str, std::forward<Args>(args)...);
-  }
-};
-
-template <typename Char>
-struct UdlArg {
-  const Char *str;
-
-  template <typename T>
-  NamedArgWithType<Char, T> operator=(T &&value) const {
-    return {str, std::forward<T>(value)};
-  }
-};
-
-} // namespace internal
-
-inline namespace literals {
-
-/**
-  \rst
-  C++11 literal equivalent of :func:`fmt::format`.
-
-  **Example**::
-
-    using namespace fmt::literals;
-    std::string message = "The answer is {}"_format(42);
-  \endrst
- */
-inline internal::UdlFormat<char>
-operator"" _format(const char *s, std::size_t) { return {s}; }
-inline internal::UdlFormat<wchar_t>
-operator"" _format(const wchar_t *s, std::size_t) { return {s}; }
-
-/**
-  \rst
-  C++11 literal equivalent of :func:`fmt::arg`.
-
-  **Example**::
-
-    using namespace fmt::literals;
-    print("Elapsed time: {s:.2f} seconds", "s"_a=1.23);
-  \endrst
- */
-inline internal::UdlArg<char>
-operator"" _a(const char *s, std::size_t) { return {s}; }
-inline internal::UdlArg<wchar_t>
-operator"" _a(const wchar_t *s, std::size_t) { return {s}; }
-
-} // inline namespace literals
-} // namespace fmt
-#endif // FMT_USE_USER_DEFINED_LITERALS
-
-// Restore warnings.
-#if FMT_GCC_VERSION >= 406
-# pragma GCC diagnostic pop
-#endif
-
-#if defined(__clang__) && !defined(FMT_ICC_VERSION)
-# pragma clang diagnostic pop
-#endif
-
-#ifdef FMT_HEADER_ONLY
-# define FMT_FUNC inline
-# include "format.cc"
-#else
-# define FMT_FUNC
-#endif
-
-#endif  // FMT_FORMAT_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/ostream.cc b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/ostream.cc
deleted file mode 100644
index 2d443f730..000000000
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/ostream.cc
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- Formatting library for C++ - std::ostream support
-
- Copyright (c) 2012 - 2016, Victor Zverovich
- All rights reserved.
-
- For the license information refer to format.h.
- */
-
-#include "ostream.h"
-
-namespace fmt {
-
-namespace internal {
-FMT_FUNC void write(std::ostream &os, Writer &w) {
-  const char *data = w.data();
-  typedef internal::MakeUnsigned<std::streamsize>::Type UnsignedStreamSize;
-  UnsignedStreamSize size = w.size();
-  UnsignedStreamSize max_size =
-      internal::to_unsigned((std::numeric_limits<std::streamsize>::max)());
-  do {
-    UnsignedStreamSize n = size <= max_size ? size : max_size;
-    os.write(data, static_cast<std::streamsize>(n));
-    data += n;
-    size -= n;
-  } while (size != 0);
-}
-}
-
-FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args) {
-  MemoryWriter w;
-  w.write(format_str, args);
-  internal::write(os, w);
-}
-}  // namespace fmt
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/ostream.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/ostream.h
deleted file mode 100644
index 84a02d173..000000000
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/fmt/ostream.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- Formatting library for C++ - std::ostream support
-
- Copyright (c) 2012 - 2016, Victor Zverovich
- All rights reserved.
-
- For the license information refer to format.h.
- */
-
-#ifndef FMT_OSTREAM_H_
-#define FMT_OSTREAM_H_
-
-#include "format.h"
-#include <ostream>
-
-namespace fmt {
-
-namespace internal {
-
-template <class Char>
-class FormatBuf : public std::basic_streambuf<Char> {
- private:
-  typedef typename std::basic_streambuf<Char>::int_type int_type;
-  typedef typename std::basic_streambuf<Char>::traits_type traits_type;
-
-  Buffer<Char> &buffer_;
-
- public:
-  FormatBuf(Buffer<Char> &buffer) : buffer_(buffer) {}
-
- protected:
-  // The put-area is actually always empty. This makes the implementation
-  // simpler and has the advantage that the streambuf and the buffer are always
-  // in sync and sputc never writes into uninitialized memory. The obvious
-  // disadvantage is that each call to sputc always results in a (virtual) call
-  // to overflow. There is no disadvantage here for sputn since this always
-  // results in a call to xsputn.
-
-  int_type overflow(int_type ch = traits_type::eof()) FMT_OVERRIDE {
-    if (!traits_type::eq_int_type(ch, traits_type::eof()))
-      buffer_.push_back(static_cast<Char>(ch));
-    return ch;
-  }
-
-  std::streamsize xsputn(const Char *s, std::streamsize count) FMT_OVERRIDE {
-    buffer_.append(s, s + count);
-    return count;
-  }
-};
-
-Yes &convert(std::ostream &);
-
-struct DummyStream : std::ostream {
-  DummyStream();  // Suppress a bogus warning in MSVC.
-  // Hide all operator<< overloads from std::ostream.
-  void operator<<(Null<>);
-};
-
-No &operator<<(std::ostream &, int);
-
-template<typename T>
-struct ConvertToIntImpl<T, true> {
-  // Convert to int only if T doesn't have an overloaded operator<<.
-  enum {
-    value = sizeof(convert(get<DummyStream>() << get<T>())) == sizeof(No)
-  };
-};
-
-// Write the content of w to os.
-FMT_API void write(std::ostream &os, Writer &w);
-}  // namespace internal
-
-// Formats a value.
-template <typename Char, typename ArgFormatter_, typename T>
-void format_arg(BasicFormatter<Char, ArgFormatter_> &f,
-                const Char *&format_str, const T &value) {
-  internal::MemoryBuffer<Char, internal::INLINE_BUFFER_SIZE> buffer;
-
-  internal::FormatBuf<Char> format_buf(buffer);
-  std::basic_ostream<Char> output(&format_buf);
-  output << value;
-
-  BasicStringRef<Char> str(&buffer[0], buffer.size());
-  typedef internal::MakeArg< BasicFormatter<Char> > MakeArg;
-  format_str = f.format(format_str, MakeArg(str));
-}
-
-/**
-  \rst
-  Prints formatted data to the stream *os*.
-
-  **Example**::
-
-    print(cerr, "Don't {}!", "panic");
-  \endrst
- */
-FMT_API void print(std::ostream &os, CStringRef format_str, ArgList args);
-FMT_VARIADIC(void, print, std::ostream &, CStringRef)
-}  // namespace fmt
-
-#ifdef FMT_HEADER_ONLY
-# include "ostream.cc"
-#endif
-
-#endif  // FMT_OSTREAM_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/grid.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/grid.hpp
index 737ee1f85..43998f8c2 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/grid.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/grid.hpp
@@ -55,7 +55,7 @@ struct GridRef
         inline
         Vertex      vertex(Index idx) const;
 
-        Index       index(const Vertex& v) const                { Index idx = 0; for (unsigned i = 0; i < D; ++i) { idx += ((Index) v[i]) * ((Index) stride_[i]); } return idx; }
+        Index       index(const Vertex& v) const                { Index idx = 0; for (unsigned i = 0; i < D; ++i) { idx += ((Index) v[i]) * stride_[i]; } return idx; }
 
         Index       size() const                                { return size(shape()); }
         void        swap(GridRef& other)                        { std::swap(data_, other.data_); std::swap(shape_, other.shape_); std::swap(stride_, other.stride_); std::swap(c_order_, other.c_order_); }
@@ -73,10 +73,9 @@ struct GridRef
         {
             Index cur = 1;
             if (c_order_)
-                for (unsigned i = D; i > 0; --i) { stride_[i-1] = cur; cur *= shape_[i-1]; }
+                for (unsigned i = D; i > 0; --i) { stride_[i-1] = cur; cur *= static_cast<Index>(shape_[i-1]); }
             else
-                for (unsigned i = 0; i < D; ++i) { stride_[i] = cur; cur *= shape_[i]; }
-
+                for (unsigned i = 0; i < D; ++i) { stride_[i] = cur; cur *= static_cast<Index>(shape_[i]); }
         }
         void    set_shape(const Vertex& v)                      { shape_ = v; set_stride(); }
         void    set_data(C* data)                               { data_ = data; }
@@ -85,7 +84,7 @@ struct GridRef
     private:
         C*      data_;
         Vertex  shape_;
-        Vertex  stride_;
+        diy::Point<Index, D> stride_;
         bool    c_order_;
 };
 
@@ -107,8 +106,8 @@ struct Grid: public GridRef<C,D>
                 Grid():
                     Parent(new C[0], Vertex::zero())            {}
         template<class Int>
-                Grid(const Point<Int, D>& shape, bool c_order = true):
-                    Parent(new C[size(shape)], shape, c_order)
+                Grid(const Point<Int, D>& s, bool c_order = true):
+                    Parent(new C[size(s)], s, c_order)
                 {}
 
                 Grid(Grid&& g): Grid()                          { Parent::swap(g); }
@@ -147,11 +146,11 @@ struct Grid: public GridRef<C,D>
 
     private:
         template<class OC>
-        void    copy_data(const OC* data)
+        void    copy_data(const OC* data_)
         {
             Index s = size(shape());
             for (Index i = 0; i < s; ++i)
-                Parent::data()[i] = data[i];
+                Parent::data()[i] = data_[i];
         }
 };
 
@@ -181,13 +180,13 @@ vertex(typename GridRef<C, D>::Index idx) const
     if (c_order())
         for (unsigned i = 0; i < D; ++i)
         {
-            v[i] = idx / stride_[i];
+            v[i] = static_cast<int>(idx / stride_[i]);
             idx %= stride_[i];
         }
     else
         for (int i = D-1; i >= 0; --i)
         {
-            v[i] = idx / stride_[i];
+            v[i] = static_cast<int>(idx / stride_[i]);
             idx %= stride_[i];
         }
     return v;
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/block.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/block.hpp
index 55c610b19..9f5a1852c 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/block.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/block.hpp
@@ -205,12 +205,11 @@ namespace io
         extra.reset();
 
         // Get local gids from assigner
-        size_t size = all_offset_counts.size();
-        assigner.set_nblocks(size);
+        assigner.set_nblocks(static_cast<int>(all_offset_counts.size()));
         std::vector<int> gids;
         assigner.local_gids(comm.rank(), gids);
 
-        for (unsigned i = 0; i < gids.size(); ++i)
+        for (size_t i = 0; i < gids.size(); ++i)
         {
             if (gids[i] != all_offset_counts[gids[i]].gid)
                 get_logger()->warn("gids don't match in diy::io::read_blocks(), {} vs {}",
@@ -342,7 +341,7 @@ namespace split
     }
 
     // Get local gids from assigner
-    assigner.set_nblocks(size);
+    assigner.set_nblocks(static_cast<int>(size));
     std::vector<int> gids;
     assigner.local_gids(comm.rank(), gids);
 
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/bov.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/bov.hpp
index 8e3cb7953..1c60d821b 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/bov.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/bov.hpp
@@ -2,11 +2,8 @@
 #define VTKMDIY_IO_BOV_HPP
 
 #include <vector>
-#include <algorithm>
-#include <numeric>
 
-#include "../types.hpp"
-#include "../mpi.hpp"
+#include "../mpi/io.hpp"
 
 namespace diy
 {
@@ -39,8 +36,9 @@ namespace io
             shape_.push_back(shape[i]);
             stride_.push_back(1);
         }
-        for (int i = shape_.size() - 2; i >=  0; --i)
+        for (auto i = shape_.size() > 1 ? shape_.size() - 1 : 0; i-- > 0; )  // unsigned-safe countdown: body sees i = size-2 .. 0, never runs for rank < 2
           stride_[i] = stride_[i+1] * shape_[i+1];
+        // every stride_[i] is now stride_[i+1] * shape_[i+1], with the last stride left at 1
       }
 
       const Shape&  shape() const                                       { return shape_; }
@@ -71,50 +69,7 @@ void
 diy::io::BOV::
 read(const DiscreteBounds& bounds, T* buffer, bool collective, int chunk) const
 {
-#ifndef VTKM_DIY_NO_MPI
-  int dim   = shape_.size();
-  int total = 1;
-  std::vector<int> subsizes;
-  for (int i = 0; i < dim; ++i)
-  {
-    subsizes.push_back(bounds.max[i] - bounds.min[i] + 1);
-    total *= subsizes.back();
-  }
-
-  MPI_Datatype T_type;
-  if (chunk == 1)
-    T_type = mpi::detail::get_mpi_datatype<T>();
-  else
-  {
-    // create an MPI struct of size chunk to read the data in those chunks
-    // (this allows to work around MPI-IO weirdness where crucial quantities
-    // are ints, which are too narrow of a type)
-    int             array_of_blocklengths[]  = { chunk };
-    MPI_Aint        array_of_displacements[] = { 0 };
-    MPI_Datatype    array_of_types[]         = { mpi::detail::get_mpi_datatype<T>() };
-    MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
-    MPI_Type_commit(&T_type);
-  }
-
-  MPI_Datatype fileblk;
-  MPI_Type_create_subarray(dim, (int*) &shape_[0], &subsizes[0], (int*) &bounds.min[0], MPI_ORDER_C, T_type, &fileblk);
-  MPI_Type_commit(&fileblk);
-
-  MPI_File_set_view(f_.handle(), offset_, T_type, fileblk, (char*)"native", MPI_INFO_NULL);
-
-  mpi::status s;
-  if (!collective)
-      MPI_File_read(f_.handle(), buffer, total, T_type, &s.s);
-  else
-      MPI_File_read_all(f_.handle(), buffer, total, T_type, &s.s);
-
-  if (chunk != 1)
-    MPI_Type_free(&T_type);
-  MPI_Type_free(&fileblk);
-#else
-  (void) bounds; (void) buffer; (void) collective; (void)chunk;
-  DIY_UNSUPPORTED_MPI_CALL(diy::io::BOV::read);
-#endif
+  f_.read_bov(bounds, static_cast<int>(shape_.size()), shape_.data(), reinterpret_cast<char*>(buffer), offset_, mpi::detail::get_mpi_datatype<T>(), collective, chunk);
 }
 
 template<class T>
@@ -130,52 +85,7 @@ void
 diy::io::BOV::
 write(const DiscreteBounds& bounds, const T* buffer, const DiscreteBounds& core, bool collective, int chunk)
 {
-#ifndef VTKM_DIY_NO_MPI
-  int dim   = shape_.size();
-  std::vector<int> subsizes;
-  std::vector<int> buffer_shape, buffer_start;
-  for (int i = 0; i < dim; ++i)
-  {
-    buffer_shape.push_back(bounds.max[i] - bounds.min[i] + 1);
-    buffer_start.push_back(core.min[i] - bounds.min[i]);
-    subsizes.push_back(core.max[i] - core.min[i] + 1);
-  }
-
-  MPI_Datatype T_type;
-  if (chunk == 1)
-    T_type = mpi::detail::get_mpi_datatype<T>();
-  else
-  {
-    // assume T is a binary block and create an MPI struct of appropriate size
-    int             array_of_blocklengths[]  = { chunk };
-    MPI_Aint        array_of_displacements[] = { 0 };
-    MPI_Datatype    array_of_types[]         = { mpi::detail::get_mpi_datatype<T>() };
-    MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
-    MPI_Type_commit(&T_type);
-  }
-
-  MPI_Datatype fileblk, subbuffer;
-  MPI_Type_create_subarray(dim, (int*) &shape_[0],       &subsizes[0], (int*) &core.min[0],     MPI_ORDER_C, T_type, &fileblk);
-  MPI_Type_create_subarray(dim, (int*) &buffer_shape[0], &subsizes[0], (int*) &buffer_start[0], MPI_ORDER_C, T_type, &subbuffer);
-  MPI_Type_commit(&fileblk);
-  MPI_Type_commit(&subbuffer);
-
-  MPI_File_set_view(f_.handle(), offset_, T_type, fileblk, (char*)"native", MPI_INFO_NULL);
-
-  mpi::status s;
-  if (!collective)
-    MPI_File_write(f_.handle(), (void*)buffer, 1, subbuffer, &s.s);
-  else
-    MPI_File_write_all(f_.handle(), (void*)buffer, 1, subbuffer, &s.s);
-
-  if (chunk != 1)
-    MPI_Type_free(&T_type);
-  MPI_Type_free(&fileblk);
-  MPI_Type_free(&subbuffer);
-#else
-  (void) bounds; (void) buffer;(void) core; (void) collective; (void) chunk;
-  DIY_UNSUPPORTED_MPI_CALL(diy::io::bov::write);
-#endif
+  f_.write_bov(bounds, core, static_cast<int>(shape_.size()), shape_.data(), reinterpret_cast<const char*>(buffer), offset_, mpi::detail::get_mpi_datatype<T>(), collective, chunk);
 }
 
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/numpy.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/numpy.hpp
index 4d74a589e..9c506b91e 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/numpy.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/numpy.hpp
@@ -79,21 +79,21 @@ parse_npy_header(BOV::Shape& shape, bool& fortran_order)
     header = header.substr(11, nl - 11 + 1);
     size_t header_size = nl + 1;
 
-    int loc1, loc2;
+    size_t loc1, loc2;
 
     //fortran order
     loc1 = header.find("fortran_order")+16;
     fortran_order = (header.substr(loc1,4) == "True" ? true : false);
 
     //shape
-    unsigned ndims;
+    size_t ndims;
     loc1 = header.find("(");
     loc2 = header.find(")");
     std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
     if(str_shape[str_shape.size()-1] == ',') ndims = 1;
     else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
     shape.resize(ndims);
-    for(unsigned int i = 0;i < ndims;i++) {
+    for(size_t i = 0;i < ndims;i++) {
         loc1 = str_shape.find(",");
         shape[i] = atoi(str_shape.substr(0,loc1).c_str());
         str_shape = str_shape.substr(loc1+1);
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/shared.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/shared.hpp
index a49db1399..c143b241f 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/shared.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/shared.hpp
@@ -31,8 +31,8 @@ class SharedOutFile: public std::ostringstream
 
                 // write the file serially
                 std::ofstream out(filename_);
-                for (auto& contents : all_contents)
-                    out.write(contents.data(), contents.size());
+                for (auto& cntnts : all_contents)
+                    out.write(cntnts.data(), cntnts.size());
             } else
                 diy::mpi::gather(world_, contents, root_);
         }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/utils.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/utils.hpp
index 092c64e9c..b096289c6 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/utils.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/io/utils.hpp
@@ -14,7 +14,7 @@
 #include <cstdlib>      // mkstemp() on Linux
 #include <sys/stat.h>
 
-#include "../constants.h" // for DIY_UNUSED
+#include "../constants.h" // for VTKMDIY_UNUSED
 
 namespace diy
 {
@@ -82,8 +82,8 @@ namespace utils
       _close(fd);
     }
 #else
-    auto r = ::truncate(filename.c_str(), static_cast<off_t>(length));
-    (void) r;
+    int error = ::truncate(filename.c_str(), static_cast<off_t>(length));
+    VTKMDIY_UNUSED(error);
 #endif
   }
 
@@ -141,7 +141,7 @@ namespace utils
   inline void sync(int fd)
   {
 #if defined(_WIN32)
-    DIY_UNUSED(fd);
+    VTKMDIY_UNUSED(fd);
 #else
     fsync(fd);
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/link.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/link.hpp
index 680fedbd4..0bc892abc 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/link.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/link.hpp
@@ -1,5 +1,5 @@
-#ifndef VTKMDIY_COVER_HPP
-#define VTKMDIY_COVER_HPP
+#ifndef VTKMDIY_LINK_HPP
+#define VTKMDIY_LINK_HPP
 
 #include <vector>
 #include <map>
@@ -9,14 +9,22 @@
 #include "serialization.hpp"
 #include "assigner.hpp"
 
+#include "factory.hpp"
+
 namespace diy
 {
   // Local view of a distributed representation of a cover, a completely unstructured link
-  class Link
+  class Link: public Factory<Link>
   {
     public:
       using Neighbors = std::vector<BlockID>;
 
+                Link(Key)                           {}  // for Factory
+                Link()                              = default;
+                Link(const Link&)                   = default;
+                Link(Link&&)                        = default;
+      Link&     operator=(const Link&)              = default;
+      Link&     operator=(Link&&)                   = default;
       virtual   ~Link()                             {}  // need to be able to delete derived classes
 
       int       size() const                        { return static_cast<int>(neighbors_.size()); }
@@ -38,11 +46,11 @@ namespace diy
       Neighbors&
                 neighbors()                         { return neighbors_; }
 
+      virtual Link* clone() const                   { return new Link(*this); }
+
       virtual void  save(BinaryBuffer& bb) const    { diy::save(bb, neighbors_); }
       virtual void  load(BinaryBuffer& bb)          { diy::load(bb, neighbors_); }
 
-      virtual size_t id() const                     { return 0; }
-
     private:
       Neighbors neighbors_;
   };
@@ -50,32 +58,13 @@ namespace diy
   template<class Bounds_>
   class RegularLink;
 
-  typedef       RegularLink<DiscreteBounds>         RegularGridLink;
-  typedef       RegularLink<ContinuousBounds>       RegularContinuousLink;
-
-  // Selector between regular discrete and contious links given bounds type
-  template<class Bounds_>
-  struct RegularLinkSelector;
-
-  template<>
-  struct RegularLinkSelector<DiscreteBounds>
-  {
-    typedef     RegularGridLink         type;
-    static const size_t id = 1;
-  };
-
-  template<>
-  struct RegularLinkSelector<ContinuousBounds>
-  {
-    typedef     RegularContinuousLink   type;
-    static const size_t id = 2;
-  };
-
+  using RegularGridLink         = RegularLink<DiscreteBounds>;
+  using RegularContinuousLink   = RegularLink<ContinuousBounds>;
 
   // for a regular decomposition, it makes sense to address the neighbors by direction
   // and store local and neighbor bounds
   template<class Bounds_>
-  class RegularLink: public Link
+  class RegularLink: public Link::Registrar<RegularLink<Bounds_>>
   {
     public:
       typedef   Bounds_                             Bounds;
@@ -84,6 +73,8 @@ namespace diy
       typedef   std::vector<Direction>              DirVec;
 
     public:
+                RegularLink():
+                  dim_(0), core_(0), bounds_(0)               {}        // for Factory
                 RegularLink(int dim, const Bounds& core__, const Bounds& bounds__):
                   dim_(dim), core_(core__), bounds_(bounds__) {}
 
@@ -93,7 +84,7 @@ namespace diy
       // direction
       int       direction(Direction dir) const;         // convert direction to a neighbor (-1 if no neighbor)
       Direction direction(int i) const                  { return dir_vec_[i]; }
-      void      add_direction(Direction dir)            { int c = dir_map_.size(); dir_map_[dir] = c; dir_vec_.push_back(dir); }
+      void      add_direction(Direction dir)            { auto c = static_cast<int>(dir_map_.size()); dir_map_[dir] = c; dir_vec_.push_back(dir); }
 
       // wrap
       void       add_wrap(Direction dir)                { wrap_.push_back(dir); }
@@ -105,12 +96,16 @@ namespace diy
       Bounds&       core()                              { return core_; }
       const Bounds& bounds() const                      { return bounds_; }
       Bounds&       bounds()                            { return bounds_; }
+      const Bounds& core(int i) const                   { return nbr_cores_[i]; }
       const Bounds& bounds(int i) const                 { return nbr_bounds_[i]; }
+      void          add_core(const Bounds& core__)      { nbr_cores_.push_back(core__); }
       void          add_bounds(const Bounds& bounds__)  { nbr_bounds_.push_back(bounds__); }
 
       void      swap(RegularLink& other)                { Link::swap(other); dir_map_.swap(other.dir_map_); dir_vec_.swap(other.dir_vec_); nbr_bounds_.swap(other.nbr_bounds_); std::swap(dim_, other.dim_); wrap_.swap(other.wrap_); std::swap(core_, other.core_); std::swap(bounds_, other.bounds_); }
 
-      void      save(BinaryBuffer& bb) const
+      Link*     clone() const override                  { return new RegularLink(*this); }
+
+      void      save(BinaryBuffer& bb) const override
       {
           Link::save(bb);
           diy::save(bb, dim_);
@@ -118,11 +113,12 @@ namespace diy
           diy::save(bb, dir_vec_);
           diy::save(bb, core_);
           diy::save(bb, bounds_);
+          diy::save(bb, nbr_cores_);
           diy::save(bb, nbr_bounds_);
           diy::save(bb, wrap_);
       }
 
-      void      load(BinaryBuffer& bb)
+      void      load(BinaryBuffer& bb) override
       {
           Link::load(bb);
           diy::load(bb, dim_);
@@ -130,12 +126,11 @@ namespace diy
           diy::load(bb, dir_vec_);
           diy::load(bb, core_);
           diy::load(bb, bounds_);
+          diy::load(bb, nbr_cores_);
           diy::load(bb, nbr_bounds_);
           diy::load(bb, wrap_);
       }
 
-      virtual size_t id() const                         { return RegularLinkSelector<Bounds>::id; }
-
     private:
       int       dim_;
 
@@ -144,31 +139,139 @@ namespace diy
 
       Bounds                    core_;
       Bounds                    bounds_;
+      std::vector<Bounds>       nbr_cores_;
       std::vector<Bounds>       nbr_bounds_;
       std::vector<Direction>    wrap_;
   };
 
-  // Other cover candidates: KDTreeLink, AMRGridLink
+  struct AMRLink: public Link::Registrar<AMRLink>
+  {
+    public:
+      using Bounds      = DiscreteBounds;
+      using Directions  = std::vector<Direction>;
+      using Point       = Bounds::Point;
+
+      struct Description
+      {
+          int       level       { -1 };
+          Point     refinement  { 0 };      // refinement of this level w.r.t. level 0
+          Bounds    core        { 0 };
+          Bounds    bounds      { 0 };      // with ghosts
+
+                    Description() = default;
+                    Description(int level_, Point refinement_, Bounds core_, Bounds bounds_):
+                        level(level_), refinement(refinement_), core(core_), bounds(bounds_)    {}
+      };
+      using Descriptions = std::vector<Description>;
+
+    public:
+                    AMRLink(int dim, int level, Point refinement, const Bounds& core, const Bounds& bounds):
+                        dim_(dim), local_ { level, refinement, core, bounds }               {}
+                    AMRLink(int dim, int level, int refinement, const Bounds& core, const Bounds& bounds):
+                        AMRLink(dim, level, refinement * Point::one(dim), core, bounds)     {}
+                    AMRLink(): AMRLink(0, -1, 0, Bounds(0), Bounds(0))                      {}        // for Factory
+
+      // dimension
+      int           dimension() const                       { return dim_; }
+
+      // local information
+      int           level() const                           { return local_.level; }
+      int           level(int i) const                      { return nbr_descriptions_[i].level; }
+      Point         refinement() const                      { return local_.refinement; }
+      Point         refinement(int i) const                 { return nbr_descriptions_[i].refinement; }
+
+      // wrap
+      void          add_wrap(Direction dir)                 { wrap_.push_back(dir); }
+      const Directions&
+                    wrap() const                            { return wrap_; }
+
+      // bounds
+      const Bounds& core() const                            { return local_.core; }
+      Bounds&       core()                                  { return local_.core; }
+      const Bounds& bounds() const                          { return local_.bounds; }
+      Bounds&       bounds()                                { return local_.bounds; }
+      const Bounds& core(int i) const                       { return nbr_descriptions_[i].core; }
+      const Bounds& bounds(int i) const                     { return nbr_descriptions_[i].bounds; }
+      void          add_bounds(int level_,
+                               Point refinement_,
+                               const Bounds& core_,
+                               const Bounds& bounds_)       { nbr_descriptions_.emplace_back(Description {level_, refinement_, core_, bounds_}); }
+      void          add_bounds(int level_,
+                               int refinement_,
+                               const Bounds& core_,
+                               const Bounds& bounds_)       { add_bounds(level_, refinement_ * Point::one(dim_), core_, bounds_); }
+
+      Link*         clone() const override                  { return new AMRLink(*this); }
+
+      void          save(BinaryBuffer& bb) const override
+      {
+          Link::save(bb);
+          diy::save(bb, dim_);
+          diy::save(bb, local_);
+          diy::save(bb, nbr_descriptions_);
+          diy::save(bb, wrap_);
+      }
+
+      void          load(BinaryBuffer& bb) override
+      {
+          Link::load(bb);
+          diy::load(bb, dim_);
+          diy::load(bb, local_);
+          diy::load(bb, nbr_descriptions_);
+          diy::load(bb, wrap_);
+      }
+
+    private:
+        int                         dim_;
+
+        Description                 local_;
+        Descriptions                nbr_descriptions_;
+        Directions                  wrap_;
+  };
 
   struct LinkFactory
   {
     public:
-      static Link*          create(size_t id)
+      static Link*          create(std::string name)
       {
-          // not pretty, but will do for now
-          if (id == 0)
-            return new Link;
-          else if (id == 1)
-            return new RegularGridLink(0, DiscreteBounds(), DiscreteBounds());
-          else if (id == 2)
-            return new RegularContinuousLink(0, ContinuousBounds(), ContinuousBounds());
-          else
-            return 0;
+          return Link::make(name);
       }
 
       inline static void    save(BinaryBuffer& bb, const Link* l);
       inline static Link*   load(BinaryBuffer& bb);
   };
+
+  namespace detail
+  {
+      inline void instantiate_common_regular_links()
+      {
+          // Instantiate the common types to register them
+          RegularLink<Bounds<int>>      rl_int;
+          RegularLink<Bounds<float>>    rl_float;
+          RegularLink<Bounds<double>>   rl_double;
+          RegularLink<Bounds<long>>     rl_long;
+      }
+  }
+
+    template<>
+    struct Serialization<diy::AMRLink::Description>
+    {
+        static void         save(diy::BinaryBuffer& bb, const diy::AMRLink::Description& x)
+        {
+            diy::save(bb, x.level);
+            diy::save(bb, x.refinement);
+            diy::save(bb, x.core);
+            diy::save(bb, x.bounds);
+        }
+
+        static void         load(diy::BinaryBuffer& bb, diy::AMRLink::Description& x)
+        {
+            diy::load(bb, x.level);
+            diy::load(bb, x.refinement);
+            diy::load(bb, x.core);
+            diy::load(bb, x.bounds);
+        }
+    };
 }
 
 
@@ -184,7 +287,7 @@ diy::Link*
 diy::LinkFactory::
 load(BinaryBuffer& bb)
 {
-    size_t id;
+    std::string id;
     diy::load(bb, id);
     Link* l = create(id);
     l->load(bb);
@@ -223,4 +326,4 @@ direction(Direction dir) const
     return it->second;
 }
 
-#endif
+#endif      // VTKMDIY_LINK_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/log.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/log.hpp
index cc18b0118..292851ccc 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/log.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/log.hpp
@@ -4,8 +4,8 @@
 #ifndef VTKMDIY_USE_SPDLOG
 
 #include <memory>
-#include "fmt/format.h"
-#include "fmt/ostream.h"
+#include "thirdparty/fmt/format.h"
+#include "thirdparty/fmt/ostream.h"
 
 namespace diy
 {
@@ -47,12 +47,13 @@ set_logger(Args...)
 
 }   // diy
 
-#else // DIY_USE_SPDLOG
+#else // VTKMDIY_USE_SPDLOG
 
 #include <string>
 
 #include <spdlog/spdlog.h>
 #include <spdlog/sinks/null_sink.h>
+#include <spdlog/sinks/stdout_sinks.h>
 
 #include <spdlog/fmt/bundled/format.h>
 #include <spdlog/fmt/bundled/ostream.h>
@@ -80,10 +81,7 @@ std::shared_ptr<spd::logger>
 create_logger(std::string log_level)
 {
     auto log = spd::stderr_logger_mt("diy");
-    int lvl;
-    for (lvl = spd::level::trace; lvl < spd::level::off; ++lvl)
-        if (spd::level::level_names[lvl] == log_level)
-            break;
+    int lvl = spd::level::from_str(log_level);
     log->set_level(static_cast<spd::level::level_enum>(lvl));
     return log;
 }
@@ -100,4 +98,4 @@ set_logger(Args... args)
 #endif
 
 
-#endif // DIY_LOG_HPP
+#endif // VTKMDIY_LOG_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/master.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/master.hpp
index c91cbb8d6..716948446 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/master.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/master.hpp
@@ -9,6 +9,7 @@
 #include <functional>
 #include <numeric>
 #include <memory>
+#include <chrono>
 
 #include "link.hpp"
 #include "collection.hpp"
@@ -83,7 +84,7 @@ namespace diy
       {
                 QueueSizePolicy(size_t sz): size(sz)          {}
         bool    unload_incoming(const Master&, int, int, size_t sz) const         { return sz > size; }
-        bool    unload_outgoing(const Master& master, int from, size_t sz) const  { return sz > size*master.outgoing_count(from); }
+        bool    unload_outgoing(const Master&, int, size_t sz) const              { return sz > size; }
 
         size_t  size;
       };
@@ -92,10 +93,11 @@ namespace diy
       struct MessageInfo;
       struct InFlightSend;
       struct InFlightRecv;
-      struct tags;
 
       struct GidSendOrder;
       struct IExchangeInfo;
+      struct IExchangeInfoDUD;
+      struct IExchangeInfoCollective;
 
       // forward declarations, defined in detail/master/collectives.hpp
       struct Collective;
@@ -105,32 +107,38 @@ namespace diy
       struct CollectivesList;       // std::list<Collective>
       struct CollectivesMap;        // std::map<int, CollectivesList>       // gid -> [collectives]
 
-
       struct QueueRecord
       {
-                        QueueRecord(size_t s = 0, int e = -1): size(s), external(e)     {}
-        size_t          size;
-        int             external;
+                        QueueRecord(MemoryBuffer&& b):
+                            buffer_(std::move(b))                                       { size_ = buffer_.size(); external_ = -1; }
+                        QueueRecord(size_t s = 0, int e = -1): size_(s), external_(e)   {}
+                        QueueRecord(const QueueRecord&) =delete;
+                        QueueRecord(QueueRecord&&)      =default;
+        QueueRecord&    operator=(const QueueRecord&)   =delete;
+        QueueRecord&    operator=(QueueRecord&&)        =default;
+
+        bool            external() const                                                { return external_ != -1; }
+        MemoryBuffer&&  move()                                                          { return std::move(buffer_); }
+        size_t          size() const                                                    { if (external()) return size_; return buffer_.size(); }
+
+        void            reset()                                                         { buffer_.reset(); }
+
+        void            unload(ExternalStorage* storage)                                { size_ = buffer_.size(); external_ = storage->put(buffer_); }
+        void            load(ExternalStorage* storage)                                  { storage->get(external_, buffer_); external_ = -1; }
+
+        private:
+            size_t          size_;
+            int             external_;
+            MemoryBuffer    buffer_;
       };
 
-      typedef           std::map<int,     QueueRecord>      InQueueRecords;     //  gid         -> (size, external)
-      typedef           std::map<int,     MemoryBuffer>     IncomingQueues;     //  gid         -> queue
-      typedef           std::map<BlockID, MemoryBuffer>     OutgoingQueues;     // (gid, proc)  -> queue
-      typedef           std::map<BlockID, QueueRecord>      OutQueueRecords;    // (gid, proc)  -> (size, external)
-      struct IncomingQueuesRecords
-      {
-        InQueueRecords  records;
-        IncomingQueues  queues;
-      };
-      struct OutgoingQueuesRecord
-      {
-                        OutgoingQueuesRecord(int e = -1): external(e)       {}
-        int             external;
-        OutQueueRecords external_local;
-        OutgoingQueues  queues;
-      };
-      typedef           std::map<int,     IncomingQueuesRecords>    IncomingQueuesMap;  //  gid         -> {  gid       -> queue }
-      typedef           std::map<int,     OutgoingQueuesRecord>     OutgoingQueuesMap;  //  gid         -> { (gid,proc) -> queue }
+      using RecordQueue = critical_resource<std::deque<QueueRecord>>;
+
+      using IncomingQueues = concurrent_map<int,      RecordQueue>;       // gid  -> [(size, external, buffer), ...]
+      using OutgoingQueues = concurrent_map<BlockID,  RecordQueue>;       // bid  -> [(size, external, buffer), ...]
+
+      using IncomingQueuesMap = std::map<int, IncomingQueues>;      // gid  -> {  gid -> [(size, external, buffer), ...]}
+      using OutgoingQueuesMap = std::map<int, OutgoingQueues>;      // gid  -> {  bid -> [(size, external, buffer), ...]}
 
       struct IncomingRound
       {
@@ -209,7 +217,7 @@ namespace diy
 
       //! nonblocking exchange of the queues between all the blocks
       template<class Block>
-      void          iexchange_(const ICallback<Block>&   f);
+      void          iexchange_(const ICallback<Block>&  f);
 
       template<class F>
       void          iexchange(const F& f)
@@ -221,10 +229,7 @@ namespace diy
       inline void   process_collectives();
 
       inline
-      ProxyWithLink proxy(int i) const;
-
-      inline
-      ProxyWithLink proxy(int i, IExchangeInfo* iexchange) const;
+      ProxyWithLink proxy(int i, IExchangeInfo* iex = 0) const;
 
       //! return the number of local blocks
       unsigned int  size() const                        { return static_cast<unsigned int>(blocks_.size()); }
@@ -235,7 +240,13 @@ namespace diy
       int           threads() const                     { return threads_; }
       int           in_memory() const                   { return *blocks_.in_memory().const_access(); }
 
-      void          set_threads(int threads__)          { threads_ = threads__; }
+      void          set_threads(int threads__)
+      {
+          threads_ = threads__;
+#if defined(VTKMDIY_NO_THREADS)
+          threads_ = 1;
+#endif
+      }
 
       CreateBlock   creator() const                     { return blocks_.creator(); }
       DestroyBlock  destroyer() const                   { return blocks_.destroyer(); }
@@ -260,9 +271,8 @@ namespace diy
 
     public:
       // Communicator functionality
-      IncomingQueues&   incoming(int gid__)             { return incoming_[exchange_round_].map[gid__].queues; }
-      OutgoingQueues&   outgoing(int gid__)             { return outgoing_[gid__].queues; }
-      size_t            outgoing_count(int gid__) const { OutgoingQueuesMap::const_iterator it = outgoing_.find(gid__); if (it == outgoing_.end()) return 0; return it->second.queues.size(); }
+      IncomingQueues&   incoming(int gid__)             { return incoming_[exchange_round_].map[gid__]; }
+      OutgoingQueues&   outgoing(int gid__)             { return outgoing_[gid__]; }
       inline CollectivesList&  collectives(int gid__);
       inline CollectivesMap&   collectives();
 
@@ -277,28 +287,30 @@ namespace diy
 
     private:
       // Communicator functionality
-      inline void       comm_exchange(GidSendOrder& gid_order, IExchangeInfo*    iexchange = 0);
+      inline void       comm_exchange(GidSendOrder& gid_order, IExchangeInfo*    iex = 0);
       inline void       rcomm_exchange();    // possibly called in between block computations
-      inline bool       nudge();
+      inline bool       nudge(IExchangeInfo* iex = 0);
+      inline void       send_queue(int from_gid, int to_gid, int to_proc, QueueRecord& qr, bool remote, IExchangeInfo* iex);
       inline void       send_outgoing_queues(GidSendOrder&   gid_order,
                                              bool            remote,
-                                             IExchangeInfo*  iexchange = 0);
-      inline void       check_incoming_queues(IExchangeInfo* iexchange = 0);
+                                             IExchangeInfo*  iex = 0);
+      inline void       check_incoming_queues(IExchangeInfo* iex = 0);
       inline GidSendOrder
                         order_gids();
       inline void       touch_queues();
-      inline void       move_external_local(int from);
-      inline void       send_same_rank(int from, int to, MemoryBuffer& bb, IExchangeInfo* iexchange);
-      inline void       send_different_rank(int from, int to, int proc, MemoryBuffer& bb, bool remote, IExchangeInfo* iexchange);
+      inline void       send_same_rank(int from, int to, QueueRecord& qr, IExchangeInfo* iex);
+      inline void       send_different_rank(int from, int to, int proc, QueueRecord& qr, bool remote, IExchangeInfo* iex);
 
       inline InFlightRecv&         inflight_recv(int proc);
       inline InFlightSendsList&    inflight_sends();
 
-      // iexchange commmunication
+      // iexchange communication
-      inline void       icommunicate(IExchangeInfo* iexchange);     // async communication
+      inline void       icommunicate(IExchangeInfo* iex);     // async communication
 
-      // debug
-      inline void       show_incoming_records() const;
+      struct tags       { enum {
+                                    queue,
+                                    iexchange
+                                }; };
 
     private:
       std::vector<Link*>    links_;
@@ -333,12 +345,14 @@ namespace diy
     public:
       std::shared_ptr<spd::logger>  log = get_logger();
       stats::Profiler               prof;
+      stats::Annotation             exchange_round_annotation { "diy.exchange-round" };
   };
 
   struct Master::SkipNoIncoming
   { bool operator()(int i, const Master& master) const   { return !master.has_incoming(i); } };
 }
 
+#include "detail/master/iexchange.hpp"
 #include "detail/master/communication.hpp"
 #include "detail/master/collectives.hpp"
 #include "detail/master/commands.hpp"
@@ -358,13 +372,20 @@ Master(mpi::communicator    comm,
   blocks_(create_, destroy_, storage, save, load_),
   queue_policy_(q_policy),
   limit_(limit__),
+#if !defined(VTKMDIY_NO_THREADS)
   threads_(threads__ == -1 ? static_cast<int>(thread::hardware_concurrency()) : threads__),
+#else
+  threads_(1),
+#endif
   storage_(storage),
   // Communicator functionality
   inflight_sends_(new InFlightSendsList),
   inflight_recvs_(new InFlightRecvsMap),
   collectives_(new CollectivesMap)
 {
+#ifdef VTKMDIY_NO_THREADS
+  (void) threads__;
+#endif
     comm_.duplicate(comm);
 }
 
@@ -415,18 +436,20 @@ unload_incoming(int gid__)
   {
     IncomingQueuesMap::iterator qmap_itr = round_itr->second.map.find(gid__);
     if (qmap_itr == round_itr->second.map.end())
-    {
       continue;
-    }
-    IncomingQueuesRecords& in_qrs = qmap_itr->second;
-    for (InQueueRecords::iterator it = in_qrs.records.begin(); it != in_qrs.records.end(); ++it)
+
+    IncomingQueues& in_qs = qmap_itr->second;
+    for (auto& x : in_qs)
     {
-      QueueRecord& qr = it->second;
-      if (queue_policy_->unload_incoming(*this, it->first, gid__, qr.size))
-      {
-        log->debug("Unloading queue: {} <- {}", gid__, it->first);
-        qr.external = storage_->put(in_qrs.queues[it->first]);
-      }
+        int from = x.first;
+        for (QueueRecord& qr : *x.second.access())
+        {
+          if (queue_policy_->unload_incoming(*this, from, gid__, qr.size()))
+          {
+            log->debug("Unloading queue: {} <- {}", gid__, from);
+            qr.unload(storage_);
+          }
+        }
     }
   }
 }
@@ -435,54 +458,18 @@ void
 diy::Master::
 unload_outgoing(int gid__)
 {
-  OutgoingQueuesRecord& out_qr = outgoing_[gid__];
-
-  size_t out_queues_size = sizeof(size_t);   // map size
-  size_t count = 0;
-  // count the size of the queues we need to pack
-  for (auto& rec : out_qr.queues)
+  OutgoingQueues& out_qs = outgoing_[gid__];
+  for (auto& x : out_qs)
   {
-    if (rec.first.proc == comm_.rank()) continue;
-
-    out_queues_size += sizeof(BlockID);                                 // target
-    out_queues_size += Serialization<MemoryBuffer>::size(rec.second);   // buffer contents
-    ++count;
-  }
-  if (queue_policy_->unload_outgoing(*this, gid__, out_queues_size - sizeof(size_t)))
-  {
-      log->debug("Unloading outgoing queues: {} -> ...; size = {}\n", gid__, out_queues_size);
-      MemoryBuffer  bb;     bb.reserve(out_queues_size);
-      diy::save(bb, count);
-
-      // pack queues going to a remote proc into bb; queues going to a
-      // different block on our rank, stay separated, recorded in external_local
-      for (auto it = out_qr.queues.begin(); it != out_qr.queues.end();)
+      int to = x.first.gid;
+      for (QueueRecord& qr : *x.second.access())
       {
-        auto  bid    = it->first;
-        auto& buffer = it->second;
-        if (bid.proc == comm_.rank())
+        if (queue_policy_->unload_outgoing(*this, gid__, qr.size()))
         {
-          // treat as incoming
-          if (queue_policy_->unload_incoming(*this, gid__, bid.gid, buffer.size()))
-          {
-            QueueRecord& qr = out_qr.external_local[bid];
-            qr.size     = buffer.size();
-            qr.external = storage_->put(buffer);
-
-            out_qr.queues.erase(it++);
-          } ++it; // else keep in memory
-        } else
-        {
-          diy::save(bb, bid);
-          diy::save(bb, buffer);
-
-          out_qr.queues.erase(it++);
+          log->debug("Unloading outgoing queue: {} -> {}", gid__, to);
+          qr.unload(storage_);
         }
       }
-
-      // TODO: this mechanism could be adjusted for direct saving to disk
-      //       (without intermediate binary buffer serialization)
-      out_qr.external = storage_->put(bb);
   }
 }
 
@@ -508,16 +495,22 @@ void
 diy::Master::
 load_incoming(int gid__)
 {
-  IncomingQueuesRecords& in_qrs = incoming_[exchange_round_].map[gid__];
-  for (InQueueRecords::iterator it = in_qrs.records.begin(); it != in_qrs.records.end(); ++it)
+  IncomingQueues& in_qs = incoming_[exchange_round_].map[gid__];
+  for (auto& x : in_qs)
   {
-    QueueRecord& qr = it->second;
-    if (qr.external != -1)
-    {
-        log->debug("Loading queue: {} <- {}", gid__, it->first);
-        storage_->get(qr.external, in_qrs.queues[it->first]);
-        qr.external = -1;
-    }
+      int from = x.first;
+      auto access = x.second.access();
+      if (!access->empty())
+      {
+          // NB: only the front queue is loaded; using the out-of-core
+          //     machinery with iexchange will require changes here
+          auto& qr = access->front();
+          if (qr.external())
+          {
+            log->debug("Loading queue: {} <- {}", gid__, from);
+            qr.load(storage_);
+          }
+      }
   }
 }
 
@@ -527,33 +520,30 @@ load_outgoing(int gid__)
 {
   // TODO: we could adjust this mechanism to read directly from storage,
   //       bypassing an intermediate MemoryBuffer
-  OutgoingQueuesRecord& out_qr = outgoing_[gid__];
-  if (out_qr.external != -1)
+  OutgoingQueues& out_qs = outgoing_[gid__];
+  for (auto& x : out_qs)
   {
-    MemoryBuffer bb;
-    storage_->get(out_qr.external, bb);
-    out_qr.external = -1;
-
-    size_t count;
-    diy::load(bb, count);
-    for (size_t i = 0; i < count; ++i)
-    {
-      BlockID to;
-      diy::load(bb, to);
-      diy::load(bb, out_qr.queues[to]);
-    }
+      int to      = x.first.gid;
+      int to_rank = x.first.proc;
+      auto access = x.second.access();
+      if (!access->empty())
+      {
+          // NB: only the front queue is loaded; using the out-of-core
+          //     machinery with iexchange will require changes here
+          auto& qr = access->front();
+          if (qr.external() && comm_.rank() != to_rank)     // skip queues to the same rank
+          {
+            log->debug("Loading queue: {} -> {}", gid__, to);
+            qr.load(storage_);
+          }
+      }
   }
 }
 
 diy::Master::ProxyWithLink
 diy::Master::
-proxy(int i) const
-{ return ProxyWithLink(Proxy(const_cast<Master*>(this), gid(i)), block(i), link(i)); }
-
-diy::Master::ProxyWithLink
-diy::Master::
-proxy(int i, IExchangeInfo* iexchange) const
-{ return ProxyWithLink(Proxy(const_cast<Master*>(this), gid(i)), block(i), link(i), iexchange); }
+proxy(int i, IExchangeInfo* iex) const
+{ return ProxyWithLink(Proxy(const_cast<Master*>(this), gid(i), iex), block(i), link(i)); }
 
 int
 diy::Master::
@@ -589,12 +579,12 @@ bool
 diy::Master::
 has_incoming(int i) const
 {
-  const IncomingQueuesRecords& in_qrs = const_cast<Master&>(*this).incoming_[exchange_round_].map[gid(i)];
-  for (InQueueRecords::const_iterator it = in_qrs.records.begin(); it != in_qrs.records.end(); ++it)
+  const IncomingQueues& in_qs = const_cast<Master&>(*this).incoming_[exchange_round_].map[gid(i)];
+  for (auto& x : in_qs)
   {
-    const QueueRecord& qr = it->second;
-    if (qr.size != 0)
-        return true;
+      auto access = x.second.const_access();
+      if (!access->empty() && access->front().size() != 0)
+          return true;
   }
   return false;
 }
@@ -604,8 +594,10 @@ void
 diy::Master::
 foreach_(const Callback<Block>& f, const Skip& skip)
 {
+    exchange_round_annotation.set(exchange_round_);
+
     auto scoped = prof.scoped("foreach");
-    DIY_UNUSED(scoped);
+    VTKMDIY_UNUSED(scoped);
 
     commands_.emplace_back(new Command<Block>(f, skip));
 
@@ -618,17 +610,16 @@ diy::Master::
 exchange(bool remote)
 {
   auto scoped = prof.scoped("exchange");
-  DIY_UNUSED(scoped);
+  VTKMDIY_UNUSED(scoped);
 
   execute();
 
   log->debug("Starting exchange");
 
-#ifdef VTKM_DIY_NO_MPI
-  // remote doesn't need to do anything special if there is no mpi, but we also
-  // can't just use it because of the ibarrier
-  remote = false;
-#endif
+  if (comm_.size() == 1)
+  {
+    remote = false;
+  }
 
   // make sure there is a queue for each neighbor
   if (!remote)
@@ -644,14 +635,13 @@ touch_queues()
 {
   for (int i = 0; i < (int)size(); ++i)
   {
-      OutgoingQueues&  outgoing_queues  = outgoing_[gid(i)].queues;
-      OutQueueRecords& external_local   = outgoing_[gid(i)].external_local;
-      if (outgoing_queues.size() < (size_t)link(i)->size())
-          for (unsigned j = 0; j < (unsigned)link(i)->size(); ++j)
-          {
-              if (external_local.find(link(i)->target(j)) == external_local.end())
-                  outgoing_queues[link(i)->target(j)];        // touch the outgoing queue, creating it if necessary
-          }
+      OutgoingQueues&  outgoing_queues  = outgoing_[gid(i)];
+      for (BlockID target : link(i)->neighbors())
+      {
+          auto access = outgoing_queues[target].access();
+          if (access->empty())
+              access->emplace_back();
+      }
   }
 }
 
@@ -671,63 +661,100 @@ diy::Master::
 iexchange_(const ICallback<Block>& f)
 {
     auto scoped = prof.scoped("iexchange");
-    DIY_UNUSED(scoped);
+    VTKMDIY_UNUSED(scoped);
+
+#if !defined(VTKMDIY_NO_THREADS) && (!defined(VTKMDIY_USE_CALIPER) && defined(VTKMDIY_PROFILE))
+    static_assert(false, "Cannot use DIY's internal profiler; it's not thread safe. Use caliper.");
+#endif
 
     // prepare for next round
     incoming_.erase(exchange_round_);
     ++exchange_round_;
+    exchange_round_annotation.set(exchange_round_);
 
-    IExchangeInfo iexchange(size(), comm_);
-    iexchange.add_work(size());                 // start with one work unit for each block
-    comm_.barrier();                            // make sure that everyone's original work is accounted for
+    // touch the outgoing and incoming queues to make sure they exist
+    for (unsigned i = 0; i < size(); ++i)
+    {
+      outgoing(gid(i));
+      incoming(gid(i));
+    }
 
-    int global_work_ = -1;
+    //IExchangeInfoDUD iexchange(comm_, min_queue_size, max_hold_time, fine, prof);
+    IExchangeInfoCollective iex(comm_, prof);
+    iex.add_work(size());                 // start with one work unit for each block
 
+    thread comm_thread;
+    if (threads() > 1)
+        comm_thread = thread([this,&iex]()
+        {
+            while(!iex.all_done())
+            {
+                icommunicate(&iex);
+                iex.control();
+                //std::this_thread::sleep_for(std::chrono::microseconds(1));
+            }
+        });
+
+    auto empty_incoming = [this](int gid)
+    {
+        for (auto& x : incoming(gid))
+            if (!x.second.access()->empty())
+                return false;
+        return true;
+    };
+
+    std::map<int, bool> done_result;
     do
     {
-        for (size_t i = 0; i < size(); i++)     // for all blocks
+        size_t work_done = 0;
+        for (int i = 0; i < static_cast<int>(size()); i++)     // for all blocks
         {
+            int gid = this->gid(i);
+            stats::Annotation::Guard g( stats::Annotation("diy.block").set(gid) );
 
-            icommunicate(&iexchange);            // TODO: separate comm thread std::thread t(icommunicate);
-            ProxyWithLink cp = proxy(i, &iexchange);
-
-            prof << "callback";
-            bool done = f(block<Block>(i), cp);
-            prof >> "callback";
-
-            int nundeq_after = 0;
-            int nunenq_after = 0;
-            for (size_t j = 0; j < static_cast<size_t>(cp.link()->size()); j++)
+            if (threads() == 1)
+                icommunicate(&iex);
+            bool done = done_result[gid];
+            if (!done || !empty_incoming(gid))
             {
-                if (cp.incoming(cp.link()->target(j).gid))
-                    ++nundeq_after;
-                if (cp.outgoing(cp.link()->target(j)).size())
-                    ++nunenq_after;
-            }
-
-            done &= (nundeq_after == 0);
-            done &= (nunenq_after == 0);
-
-            int gid = cp.gid();
-            if (iexchange.done[gid] != done)
-            {
-                iexchange.done[gid] = done;
-                if (done)
+                prof << "callback";
+                iex.inc_work();       // even if we remove the queues, when constructing the proxy, we still have work to do
                 {
-                    int work = iexchange.dec_work();
-                    log->debug("[{}] Decrementing work when switching done after callback, for {}: work = {}\n", comm_.rank(), gid, work);
-                }
-                else
-                {
-                    int work = iexchange.inc_work();
-                    log->debug("[{}] Incrementing work when switching done after callback, for {}: work = {}\n", comm_.rank(), gid, work);
-                }
+                    ProxyWithLink cp = proxy(i, &iex);
+                    done = f(block<Block>(i), cp);
+                    if (done_result[gid] ^ done)        // status changed
+                    {
+                        if (done)
+                            iex.dec_work();
+                        else
+                            iex.inc_work();
+                    }
+                }   // NB: we need cp to go out of scope and copy out its queues before we can decrement the work
+                iex.dec_work();
+                prof >> "callback";
+                ++work_done;
             }
+            done_result[gid] = done;
+            log->debug("Done: {}", done);
         }
 
-        global_work_ = iexchange.global_work();
-    } while (global_work_ > 0);
-    log->debug("[{}] ==== Leaving iexchange ====\n", iexchange.comm.rank());
+        if (threads() == 1)
+        {
+            prof << "iexchange-control";
+            iex.control();
+            prof >> "iexchange-control";
+        }
+        //else
+        //if (work_done == 0)
+        //    std::this_thread::sleep_for(std::chrono::microseconds(1));
+    } while (!iex.all_done());
+    log->info("[{}] ==== Leaving iexchange ====\n", iex.comm.rank());
+
+    if (threads() > 1)
+        comm_thread.join();
+
+    //comm_.barrier();        // TODO: this is only necessary for DUD
+    prof >> "consensus-time";
 
     outgoing_.clear();
 }
@@ -735,13 +762,17 @@ iexchange_(const ICallback<Block>& f)
 /* Communicator */
 void
 diy::Master::
-comm_exchange(GidSendOrder& gid_order, IExchangeInfo* iexchange)
+comm_exchange(GidSendOrder& gid_order, IExchangeInfo* iex)
 {
     auto scoped = prof.scoped("comm-exchange");
-    DIY_UNUSED(scoped);
-    send_outgoing_queues(gid_order, false, iexchange);
-    while(nudge());                   // kick requests
-    check_incoming_queues(iexchange);
+    VTKMDIY_UNUSED(scoped);
+
+    send_outgoing_queues(gid_order, false, iex);
+
+    while(nudge(iex))                         // kick requests
+        ;
+
+    check_incoming_queues(iex);
 }
 
 /* Remote communicator */
@@ -798,15 +829,8 @@ rcomm_exchange()
         {
             if (gid_order.empty() && inflight_sends().empty())
             {
-            #ifndef VTKM_DIY_NO_MPI
                 ibarr_req = comm_.ibarrier();
                 ibarr_act = true;
-            #else
-              // ibarrier() in communicator.hpp does not support MPI right now. For now it's
-              // trying to throw an std::runtime_error in a function with return value which
-              // would cause nvcc refuses to build. So here we just simply set it to be done.
-              done = true;
-            #endif
             }
         }
     }                                                 // while !done
@@ -818,15 +842,23 @@ diy::Master::GidSendOrder
 diy::Master::
 order_gids()
 {
+    auto scoped = prof.scoped("order-gids");
+
     GidSendOrder order;
 
-    for (OutgoingQueuesMap::iterator it = outgoing_.begin(); it != outgoing_.end(); ++it)
+    for (auto& x : outgoing_)
     {
-        OutgoingQueuesRecord& out = it->second;
-        if (out.external == -1)
-            order.list.push_front(it->first);
-        else
-            order.list.push_back(it->first);
+        OutgoingQueues& out = x.second;
+        if (!out.empty())
+        {
+            auto access = out.begin()->second.access();
+            if (!access->empty() && !access->front().external())
+            {
+                order.list.push_front(x.first);
+                continue;
+            }
+        }
+        order.list.push_back(x.first);
     }
     log->debug("order.size(): {}", order.size());
 
@@ -845,24 +877,17 @@ order_gids()
 // iexchange communicator
 void
 diy::Master::
-icommunicate(IExchangeInfo* iexchange)
+icommunicate(IExchangeInfo* iex)
 {
     auto scoped = prof.scoped("icommunicate");
-    DIY_UNUSED(scoped);
+    VTKMDIY_UNUSED(scoped);
 
     log->debug("Entering icommunicate()");
 
-    // lock out other threads
-    // TODO: not threaded yet
-    // if (!CAS(comm_flag, 0, 1))
-    //     return;
-
-    // debug
-//     log->info("out_queues_limit: {}", out_queues_limit);
+    auto gid_order = order_gids();
 
     // exchange
-    auto gid_order = order_gids();
-    comm_exchange(gid_order, iexchange);
+    comm_exchange(gid_order, iex);
 
     // cleanup
 
@@ -873,133 +898,119 @@ icommunicate(IExchangeInfo* iexchange)
     log->debug("Exiting icommunicate()");
 }
 
+// send one queue record from block from_gid to block to_gid: send_same_rank() if to_proc is this rank, send_different_rank() otherwise
+void
+diy::Master::
+send_queue(int              from_gid,
+           int              to_gid,
+           int              to_proc,
+           QueueRecord&     qr,
+           bool             remote,
+           IExchangeInfo*   iex)
+{
+    stats::Annotation::Guard gb( stats::Annotation("diy.block").set(from_gid) );
+    stats::Annotation::Guard gt( stats::Annotation("diy.to").set(to_gid) );
+    stats::Annotation::Guard gq( stats::Annotation("diy.q-size").set(stats::Variant(static_cast<uint64_t>(qr.size()))) );
+
+    // skip empty queues and hold queues shorter than some limit for some time
+    assert(!iex || qr.size() != 0);
+    log->debug("[{}] Sending queue: {} <- {} of size {}, iexchange = {}", comm_.rank(), to_gid, from_gid, qr.size(), iex ? 1 : 0);
+
+    if (to_proc == comm_.rank())            // sending to same rank, simply swap buffers
+        send_same_rank(from_gid, to_gid, qr, iex);
+    else                                    // sending an actual message to a different rank
+        send_different_rank(from_gid, to_gid, to_proc, qr, remote, iex);
+}
+
 void
 diy::Master::
 send_outgoing_queues(GidSendOrder&   gid_order,
-                     bool            remote,                     // TODO: are remote and iexchange mutually exclusive? If so, use single enum?
-                     IExchangeInfo*  iexchange)
+                     bool            remote,            // TODO: are remote and iexchange mutually exclusive? If so, use single enum?
+                     IExchangeInfo*  iex)
 {
     auto scoped = prof.scoped("send-outgoing-queues");
-    DIY_UNUSED(scoped);
+    VTKMDIY_UNUSED(scoped);
 
-    while (inflight_sends().size() < gid_order.limit && !gid_order.empty())
+    if (iex)                                      // for iex, send queues from a single block
     {
-        int from = gid_order.pop();
-
-        // move external queues going to our rank
-        move_external_local(from);
-
-        if (outgoing_[from].external != -1)
-            load_outgoing(from);
-
-        OutgoingQueues& outgoing = outgoing_[from].queues;
-        for (OutgoingQueues::iterator it = outgoing.begin(); it != outgoing.end(); ++it)
+        for (int from : gid_order.list)
         {
-            BlockID to_proc = it->first;
-            int     to      = to_proc.gid;
-            int     proc    = to_proc.proc;
-            log->debug("Processing queue:      {} <- {} of size {}", to, from, outgoing_[from].queues[to_proc].size());
-
-            // skip empty queues
-            if (iexchange && !it->second.size())
+            OutgoingQueues& outgoing = this->outgoing(from);
+            for (auto& x : outgoing)
             {
-                log->debug("Skipping empty queue: {} <- {}", to, from);
-                continue;
+                BlockID to_block    = x.first;
+                int     to_gid      = to_block.gid;
+                int     to_proc     = to_block.proc;
+
+                auto access = x.second.access();
+                while (!access->empty())
+                {
+                    auto qr = std::move(access->front());
+                    access->pop_front();
+                    access.unlock();            // others can push on this queue, while we are working
+                    assert(!qr.external());
+                    log->debug("Processing queue:      {} <- {} of size {}", to_gid, from, qr.size());
+                    send_queue(from, to_gid, to_proc, qr, remote, iex);
+                    access.lock();
+                }
+            }
+        }
+    }
+    else                                                // normal mode: send all outgoing queues
+    {
+        while (inflight_sends().size() < gid_order.limit && !gid_order.empty())
+        {
+            int from_gid = gid_order.pop();
+
+            load_outgoing(from_gid);
+
+            OutgoingQueues& outgoing = outgoing_[from_gid];
+            for (auto& x : outgoing)
+            {
+                BlockID to_block    = x.first;
+                int     to_gid      = to_block.gid;
+                int     to_proc     = to_block.proc;
+
+                auto access = x.second.access();
+                if (access->empty())
+                    continue;
+
+                // NB: send only front
+                auto& qr = access->front();
+                log->debug("Processing queue:      {} <- {} of size {}", to_gid, from_gid, qr.size());
+                send_queue(from_gid, to_gid, to_proc, qr, remote, iex);
+                access->pop_front();
             }
-
-            // sending to same rank: simply swap buffers
-            if (proc == comm_.rank())
-                send_same_rank(from, to, it->second, iexchange);
-            else
-                send_different_rank(from, to, proc, it->second, remote, iexchange);
-
-        }                                       // for (OutgoingQueues::iterator it ...
-    }                                           // while (inflight_sends().size() ...
-}
-
-void
-diy::Master::
-move_external_local(int from)
-{
-    IncomingRound& current_incoming = incoming_[exchange_round_];
-
-    // deal with external_local queues
-    for (auto& x : outgoing_[from].external_local)
-    {
-        int to = x.first.gid;
-
-        log->debug("Processing local queue: {} <- {} of size {}", to, from, x.second.size);
-
-        QueueRecord& in_qr        = current_incoming.map[to].records[from];
-        bool         to_external  = block(lid(to)) == 0;
-
-        if (to_external)
-            in_qr = x.second;
-        else
-        {
-            // load the queue
-            in_qr.size     = x.second.size;
-            in_qr.external = -1;
-
-            MemoryBuffer bb;
-            storage_->get(x.second.external, bb);
-
-            current_incoming.map[to].queues[from].swap(bb);
         }
-        current_incoming.received++;
     }
-    outgoing_[from].external_local.clear();
 }
 
 void
 diy::Master::
-send_same_rank(int from, int to, MemoryBuffer& bb, IExchangeInfo* iexchange)
+send_same_rank(int from, int to, QueueRecord& qr, IExchangeInfo*)
 {
+    auto scoped = prof.scoped("send-same-rank");
+
     log->debug("Moving queue in-place: {} <- {}", to, from);
 
     IncomingRound& current_incoming = incoming_[exchange_round_];
 
-    QueueRecord& in_qr       = current_incoming.map[to].records[from];
-    bool         to_external = block(lid(to)) == 0;
-    if (to_external)
+    auto access_incoming = current_incoming.map[to][from].access();
+
+    access_incoming->emplace_back(std::move(qr));
+    QueueRecord& in_qr = access_incoming->back();
+
+    if (!in_qr.external())
     {
-        log->debug("Unloading outgoing directly as incoming: {} <- {}", to, from);
-        in_qr.size = bb.size();
-        if (queue_policy_->unload_incoming(*this, from, to, in_qr.size))
-            in_qr.external = storage_->put(bb);
-        else
+        in_qr.reset();
+
+        bool to_external = block(lid(to)) == 0;
+        if (to_external)
         {
-            MemoryBuffer& in_bb = current_incoming.map[to].queues[from];
-            if (!iexchange)
-            {
-                in_bb.swap(bb);
-                in_bb.reset();
-            }
-            else
-            {
-                iexchange->not_done(to);
-                in_bb.append_binary(&bb.buffer[0], bb.size());
-                bb.clear();
-            }
-            in_qr.external = -1;
+            log->debug("Unloading outgoing directly as incoming: {} <- {}", to, from);
+            if (queue_policy_->unload_incoming(*this, from, to, in_qr.size()))
+                in_qr.unload(storage_);
         }
-    } else        // !to_external
-    {
-        log->debug("Swapping in memory:    {} <- {}", to, from);
-        MemoryBuffer& in_bb = current_incoming.map[to].queues[from];
-        if (!iexchange)
-        {
-            in_bb.swap(bb);
-            in_bb.reset();
-        }
-        else
-        {
-            iexchange->not_done(to);
-            in_bb.append_binary(&bb.buffer[0], bb.size());
-            bb.wipe();
-        }
-        in_qr.size = bb.size();
-        in_qr.external = -1;
     }
 
     ++current_incoming.received;
@@ -1007,15 +1018,18 @@ send_same_rank(int from, int to, MemoryBuffer& bb, IExchangeInfo* iexchange)
 
 void
 diy::Master::
-send_different_rank(int from, int to, int proc, MemoryBuffer& bb, bool remote, IExchangeInfo* iexchange)
+send_different_rank(int from, int to, int proc, QueueRecord& qr, bool remote, IExchangeInfo* iex)
 {
+    auto scoped = prof.scoped("send-different-rank");
+
+    assert(!qr.external());
+
     static const size_t MAX_MPI_MESSAGE_COUNT = INT_MAX;
 
     // sending to a different rank
-    std::shared_ptr<MemoryBuffer> buffer = std::make_shared<MemoryBuffer>();
-    buffer->swap(bb);
+    std::shared_ptr<MemoryBuffer> buffer = std::make_shared<MemoryBuffer>(qr.move());
 
-    MessageInfo info{from, to, exchange_round_};
+    MessageInfo info{from, to, 1, exchange_round_};
     // size fits in one message
     if (Serialization<MemoryBuffer>::size(*buffer) + Serialization<MessageInfo>::size(info) <= MAX_MPI_MESSAGE_COUNT)
     {
@@ -1025,15 +1039,8 @@ send_different_rank(int from, int to, int proc, MemoryBuffer& bb, bool remote, I
         auto& inflight_send = inflight_sends().back();
 
         inflight_send.info = info;
-        if (remote || iexchange)
-        {
-            if (iexchange)
-            {
-                int work = iexchange->inc_work();
-                log->debug("[{}] Incrementing work when sending queue: work = {}\n", comm_.rank(), work);
-            }
+        if (remote || iex)
             inflight_send.request = comm_.issend(proc, tags::queue, buffer->buffer);
-        }
         else
             inflight_send.request = comm_.isend(proc, tags::queue, buffer->buffer);
         inflight_send.message = buffer;
@@ -1041,36 +1048,39 @@ send_different_rank(int from, int to, int proc, MemoryBuffer& bb, bool remote, I
     else // large message gets broken into chunks
     {
         int npieces = static_cast<int>((buffer->size() + MAX_MPI_MESSAGE_COUNT - 1)/MAX_MPI_MESSAGE_COUNT);
+        info.nparts += npieces;
 
         // first send the head
         std::shared_ptr<MemoryBuffer> hb = std::make_shared<MemoryBuffer>();
         diy::save(*hb, buffer->size());
         diy::save(*hb, info);
 
-        inflight_sends().emplace_back();
-        auto& inflight_send = inflight_sends().back();
-
-        inflight_send.info = info;
-        if (remote || iexchange)
         {
-            // add one unit of work for the entire large message (upon sending the head, not the individual pieces below)
-            if (iexchange)
-            {
-                int work = iexchange->inc_work();
-                log->debug("[{}] Incrementing work when sending the first piece: work = {}\n", comm_.rank(), work);
-            }
-            inflight_send.request = comm_.issend(proc, tags::piece, hb->buffer);
+          inflight_sends().emplace_back();
+          auto& inflight_send = inflight_sends().back();
+
+          inflight_send.info = info;
+          if (remote || iex)
+          {
+              // add one unit of work for the leading piece (the head); each piece sent below adds its own unit of work
+              if (iex)
+              {
+                  iex->inc_work();
+                  log->debug("[{}] Incrementing work when sending the leading piece\n", comm_.rank());
+              }
+              inflight_send.request = comm_.issend(proc, tags::queue, hb->buffer);
+          }
+          else
+          {
+              inflight_send.request = comm_.isend(proc, tags::queue, hb->buffer);
+          }
+          inflight_send.message = hb;
         }
-        else
-            inflight_send.request = comm_.isend(proc, tags::piece, hb->buffer);
-        inflight_send.message = hb;
 
         // send the message pieces
         size_t msg_buff_idx = 0;
         for (int i = 0; i < npieces; ++i, msg_buff_idx += MAX_MPI_MESSAGE_COUNT)
         {
-            int tag = (i == (npieces - 1)) ? tags::queue : tags::piece;     // last piece is marked as queue, to indicate that we are done
-
             detail::VectorWindow<char> window;
             window.begin = &buffer->buffer[msg_buff_idx];
             window.count = std::min(MAX_MPI_MESSAGE_COUNT, buffer->size() - msg_buff_idx);
@@ -1079,10 +1089,17 @@ send_different_rank(int from, int to, int proc, MemoryBuffer& bb, bool remote, I
             auto& inflight_send = inflight_sends().back();
 
             inflight_send.info = info;
-            if (remote || iexchange)
-                inflight_send.request = comm_.issend(proc, tag, window);
+            if (remote || iex)
+            {
+                if (iex)
+                {
+                    iex->inc_work();
+                    log->debug("[{}] Incrementing work when sending non-leading piece\n", comm_.rank());
+                }
+                inflight_send.request = comm_.issend(proc, tags::queue, window);
+            }
             else
-                inflight_send.request = comm_.isend(proc, tag, window);
+                inflight_send.request = comm_.isend(proc, tags::queue, window);
             inflight_send.message = buffer;
         }
     }   // large message broken into pieces
@@ -1090,19 +1107,23 @@ send_different_rank(int from, int to, int proc, MemoryBuffer& bb, bool remote, I
 
 void
 diy::Master::
-check_incoming_queues(IExchangeInfo* iexchange)
+check_incoming_queues(IExchangeInfo* iex)
 {
     auto scoped = prof.scoped("check-incoming-queues");
-    DIY_UNUSED(scoped);
+    VTKMDIY_UNUSED(scoped);
 
-    mpi::optional<mpi::status> ostatus = comm_.iprobe(mpi::any_source, mpi::any_tag);
+    mpi::optional<mpi::status> ostatus = comm_.iprobe(mpi::any_source, tags::queue);
     while (ostatus)
     {
         InFlightRecv& ir = inflight_recv(ostatus->source());
 
-        ir.recv(comm_, *ostatus);     // possibly partial recv, in case of a multi-piece message
+        if (iex)
+            iex->inc_work();                      // increment work before sender's issend request can complete (so we are now responsible for the queue)
+        bool first_message = ir.recv(comm_, *ostatus);  // possibly partial recv, in case of a multi-piece message
+        if (!first_message && iex)
+            iex->dec_work();
 
-        if (ir.done)                 // all pieces assembled
+        if (ir.done)                // all pieces assembled
         {
             assert(ir.info.round >= exchange_round_);
             IncomingRound* in = &incoming_[ir.info.round];
@@ -1110,11 +1131,11 @@ check_incoming_queues(IExchangeInfo* iexchange)
             bool unload = ((ir.info.round == exchange_round_) ? (block(lid(ir.info.to)) == 0) : (limit_ != -1))
                           && queue_policy_->unload_incoming(*this, ir.info.from, ir.info.to, ir.message.size());
 
-            ir.place(in, unload, storage_, iexchange);
+            ir.place(in, unload, storage_, iex);
             ir.reset();
         }
 
-        ostatus = comm_.iprobe(mpi::any_source, mpi::any_tag);
+        ostatus = comm_.iprobe(mpi::any_source, tags::queue);
     }
 }
 
@@ -1130,6 +1151,8 @@ flush(bool remote)
   // prepare for next round
   incoming_.erase(exchange_round_);
   ++exchange_round_;
+  exchange_round_annotation.set(exchange_round_);
+
 
   if (remote)
       rcomm_exchange();
@@ -1155,14 +1178,13 @@ flush(bool remote)
   outgoing_.clear();
 
   log->debug("Done in flush");
-  //show_incoming_records();
 
   process_collectives();
 }
 
 bool
 diy::Master::
-nudge()
+nudge(IExchangeInfo* iex)
 {
   bool success = false;
   for (InFlightSendsList::iterator it = inflight_sends().begin(); it != inflight_sends().end();)
@@ -1172,6 +1194,11 @@ nudge()
     {
       success = true;
       it = inflight_sends().erase(it);
+      if (iex)
+      {
+          log->debug("[{}] message left, decrementing work", iex->comm.rank());
+          iex->dec_work();                // this message is receiver's responsibility now
+      }
     }
     else
     {
@@ -1181,75 +1208,4 @@ nudge()
   return success;
 }
 
-void
-diy::Master::
-show_incoming_records() const
-{
-  for (IncomingRoundMap::const_iterator rounds_itr = incoming_.begin(); rounds_itr != incoming_.end(); ++rounds_itr)
-  {
-    for (IncomingQueuesMap::const_iterator it = rounds_itr->second.map.begin(); it != rounds_itr->second.map.end(); ++it)
-    {
-      const IncomingQueuesRecords& in_qrs = it->second;
-      for (InQueueRecords::const_iterator cur = in_qrs.records.begin(); cur != in_qrs.records.end(); ++cur)
-      {
-        const QueueRecord& qr = cur->second;
-        log->info("round: {}, {} <- {}: (size,external) = ({},{})",
-                  rounds_itr->first,
-                  it->first, cur->first,
-                  qr.size,
-                  qr.external);
-      }
-      for (IncomingQueues::const_iterator cur = in_qrs.queues.begin(); cur != in_qrs.queues.end(); ++cur)
-      {
-        log->info("round: {}, {} <- {}: queue.size() = {}",
-                  rounds_itr->first,
-                  it->first, cur->first,
-                  const_cast<IncomingQueuesRecords&>(in_qrs).queues[cur->first].size());
-      }
-    }
-  }
-}
-
-// return global work status (for debugging)
-int
-diy::Master::IExchangeInfo::
-global_work()
-{
-    int global_work;
-    global_work_->fetch(global_work, 0, 0);
-    global_work_->flush_local(0);
-    return global_work;
-}
-
-// get global all done status
-bool
-diy::Master::IExchangeInfo::
-all_done()
-{
-    return global_work() == 0;
-}
-
-// reset global work counter
-void
-diy::Master::IExchangeInfo::
-reset_work()
-{
-    int val = 0;
-    global_work_->replace(val, 0, 0);
-    global_work_->flush(0);
-}
-
-// add arbitrary units of work to global work counter
-int
-diy::Master::IExchangeInfo::
-add_work(int work)
-{
-    int global_work;                                               // unused
-    global_work_->fetch_and_op(&work, &global_work, 0, 0, MPI_SUM);
-    global_work_->flush(0);
-    if (global_work + work < 0)
-        throw std::runtime_error(fmt::format("error: attempting to subtract {} units of work when global_work prior to subtraction = {}", work, global_work));
-    return global_work + work;
-}
-
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi.hpp
index 35fd5893c..f7c434e68 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi.hpp
@@ -1,14 +1,9 @@
 #ifndef VTKMDIY_MPI_HPP
 #define VTKMDIY_MPI_HPP
 
-#ifndef VTKM_DIY_NO_MPI
-#include <mpi.h>
-#else
-#include "mpi/no-mpi.hpp"
-#endif
-
-#include "mpi/constants.hpp"
+#include "mpi/config.hpp"
 #include "mpi/datatypes.hpp"
+#include "mpi/environment.hpp"
 #include "mpi/optional.hpp"
 #include "mpi/status.hpp"
 #include "mpi/request.hpp"
@@ -18,54 +13,4 @@
 #include "mpi/io.hpp"
 #include "mpi/window.hpp"
 
-namespace diy
-{
-namespace mpi
-{
-
-//! \ingroup MPI
-struct environment
-{
-  inline environment(int threading = MPI_THREAD_FUNNELED);
-  inline environment(int argc, char* argv[], int threading = MPI_THREAD_FUNNELED);
-  inline ~environment();
-
-  int   threading() const           { return provided_threading; }
-
-  int   provided_threading;
-};
-
-}
-}
-
-diy::mpi::environment::
-environment(int threading)
-{
-#ifndef VTKM_DIY_NO_MPI
-  int argc = 0; char** argv;
-  MPI_Init_thread(&argc, &argv, threading, &provided_threading);
-#else
-  provided_threading = threading;
-#endif
-}
-
-diy::mpi::environment::
-environment(int argc, char* argv[], int threading)
-{
-#ifndef VTKM_DIY_NO_MPI
-  MPI_Init_thread(&argc, &argv, threading, &provided_threading);
-#else
-  (void) argc; (void) argv;
-  provided_threading = threading;
-#endif
-}
-
-diy::mpi::environment::
-~environment()
-{
-#ifndef VTKM_DIY_NO_MPI
-  MPI_Finalize();
-#endif
-}
-
-#endif
+#endif // VTKMDIY_MPI_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.cpp
new file mode 100644
index 000000000..b89e6a8c7
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.cpp
@@ -0,0 +1,161 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "collectives.hpp"
+#endif
+
+namespace diy
+{
+namespace mpi
+{
+namespace detail
+{
+
+inline void copy_buffer(const void* src, void* dst, size_t size, int count)  // copies size * count bytes from src to dst
+{
+  if (src != dst)  // nothing to do when source and destination are the same buffer
+  {
+    std::copy_n(static_cast<const int8_t*>(src),
+                size * static_cast<size_t>(count),
+                static_cast<int8_t*>(dst));
+  }
+}
+
+void broadcast(const communicator& comm, void* data, int count, const datatype& type, int root)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Bcast(data, count, mpi_cast(type.handle), root, mpi_cast(comm.handle()));
+#else
+  (void) comm; (void) data; (void) count; (void) type; (void) root;
+#endif
+}
+
+request ibroadcast(const communicator& comm, void* data, int count, const datatype& type, int root)
+{
+  request r;
+#if VTKMDIY_HAS_MPI
+  MPI_Ibcast(data, count, mpi_cast(type.handle), root, mpi_cast(comm.handle()), &mpi_cast(r.handle));
+#else
+  (void) comm; (void) data; (void) count; (void) type; (void) root;
+#endif
+  return r;
+}
+
+void gather(const communicator& comm,
+            const void* dataIn, int count, const datatype& type, void* dataOut,
+            int root)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Gather(dataIn, count, mpi_cast(type.handle),
+             dataOut, count, mpi_cast(type.handle),
+             root, mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm; (void)root;
+#endif
+}
+
+void gather_v(const communicator& comm,  // forwards to MPI_Gatherv when VTKMDIY_HAS_MPI is set
+              const void* dataIn, int countIn, const datatype& type,
+              void* dataOut, const int counts[], const int offsets[],
+              int root)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Gatherv(dataIn, countIn, mpi_cast(type.handle),
+              dataOut, counts, offsets, mpi_cast(type.handle),
+              root, mpi_cast(comm.handle()));
+#else  // no MPI: copy the countIn input elements straight to dataOut; comm, counts, offsets and root are unused
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), countIn);
+  (void)comm; (void)counts; (void)offsets; (void)root;
+#endif
+}
+
+void all_gather(const communicator& comm,
+                const void* dataIn, int count, const datatype& type, void* dataOut)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Allgather(dataIn, count, mpi_cast(type.handle),
+                dataOut, count, mpi_cast(type.handle),
+                mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm;
+#endif
+}
+
+void all_gather_v(const communicator& comm,
+                  const void* dataIn, int countIn, const datatype& type,
+                  void* dataOut, const int counts[], const int offsets[])
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Allgatherv(dataIn, countIn, mpi_cast(type.handle),
+                 dataOut, counts, offsets, mpi_cast(type.handle),
+                 mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), countIn);
+  (void)comm; (void)counts; (void)offsets;
+#endif
+}
+
+void reduce(const communicator& comm,
+            const void* dataIn, int count, const datatype& type, void* dataOut,
+            const operation& op, int root)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Reduce(dataIn, dataOut, count, mpi_cast(type.handle), mpi_cast(op.handle), root, mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm; (void)op; (void)root;
+#endif
+}
+
+void all_reduce(const communicator& comm,
+                const void* dataIn, void* dataOut, int count, const datatype& type,
+                const operation& op)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Allreduce(dataIn, dataOut, count, mpi_cast(type.handle), mpi_cast(op.handle), mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm; (void)op;
+#endif
+}
+
+request iall_reduce(const communicator& comm,
+                    const void* dataIn, void* dataOut, int count, const datatype& type,
+                    const operation& op)
+{
+  request r;
+#if VTKMDIY_HAS_MPI
+  MPI_Iallreduce(dataIn, dataOut, count, mpi_cast(type.handle), mpi_cast(op.handle), mpi_cast(comm.handle()), &mpi_cast(r.handle));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm; (void)op;
+#endif
+  return r;
+}
+
+void scan(const communicator& comm,
+          const void* dataIn, void* dataOut, int count, const datatype& type,
+          const operation& op)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Scan(dataIn, dataOut, count, mpi_cast(type.handle), mpi_cast(op.handle), mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm; (void)op;
+#endif
+}
+
+void all_to_all(const communicator& comm,
+                const void* dataIn, int count, const datatype& type, void* dataOut)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Alltoall(dataIn, count, mpi_cast(type.handle), dataOut, count, mpi_cast(type.handle), mpi_cast(comm.handle()));
+#else
+  copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
+  (void)comm;
+#endif
+}
+
+}
+}
+} // diy::mpi::detail
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.hpp
index f4c2683a3..c87414c98 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/collectives.hpp
@@ -1,12 +1,80 @@
-#include <vector>
+#ifndef VTKMDIY_MPI_COLLECTIVES_HPP
+#define VTKMDIY_MPI_COLLECTIVES_HPP
 
-#include "../constants.h" // for DIY_UNUSED.
+#include "config.hpp"
+#include "communicator.hpp"
+#include "datatypes.hpp"
 #include "operations.hpp"
+#include "request.hpp"
+
+#include <algorithm>
+#include <vector>
+#include <numeric>
 
 namespace diy
 {
 namespace mpi
 {
+
+namespace detail
+{
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void broadcast(const communicator& comm,
+               void* data, int count, const datatype& type,
+               int root);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+request ibroadcast(const communicator& comm,
+                   void* data, int count, const datatype& type,
+                   int root);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void gather(const communicator& comm,
+            const void* dataIn, int count, const datatype& type, void* dataOut,
+            int root);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void gather_v(const communicator& comm,
+              const void* dataIn, int countIn, const datatype& type,
+              void* dataOut, const int counts[], const int offsets[],
+              int root);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void all_gather(const communicator& comm,
+                const void* dataIn, int count, const datatype& type, void* dataOut);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void all_gather_v(const communicator& comm,
+                  const void* dataIn, int countIn, const datatype& type,
+                  void* dataOut, const int counts[], const int offsets[]);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void reduce(const communicator& comm,
+            const void* dataIn, int count, const datatype& type, void* dataOut,
+            const operation& op, int root);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void all_reduce(const communicator& comm,
+                const void* dataIn, void* dataOut, int count, const datatype& type,
+                const operation& op);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+request iall_reduce(const communicator& comm,
+                    const void* dataIn, void* dataOut, int count, const datatype& type,
+                    const operation& op);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void scan(const communicator& comm,
+          const void* dataIn, void* dataOut, int count, const datatype& type,
+          const operation& op);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void all_to_all(const communicator& comm,
+                const void* dataIn, int count, const datatype& type, void* dataOut);
+
+} // detail
+
   //!\addtogroup MPI
   //!@{
 
@@ -15,259 +83,181 @@ namespace mpi
   {
     static void broadcast(const communicator& comm, T& x, int root)
     {
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Bcast(address(x), count(x), datatype(x), root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(x);
-      DIY_UNUSED(root);
-#endif
+      detail::broadcast(comm, address(x), count(x), datatype_of(x), root);
     }
 
     static void broadcast(const communicator& comm, std::vector<T>& x, int root)
     {
-#ifndef VTKM_DIY_NO_MPI
       size_t sz = x.size();
-      Collectives<size_t, void*>::broadcast(comm, sz, root);
+      detail::broadcast(comm, &sz, 1, datatype_of(sz), root);
 
       if (comm.rank() != root)
           x.resize(sz);
 
-      MPI_Bcast(address(x), count(x), datatype(x), root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(x);
-      DIY_UNUSED(root);
-#endif
+      detail::broadcast(comm, address(x), count(x), datatype_of(x), root);
     }
 
     static request ibroadcast(const communicator& comm, T& x, int root)
     {
-#ifndef VTKM_DIY_NO_MPI
-      request r;
-      MPI_Ibcast(address(x), count(x), datatype(x), root, comm, &r.r);
-      return r;
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(x);
-      DIY_UNUSED(root);
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Ibcast);
-#endif
+      return detail::ibroadcast(comm, address(x), count(x), datatype_of(x), root);
     }
 
     static void gather(const communicator& comm, const T& in, std::vector<T>& out, int root)
     {
       out.resize(comm.size());
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Gather(address(in), count(in), datatype(in), address(out), count(in), datatype(out), root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(root);
-      out[0] = in;
-#endif
+      detail::gather(comm, address(in), count(in), datatype_of(in), address(out), root);
     }
 
     static void gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root)
     {
-#ifndef VTKM_DIY_NO_MPI
-      std::vector<int>  counts(comm.size());
+      std::vector<int> counts;
+      if (comm.rank() == root)
+      {
+        counts.resize(static_cast<size_t>(comm.size()));
+      }
+
       Collectives<int,void*>::gather(comm, count(in), counts, root);
 
-      std::vector<int>  offsets(comm.size(), 0);
-      for (unsigned i = 1; i < offsets.size(); ++i)
-        offsets[i] = offsets[i-1] + counts[i-1];
+      std::vector<int> offsets;
+      if (comm.rank() == root)
+      {
+        offsets.resize(counts.size());
+        offsets[0] = 0;
+        std::partial_sum(counts.begin(), counts.end() - 1, offsets.begin() + 1);
+      }
 
       int elem_size = count(in[0]);     // size of 1 vector element in units of mpi datatype
-      std::vector<T> buffer((offsets.back() + counts.back()) / elem_size);
-      MPI_Gatherv(address(in), count(in), datatype(in),
-                  address(buffer),
-                  &counts[0],
-                  &offsets[0],
-                  datatype(buffer),
-                  root, comm);
-
-      out.resize(comm.size());
-      size_t cur = 0;
-      for (unsigned i = 0; i < (unsigned)comm.size(); ++i)
+      std::vector<T> buffer;
+      if (comm.rank() == root)
       {
-          out[i].reserve(counts[i] / elem_size);
-          for (unsigned j = 0; j < (unsigned)(counts[i] / elem_size); ++j)
-              out[i].push_back(buffer[cur++]);
+        buffer.resize((offsets.back() + counts.back()) / elem_size);
+      }
+
+      detail::gather_v(comm, address(in), count(in), datatype_of(in),
+                       address(buffer), counts.data(), offsets.data(),
+                       root);
+
+      if (comm.rank() == root)
+      {
+          out.resize(static_cast<size_t>(comm.size()));
+          size_t offset = 0;
+          for (size_t i = 0; i < out.size(); ++i)
+          {
+            auto count = static_cast<size_t>(counts[i] / elem_size);
+            out[i].insert(out[i].end(), buffer.data() + offset, buffer.data() + offset + count);
+            offset += count;
+          }
       }
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(root);
-      out.resize(1);
-      out[0] = in;
-#endif
     }
 
     static void gather(const communicator& comm, const T& in, int root)
     {
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Gather(address(in), count(in), datatype(in), address(in), count(in), datatype(in), root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(in);
-      DIY_UNUSED(root);
-      DIY_UNSUPPORTED_MPI_CALL("MPI_Gather");
-#endif
+      detail::gather(comm, address(in), count(in), datatype_of(in), address(in), root);
     }
 
     static void gather(const communicator& comm, const std::vector<T>& in, int root)
     {
-#ifndef VTKM_DIY_NO_MPI
       Collectives<int,void*>::gather(comm, count(in), root);
-
-      MPI_Gatherv(address(in), count(in), datatype(in),
-                  0, 0, 0,
-                  datatype(in),
-                  root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(in);
-      DIY_UNUSED(root);
-      DIY_UNSUPPORTED_MPI_CALL("MPI_Gatherv");
-#endif
+      detail::gather_v(comm, address(in), count(in), datatype_of(in), 0, 0, 0, root);
     }
 
     static void all_gather(const communicator& comm, const T& in, std::vector<T>& out)
     {
       out.resize(comm.size());
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Allgather(address(in), count(in), datatype(in),
-                    address(out), count(in), datatype(in),
-                    comm);
-#else
-      DIY_UNUSED(comm);
-      out[0] = in;
-#endif
+      detail::all_gather(comm, address(in), count(in), datatype_of(in), address(out));
     }
 
     static void all_gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out)
     {
-#ifndef VTKM_DIY_NO_MPI
-      std::vector<int>  counts(comm.size());
+      std::vector<int>  counts(static_cast<size_t>(comm.size()));
       Collectives<int,void*>::all_gather(comm, count(in), counts);
 
-      std::vector<int>  offsets(comm.size(), 0);
-      for (unsigned i = 1; i < offsets.size(); ++i)
-        offsets[i] = offsets[i-1] + counts[i-1];
+      std::vector<int>  offsets(counts.size());
+      offsets[0] = 0;
+      std::partial_sum(counts.begin(), counts.end() - 1, offsets.begin() + 1);
 
       int elem_size = count(in[0]);     // size of 1 vector element in units of mpi datatype
       std::vector<T> buffer((offsets.back() + counts.back()) / elem_size);
-      MPI_Allgatherv(address(in), count(in), datatype(in),
-                     address(buffer),
-                     &counts[0],
-                     &offsets[0],
-                     datatype(buffer),
-                     comm);
+      detail::all_gather_v(comm,
+                           address(in), count(in), datatype_of(in),
+                           address(buffer),
+                           &counts[0],
+                           &offsets[0]);
 
-      out.resize(comm.size());
-      size_t cur = 0;
-      for (int i = 0; i < comm.size(); ++i)
+      out.resize(static_cast<size_t>(comm.size()));
+      size_t offset = 0;
+      for (size_t i = 0; i < out.size(); ++i)
       {
-          out[i].reserve(counts[i] / elem_size);
-          for (int j = 0; j < (int)(counts[i] / elem_size); ++j)
-              out[i].push_back(buffer[cur++]);
+          auto count = static_cast<size_t>(counts[i] / elem_size);
+          out[i].insert(out[i].end(), buffer.data() + offset, buffer.data() + offset + count);
+          offset += count;
       }
-#else
-      DIY_UNUSED(comm);
-      out.resize(1);
-      out[0] = in;
-#endif
     }
 
     static void reduce(const communicator& comm, const T& in, T& out, int root, const Op&)
     {
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Reduce(address(in), address(out), count(in), datatype(in),
-                 detail::mpi_op<Op>::get(),
-                 root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(root);
-      out = in;
-#endif
+      auto op = detail::mpi_op<Op>::get();
+      detail::reduce(comm, address(in), count(in), datatype_of(in), address(out), op, root);
     }
 
     static void reduce(const communicator& comm, const T& in, int root, const Op&)
     {
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Reduce(address(in), address(in), count(in), datatype(in),
-                 detail::mpi_op<Op>::get(),
-                 root, comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(in);
-      DIY_UNUSED(root);
-      DIY_UNSUPPORTED_MPI_CALL("MPI_Reduce");
-#endif
+      auto op = detail::mpi_op<Op>::get();
+      detail::reduce(comm, address(in), count(in), datatype_of(in), address(in), op, root);
     }
 
     static void all_reduce(const communicator& comm, const T& in, T& out, const Op&)
     {
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Allreduce(address(in), address(out), count(in), datatype(in),
-                    detail::mpi_op<Op>::get(),
-                    comm);
-#else
-      DIY_UNUSED(comm);
-      out = in;
-#endif
+      auto op = detail::mpi_op<Op>::get();
+      detail::all_reduce(comm, address(in), address(out), count(in), datatype_of(in), op);
     }
 
     static void all_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op&)
     {
-#ifndef VTKM_DIY_NO_MPI
+      auto op = detail::mpi_op<Op>::get();
       out.resize(in.size());
-      MPI_Allreduce(address(in), address(out), count(in),
-                    datatype(in),
-                    detail::mpi_op<Op>::get(),
-                    comm);
-#else
-      DIY_UNUSED(comm);
-      out = in;
-#endif
+      detail::all_reduce(comm, address(in), address(out), count(in), datatype_of(in), op);
+    }
+
+    static request iall_reduce(const communicator& comm, const T& in, T& out, const Op&)
+    {
+      auto op = detail::mpi_op<Op>::get();
+      return detail::iall_reduce(comm, address(in), address(out), count(in), datatype_of(in), op);
+    }
+
+    static request iall_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op&)
+    {
+      auto op = detail::mpi_op<Op>::get();
+      out.resize(in.size());
+      return detail::iall_reduce(comm, address(in), address(out), count(in), datatype_of(in), op);
     }
 
     static void scan(const communicator& comm, const T& in, T& out, const Op&)
     {
-#ifndef VTKM_DIY_NO_MPI
-      MPI_Scan(address(in), address(out), count(in), datatype(in),
-               detail::mpi_op<Op>::get(),
-               comm);
-#else
-      DIY_UNUSED(comm);
-      out = in;
-#endif
+      auto op = detail::mpi_op<Op>::get();
+      detail::scan(comm, address(in), address(out), count(in), datatype_of(in), op);
     }
 
     static void all_to_all(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, int n = 1)
     {
-#ifndef VTKM_DIY_NO_MPI
       // n specifies how many elements go to/from every process from every process;
       // the sizes of in and out are expected to be n * comm.size()
 
       int elem_size = count(in[0]);               // size of 1 vector element in units of mpi datatype
       // NB: this will fail if T is a vector
-      MPI_Alltoall(address(in),
-                   elem_size * n,
-                   datatype(in),
-                   address(out),
-                   elem_size * n,
-                   datatype(out),
-                   comm);
-#else
-      DIY_UNUSED(comm);
-      DIY_UNUSED(n);
-      out = in;
-#endif
+      detail::all_to_all(comm, address(in), elem_size * n, datatype_of(in), address(out));
     }
   };
 
+  //! iBarrier; standalone function version for completeness
+  inline request   ibarrier(const communicator& comm)
+  {
+    return comm.ibarrier();
+  }
+
   //! Broadcast to all processes in `comm`.
   template<class T>
+  inline
   void      broadcast(const communicator& comm, T& x, int root)
   {
     Collectives<T,void*>::broadcast(comm, x, root);
@@ -275,6 +265,7 @@ namespace mpi
 
   //! Broadcast for vectors
   template<class T>
+  inline
   void      broadcast(const communicator& comm, std::vector<T>& x, int root)
   {
     Collectives<T,void*>::broadcast(comm, x, root);
@@ -291,6 +282,7 @@ namespace mpi
   //!  On `root` process, `out` is resized to `comm.size()` and filled with
   //! elements from the respective ranks.
   template<class T>
+  inline
   void      gather(const communicator& comm, const T& in, std::vector<T>& out, int root)
   {
     Collectives<T,void*>::gather(comm, in, out, root);
@@ -298,6 +290,7 @@ namespace mpi
 
   //! Same as above, but for vectors.
   template<class T>
+  inline
   void      gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root)
   {
     Collectives<T,void*>::gather(comm, in, out, root);
@@ -305,6 +298,7 @@ namespace mpi
 
   //! Simplified version (without `out`) for use on non-root processes.
   template<class T>
+  inline
   void      gather(const communicator& comm, const T& in, int root)
   {
     Collectives<T,void*>::gather(comm, in, root);
@@ -312,6 +306,7 @@ namespace mpi
 
   //! Simplified version (without `out`) for use on non-root processes.
   template<class T>
+  inline
   void      gather(const communicator& comm, const std::vector<T>& in, int root)
   {
     Collectives<T,void*>::gather(comm, in, root);
@@ -321,6 +316,7 @@ namespace mpi
   //! `out` is resized to `comm.size()` and filled with
   //! elements from the respective ranks.
   template<class T>
+  inline
   void      all_gather(const communicator& comm, const T& in, std::vector<T>& out)
   {
     Collectives<T,void*>::all_gather(comm, in, out);
@@ -328,6 +324,7 @@ namespace mpi
 
   //! Same as above, but for vectors.
   template<class T>
+  inline
   void      all_gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out)
   {
     Collectives<T,void*>::all_gather(comm, in, out);
@@ -335,6 +332,7 @@ namespace mpi
 
   //! reduce
   template<class T, class Op>
+  inline
   void      reduce(const communicator& comm, const T& in, T& out, int root, const Op& op)
   {
     Collectives<T, Op>::reduce(comm, in, out, root, op);
@@ -342,6 +340,7 @@ namespace mpi
 
   //! Simplified version (without `out`) for use on non-root processes.
   template<class T, class Op>
+  inline
   void      reduce(const communicator& comm, const T& in, int root, const Op& op)
   {
     Collectives<T, Op>::reduce(comm, in, root, op);
@@ -349,6 +348,7 @@ namespace mpi
 
   //! all_reduce
   template<class T, class Op>
+  inline
   void      all_reduce(const communicator& comm, const T& in, T& out, const Op& op)
   {
     Collectives<T, Op>::all_reduce(comm, in, out, op);
@@ -356,13 +356,32 @@ namespace mpi
 
   //! Same as above, but for vectors.
   template<class T, class Op>
+  inline
   void      all_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op& op)
   {
     Collectives<T, Op>::all_reduce(comm, in, out, op);
   }
 
+  //! iall_reduce
+  template<class T, class Op>
+  inline
+  request   iall_reduce(const communicator& comm, const T& in, T& out, const Op& op)
+  {
+    return Collectives<T, Op>::iall_reduce(comm, in, out, op);
+  }
+
+  //! Same as above, but for vectors.
+  template<class T, class Op>
+  inline
+  request   iall_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op& op)
+  {
+    return Collectives<T, Op>::iall_reduce(comm, in, out, op);
+  }
+
+
   //! scan
   template<class T, class Op>
+  inline
   void      scan(const communicator& comm, const T& in, T& out, const Op& op)
   {
     Collectives<T, Op>::scan(comm, in, out, op);
@@ -370,6 +389,7 @@ namespace mpi
 
   //! all_to_all
   template<class T>
+  inline
   void      all_to_all(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, int n = 1)
   {
     Collectives<T, void*>::all_to_all(comm, in, out, n);
@@ -378,3 +398,9 @@ namespace mpi
   //!@}
 }
 }
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "collectives.cpp"
+#endif
+
+#endif // VTKMDIY_MPI_COLLECTIVES_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.cpp
new file mode 100644
index 000000000..e415a1c78
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.cpp
@@ -0,0 +1,130 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "communicator.hpp"
+#endif
+
+diy::mpi::communicator::communicator()
+  : comm_(make_DIY_MPI_Comm(MPI_COMM_WORLD)), rank_(0), size_(1), owner_(false)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Comm_rank(mpi_cast(comm_), &rank_);
+  MPI_Comm_size(mpi_cast(comm_), &size_);
+#endif
+}
+
+diy::mpi::communicator::
+communicator(DIY_MPI_Comm comm, bool owner):
+    comm_(comm), rank_(0), size_(1), owner_(owner)
+{
+#if VTKMDIY_HAS_MPI
+  if (mpi_cast(comm_) != MPI_COMM_NULL)
+  {
+    MPI_Comm_rank(mpi_cast(comm_), &rank_);
+    MPI_Comm_size(mpi_cast(comm_), &size_);
+  }
+#endif
+}
+
+#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
+diy::mpi::communicator::
+communicator(MPI_Comm comm, bool owner):
+    comm_(comm), rank_(0), size_(1), owner_(owner)
+{
+#if VTKMDIY_HAS_MPI
+  if (comm_ != MPI_COMM_NULL)
+  {
+    MPI_Comm_rank(comm_, &rank_);
+    MPI_Comm_size(comm_, &size_);
+  }
+#endif
+}
+#endif
+
+void
+diy::mpi::communicator::
+destroy()
+{
+#if VTKMDIY_HAS_MPI
+    if (owner_)
+        MPI_Comm_free(&mpi_cast(comm_));
+#endif
+}
+
+diy::mpi::status
+diy::mpi::communicator::
+probe(int source, int tag) const
+{
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_Probe(source, tag, mpi_cast(comm_), &mpi_cast(s.handle));
+  return s;
+#else
+  (void) source; (void) tag;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Probe);
+#endif
+}
+
+diy::mpi::optional<diy::mpi::status>
+diy::mpi::communicator::
+iprobe(int source, int tag) const
+{
+  (void) source; (void) tag;
+#if VTKMDIY_HAS_MPI
+  status s;
+  int flag;
+  MPI_Iprobe(source, tag, mpi_cast(comm_), &flag, &mpi_cast(s.handle));
+  if (flag)
+    return s;
+#endif
+  return optional<status>();
+}
+
+void
+diy::mpi::communicator::
+barrier() const
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Barrier(mpi_cast(comm_));
+#endif
+}
+
+diy::mpi::communicator
+diy::mpi::communicator::
+split(int color, int key) const
+{
+#if VTKMDIY_HAS_MPI
+    DIY_MPI_Comm newcomm;
+    MPI_Comm_split(mpi_cast(comm_), color, key, &mpi_cast(newcomm));
+    return communicator(newcomm, true);
+#else
+    (void) color; (void) key;
+    return communicator();
+#endif
+}
+
+diy::mpi::request
+diy::mpi::communicator::
+ibarrier() const
+{
+#if VTKMDIY_HAS_MPI
+    request r;
+    MPI_Ibarrier(mpi_cast(comm_), &mpi_cast(r.handle));
+    return r;
+#else
+    // this is not the ideal fix; in principle we should just return a status
+    // that tests true, but this requires redesigning some parts of our no-mpi
+    // handling
+    VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Ibarrier);
+#endif
+}
+
+void
+diy::mpi::communicator::
+duplicate(const communicator& other)
+{
+#if VTKMDIY_HAS_MPI
+    DIY_MPI_Comm newcomm;
+    MPI_Comm_dup(mpi_cast(other.comm_), &mpi_cast(newcomm));
+    (*this) = communicator(newcomm,true);
+#endif
+    (void) other;
+}
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.hpp
index a99b20eee..80550776b 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/communicator.hpp
@@ -1,3 +1,12 @@
+#ifndef VTKMDIY_MPI_COMMUNICATOR_HPP
+#define VTKMDIY_MPI_COMMUNICATOR_HPP
+
+#include "config.hpp"
+#include "optional.hpp"
+#include "point-to-point.hpp"
+#include "request.hpp"
+#include "status.hpp"
+
 namespace diy
 {
 namespace mpi
@@ -8,8 +17,14 @@ namespace mpi
   class communicator
   {
     public:
-                inline
-                communicator(MPI_Comm comm = MPI_COMM_WORLD, bool owner = false);
+                VTKMDIY_MPI_EXPORT_FUNCTION
+                communicator();
+
+                communicator(DIY_MPI_Comm comm):
+                  communicator(comm, false) {}
+
+                VTKMDIY_MPI_EXPORT_FUNCTION
+                communicator(DIY_MPI_Comm comm, bool owner);
 
                 ~communicator()                     { destroy(); }
 
@@ -25,9 +40,19 @@ namespace mpi
                     size_(other.size_),
                     owner_(other.owner_)                    { other.owner_ = false; }
 
-    communicator&
+#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
+                communicator(MPI_Comm comm):
+                  communicator(comm, false) {}
+
+                VTKMDIY_MPI_EXPORT_FUNCTION
+                communicator(MPI_Comm comm, bool owner);
+
+                operator MPI_Comm() const { return comm_.data; } // const again (as before this change) so const communicators still convert
+#endif
+
+      communicator&
                 operator=(const communicator& other)        { destroy(); comm_ = other.comm_; rank_ = other.rank_; size_ = other.size_; owner_ = false; return *this; }
-    communicator&
+      communicator&
                 operator=(communicator&& other)             { destroy(); comm_ = other.comm_; rank_ = other.rank_; size_ = other.size_; owner_ = other.owner_; other.owner_ = false; return *this; }
 
       int       rank() const                        { return rank_; }
@@ -35,193 +60,71 @@ namespace mpi
 
       //! Send `x` to processor `dest` using `tag` (blocking).
       template<class T>
-      void      send(int dest, int tag, const T& x) const   { detail::send<T>()(comm_, dest, tag, x); }
+      void      send(int dest, int tag, const T& x) const   { detail::send(comm_, dest, tag, x); }
 
       //! Receive `x` from `dest` using `tag` (blocking).
       //! If `T` is an `std::vector<...>`, `recv` will resize it to fit exactly the sent number of values.
-      template <class T>
-      status recv(int source, int tag, T &x) const
-      {
-#if defined(VTKM_DIY_NO_MPI) && defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ < 8 // CUDA 7.5 workaround
-        (void) source; (void)tag; (void)x;
-        DIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
-#else
-        return detail::recv<T>{}(comm_, source, tag, x);
-#endif
-      }
+      template<class T>
+      status    recv(int source, int tag, T& x) const       { return detail::recv(comm_, source, tag, x); }
 
       //! Non-blocking version of `send()`.
-      template <class T>
-      request isend(int dest, int tag, const T &x) const
-      {
-#if defined(VTKM_DIY_NO_MPI) && defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ < 8 // CUDA 7.5 workaround
-        (void) dest; (void)tag; (void)x;
-        DIY_UNSUPPORTED_MPI_CALL(MPI_Send);
-#else
-        return detail::isend<T>{}(comm_, dest, tag, x);
-#endif
-      }
+      template<class T>
+      request   isend(int dest, int tag, const T& x) const  { return detail::isend(comm_, dest, tag, x); }
 
       //! Non-blocking version of `ssend()`.
       template<class T>
-      request   issend(int dest, int tag, const T& x) const  { return detail::issend<T>()(comm_, dest, tag, x); }
+      request   issend(int dest, int tag, const T& x) const  { return detail::issend(comm_, dest, tag, x); }
 
       //! Non-blocking version of `recv()`.
       //! If `T` is an `std::vector<...>`, its size must be big enough to accommodate the sent values.
-      template <class T>
-      request irecv(int source, int tag, T &x) const
-      {
-#if defined(VTKM_DIY_NO_MPI) && defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ < 8 // CUDA 7.5 workaround
-        (void)source; (void)tag; (void)x;
-        DIY_UNSUPPORTED_MPI_CALL(MPI_Irecv);
-#else
-        return detail::irecv<T>()(comm_, source, tag, x);
-#endif
-      }
+      template<class T>
+      request   irecv(int source, int tag, T& x) const      { return detail::irecv(comm_, source, tag, x); }
 
       //! probe
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       status    probe(int source, int tag) const;
 
       //! iprobe
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       optional<status>
                 iprobe(int source, int tag) const;
 
       //! barrier
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       void      barrier() const;
 
       //! Nonblocking version of barrier
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       request   ibarrier() const;
 
-                operator MPI_Comm() const                   { return comm_; }
-
       //! split
       //! When keys are the same, the ties are broken by the rank in the original comm.
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       communicator
                 split(int color, int key = 0) const;
 
       //! duplicate
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       void      duplicate(const communicator& other);
 
+      DIY_MPI_Comm handle() const { return comm_; }
+
     private:
-      inline
+      VTKMDIY_MPI_EXPORT_FUNCTION
       void      destroy();
 
     private:
-      MPI_Comm  comm_;
-      int       rank_;
-      int       size_;
-      bool      owner_;
+      DIY_MPI_Comm  comm_;
+      int           rank_;
+      int           size_;
+      bool          owner_;
   };
-}
-}
 
-diy::mpi::communicator::
-communicator(MPI_Comm comm, bool owner):
-    comm_(comm), rank_(0), size_(1), owner_(owner)
-{
-#ifndef VTKM_DIY_NO_MPI
-  if (comm != MPI_COMM_NULL)
-  {
-    MPI_Comm_rank(comm_, &rank_);
-    MPI_Comm_size(comm_, &size_);
-  }
+}
+} // diy::mpi
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "communicator.cpp"
 #endif
-}
 
-void
-diy::mpi::communicator::
-destroy()
-{
-#ifndef VTKM_DIY_NO_MPI
-    if (owner_)
-        MPI_Comm_free(&comm_);
-#endif
-}
-
-diy::mpi::status
-diy::mpi::communicator::
-probe(int source, int tag) const
-{
-  (void) source;
-  (void) tag;
-
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  MPI_Probe(source, tag, comm_, &s.s);
-  return s;
-#else
-  DIY_UNSUPPORTED_MPI_CALL(MPI_Probe);
-#endif
-}
-
-diy::mpi::optional<diy::mpi::status>
-diy::mpi::communicator::
-iprobe(int source, int tag) const
-{
-  (void) source;
-  (void) tag;
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  int flag;
-  MPI_Iprobe(source, tag, comm_, &flag, &s.s);
-  if (flag)
-    return s;
-#endif
-  return optional<status>();
-}
-
-void
-diy::mpi::communicator::
-barrier() const
-{
-#ifndef VTKM_DIY_NO_MPI
-  MPI_Barrier(comm_);
-#endif
-}
-
-diy::mpi::communicator
-diy::mpi::communicator::
-split(int color, int key) const
-{
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Comm newcomm;
-    MPI_Comm_split(comm_, color, key, &newcomm);
-    return communicator(newcomm, true);
-#else
-    return communicator();
-#endif
-}
-
-diy::mpi::request
-diy::mpi::communicator::
-ibarrier() const
-{
-#ifndef VTKM_DIY_NO_MPI
-    request r_;
-    MPI_Ibarrier(comm_, &r_.r);
-    return r_;
-#else
-    // this is not the ideal fix; in principle we should just return a status
-    // that tests true, but this requires redesigning some parts of our no-mpi
-    // handling
-    DIY_UNSUPPORTED_MPI_CALL(MPI_Ibarrier);
-#endif
-}
-
-
-void
-diy::mpi::communicator::
-duplicate(const communicator& other)
-{
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Comm newcomm;
-    MPI_Comm_dup(other.comm_, &newcomm);
-    (*this) = communicator(newcomm,true);
-#endif
-}
+#endif // VTKMDIY_MPI_COMMUNICATOR_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/config.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/config.hpp
new file mode 100644
index 000000000..b8a6643af
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/config.hpp
@@ -0,0 +1,70 @@
+#ifndef VTKMDIY_MPI_CONFIG_HPP
+#define VTKMDIY_MPI_CONFIG_HPP
+
+/// We want to allow the use of `diy::mpi` in either header-only or library mode.
+/// VTKMDIY_MPI_AS_LIB is defined when using library mode.
+/// This file contains some configuration macros. To maintain backwards compatibility
+/// suitable default values should be defined when using header-only mode.
+
+/// VTKMDIY_HAS_MPI should always be defined when VTKMDIY_MPI_AS_LIB is defined, but only for
+/// the compilation units that are part of the library.
+/// VTKMDIY_HAS_MPI=1 means the MPI library is available.
+/// For header-only, the default is to assume MPI is available
+#if !defined(VTKMDIY_MPI_AS_LIB) && !defined(VTKMDIY_HAS_MPI)
+#  define VTKMDIY_HAS_MPI 1
+#endif
+
+/// Include appropriate mpi header. Since VTKMDIY_HAS_MPI is only defined for
+/// the compilation units of the library, when in library mode, the header is
+/// only included for the library's compilation units.
+#ifdef VTKMDIY_HAS_MPI
+#  if VTKMDIY_HAS_MPI
+#    include <mpi.h>
+#  else
+#    include "no-mpi.hpp"
+#  endif
+#endif
+
+/// Classes and objects that need to be visible to clients of the library should be
+/// marked as VTKMDIY_MPI_EXPORT. Similarly API functions should be marked as
+/// VTKMDIY_MPI_EXPORT_FUNCTION.
+#include "diy-mpi-export.h" // defines VTKMDIY_MPI_EXPORT and VTKMDIY_MPI_EXPORT_FUNCTION
+
+/// Define aliases for MPI types
+#ifdef VTKMDIY_MPI_AS_LIB
+#  include "mpitypes.hpp" // only configured in library mode
+#else // ifdef VTKMDIY_MPI_AS_LIB
+
+namespace diy
+{
+namespace mpi
+{
+
+#define DEFINE_DIY_MPI_TYPE(mpitype)                                          \
+struct DIY_##mpitype {                                                        \
+  DIY_##mpitype() = default;                                                  \
+  DIY_##mpitype(const mpitype& obj) : data(obj) {}                            \
+  DIY_##mpitype& operator=(const mpitype& obj) { data = obj; return *this; }  \
+  operator mpitype() { return data; }                                         \
+  mpitype data;                                                               \
+};
+
+DEFINE_DIY_MPI_TYPE(MPI_Comm)
+DEFINE_DIY_MPI_TYPE(MPI_Datatype)
+DEFINE_DIY_MPI_TYPE(MPI_Status)
+DEFINE_DIY_MPI_TYPE(MPI_Request)
+DEFINE_DIY_MPI_TYPE(MPI_Op)
+DEFINE_DIY_MPI_TYPE(MPI_File)
+DEFINE_DIY_MPI_TYPE(MPI_Win)
+
+#undef DEFINE_DIY_MPI_TYPE
+
+}
+} // diy::mpi
+#endif // ifdef VTKMDIY_MPI_AS_LIB
+
+#ifdef VTKMDIY_HAS_MPI
+#  include "mpi_cast.hpp"
+#endif
+
+#endif // VTKMDIY_MPI_CONFIG_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/constants.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/constants.hpp
deleted file mode 100644
index 5bb5ec4cc..000000000
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/constants.hpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef VTKMDIY_MPI_CONSTANTS_HPP
-#define VTKMDIY_MPI_CONSTANTS_HPP
-
-namespace diy
-{
-namespace mpi
-{
-  const int any_source  = MPI_ANY_SOURCE;
-  const int any_tag     = MPI_ANY_TAG;
-}
-}
-
-#endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.cpp
new file mode 100644
index 000000000..807b9731f
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.cpp
@@ -0,0 +1,34 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "datatypes.hpp"
+#endif
+
+namespace diy
+{
+namespace mpi
+{
+
+namespace detail
+{
+
+#define DIY_MPI_DATATYPE_MAP(cpp_type, mpi_type)                                                  \
+  template<>  datatype get_mpi_datatype<cpp_type>() {                                             \
+    return datatype(make_DIY_MPI_Datatype(mpi_type));                                             \
+  }
+
+  DIY_MPI_DATATYPE_MAP(char,                MPI_BYTE)
+  DIY_MPI_DATATYPE_MAP(unsigned char,       MPI_BYTE)
+  DIY_MPI_DATATYPE_MAP(bool,                MPI_BYTE)
+  DIY_MPI_DATATYPE_MAP(int,                 MPI_INT)
+  DIY_MPI_DATATYPE_MAP(unsigned,            MPI_UNSIGNED)
+  DIY_MPI_DATATYPE_MAP(long,                MPI_LONG)
+  DIY_MPI_DATATYPE_MAP(unsigned long,       MPI_UNSIGNED_LONG)
+  DIY_MPI_DATATYPE_MAP(long long,           MPI_LONG_LONG_INT)
+  DIY_MPI_DATATYPE_MAP(unsigned long long,  MPI_UNSIGNED_LONG_LONG)
+  DIY_MPI_DATATYPE_MAP(float,               MPI_FLOAT)
+  DIY_MPI_DATATYPE_MAP(double,              MPI_DOUBLE)
+
+#undef DIY_MPI_DATATYPE_MAP
+
+}
+}
+} // diy::mpi::detail
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.hpp
index 1a1d45fba..24d4b3275 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/datatypes.hpp
@@ -1,16 +1,31 @@
 #ifndef VTKMDIY_MPI_DATATYPES_HPP
 #define VTKMDIY_MPI_DATATYPES_HPP
 
+#include "config.hpp"
+
 #include <vector>
+#include <array>
 
 namespace diy
 {
 namespace mpi
 {
+
+struct datatype
+{
+  datatype() = default;
+  datatype(const DIY_MPI_Datatype& dt) : handle(dt) {}
+
+#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
+  datatype(const MPI_Datatype& dt) : handle(dt) {}
+  operator MPI_Datatype() { return handle; }
+#endif
+
+  DIY_MPI_Datatype handle;
+};
+
 namespace detail
 {
-  template<class T> MPI_Datatype  get_mpi_datatype();
-
   struct true_type  {};
   struct false_type {};
 
@@ -18,28 +33,34 @@ namespace detail
   template<class T>
   struct is_mpi_datatype        { typedef false_type    type; };
 
-#define VTKMDIY_MPI_DATATYPE_MAP(cpp_type, mpi_type) \
-  template<>  inline MPI_Datatype  get_mpi_datatype<cpp_type>() { return mpi_type; }  \
-  template<>  struct is_mpi_datatype<cpp_type>                  { typedef true_type type; };    \
-  template<>  struct is_mpi_datatype< std::vector<cpp_type> >   { typedef true_type type; };
+  template<class T> datatype  get_mpi_datatype();
 
-  VTKMDIY_MPI_DATATYPE_MAP(char,                  MPI_BYTE);
-  VTKMDIY_MPI_DATATYPE_MAP(unsigned char,         MPI_BYTE);
-  VTKMDIY_MPI_DATATYPE_MAP(bool,                  MPI_BYTE);
-  VTKMDIY_MPI_DATATYPE_MAP(int,                   MPI_INT);
-  VTKMDIY_MPI_DATATYPE_MAP(unsigned,              MPI_UNSIGNED);
-  VTKMDIY_MPI_DATATYPE_MAP(long,                  MPI_LONG);
-  VTKMDIY_MPI_DATATYPE_MAP(unsigned long,         MPI_UNSIGNED_LONG);
-  VTKMDIY_MPI_DATATYPE_MAP(long long,             MPI_LONG_LONG_INT);
-  VTKMDIY_MPI_DATATYPE_MAP(unsigned long long,    MPI_UNSIGNED_LONG_LONG);
-  VTKMDIY_MPI_DATATYPE_MAP(float,                 MPI_FLOAT);
-  VTKMDIY_MPI_DATATYPE_MAP(double,                MPI_DOUBLE);
+  #define VTKMDIY_MPI_DATATYPE_DEFAULT(cpp_type)                                                      \
+  template<> VTKMDIY_MPI_EXPORT_FUNCTION datatype get_mpi_datatype<cpp_type>();                       \
+  template<>  struct is_mpi_datatype< cpp_type >                { typedef true_type type; };      \
+  template<>  struct is_mpi_datatype< std::vector<cpp_type> >   { typedef true_type type; };      \
+  template<size_t N>                                                                              \
+              struct is_mpi_datatype< std::array<cpp_type, N> > { typedef true_type type; };
 
-  /* mpi_datatype: helper routines, specialized for std::vector<...> */
+  VTKMDIY_MPI_DATATYPE_DEFAULT(char)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned char)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(bool)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(int)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(long)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned long)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(long long)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned long long)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(float)
+  VTKMDIY_MPI_DATATYPE_DEFAULT(double)
+
+  #undef VTKMDIY_MPI_DATATYPE_DEFAULT
+
+  /* mpi_datatype: helper routines, specialized for std::vector<...>, std::array<...> */
   template<class T>
   struct mpi_datatype
   {
-    static MPI_Datatype         datatype()              { return get_mpi_datatype<T>(); }
+    static diy::mpi::datatype   datatype()              { return get_mpi_datatype<T>(); }
     static const void*          address(const T& x)     { return &x; }
     static void*                address(T& x)           { return &x; }
     static int                  count(const T&)         { return 1; }
@@ -50,44 +71,53 @@ namespace detail
   {
     typedef     std::vector<U>      VecU;
 
-    static MPI_Datatype         datatype()              { return mpi_datatype<U>::datatype(); }
+    static diy::mpi::datatype   datatype()              { return mpi_datatype<U>::datatype(); }
     static const void*          address(const VecU& x)  { return x.data(); }
     static void*                address(VecU& x)        { return x.data(); }
     static int                  count(const VecU& x)    { return x.empty() ? 0 : (static_cast<int>(x.size()) * mpi_datatype<U>::count(x[0])); }
   };
+
+  template<class U, size_t D>
+  struct mpi_datatype< std::array<U,D> >
+  {
+    typedef     std::array<U,D> ArrayU;
+
+    static diy::mpi::datatype   datatype()                  { return mpi_datatype<U>::datatype(); }
+    static const void*          address(const ArrayU& x)    { return x.data(); }
+    static void*                address(ArrayU& x)          { return x.data(); }
+    static int                  count(const ArrayU& x)      { return x.empty() ? 0 : (static_cast<int>(x.size()) * mpi_datatype<U>::count(x[0])); }
+  };
 } // detail
 
 template<class U>
-static MPI_Datatype datatype(const U&)
+static datatype datatype_of(const U&)
 {
-    using Datatype = detail::mpi_datatype<U>;
-    return Datatype::datatype();
+    return detail::mpi_datatype<U>::datatype();
 }
 
 template<class U>
 static void* address(const U& x)
 {
-    using Datatype = detail::mpi_datatype<U>;
-    return const_cast<void*>(Datatype::address(x));
+    return const_cast<void*>(detail::mpi_datatype<U>::address(x));
 }
 
 template<class U>
 static void* address(U& x)
 {
-    using Datatype = detail::mpi_datatype<U>;
-    return Datatype::address(x);
+    return detail::mpi_datatype<U>::address(x);
 }
 
 template<class U>
 static int count(const U& x)
 {
-    using Datatype = detail::mpi_datatype<U>;
-    return Datatype::count(x);
+    return detail::mpi_datatype<U>::count(x);
 }
 
-
-
 } // mpi
 } // diy
 
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "datatypes.cpp"
 #endif
+
+#endif // VTKMDIY_MPI_DATATYPES_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/diy-mpi-export.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/diy-mpi-export.h
new file mode 100644
index 000000000..c3d3336df
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/diy-mpi-export.h
@@ -0,0 +1,49 @@
+#ifndef VTKMDIY_MPI_EXPORT_H
+#define VTKMDIY_MPI_EXPORT_H
+
+#if defined(_MSC_VER)
+#  ifdef VTKMDIY_MPI_STATIC_BUILD
+     /* This is a static component and has no need for exports.
+        ELF-based static libraries are able to have hidden/default visibility
+        controls on symbols, so we should propagate this information in that
+        use case.
+     */
+#    define VTKMDIY_MPI_EXPORT_DEFINE
+#    define VTKMDIY_MPI_IMPORT_DEFINE
+#    define VTKMDIY_MPI_NO_EXPORT_DEFINE
+#  else
+#    define VTKMDIY_MPI_EXPORT_DEFINE __declspec(dllexport)
+#    define VTKMDIY_MPI_IMPORT_DEFINE __declspec(dllimport)
+#    define VTKMDIY_MPI_NO_EXPORT_DEFINE
+#  endif
+#else
+#  define VTKMDIY_MPI_EXPORT_DEFINE __attribute__((visibility("default")))
+#  define VTKMDIY_MPI_IMPORT_DEFINE __attribute__((visibility("default")))
+#  define VTKMDIY_MPI_NO_EXPORT_DEFINE __attribute__((visibility("hidden")))
+#endif
+
+#ifndef VTKMDIY_MPI_EXPORT
+#  if !defined(VTKMDIY_MPI_AS_LIB)
+#    define VTKMDIY_MPI_EXPORT
+#    define VTKMDIY_MPI_EXPORT_FUNCTION inline
+#  else
+#    if defined(VTKMDIY_HAS_MPI)
+       /* We are building this library */
+#      define VTKMDIY_MPI_EXPORT VTKMDIY_MPI_EXPORT_DEFINE
+#    else
+       /* We are using this library */
+#      define VTKMDIY_MPI_EXPORT VTKMDIY_MPI_IMPORT_DEFINE
+#    endif
+#    define VTKMDIY_MPI_EXPORT_FUNCTION VTKMDIY_MPI_EXPORT
+#  endif
+#endif
+
+#ifndef VTKMDIY_MPI_EXPORT_FUNCTION
+#error "VTKMDIY_MPI_EXPORT_FUNCTION not defined"
+#endif
+
+#ifndef VTKMDIY_MPI_NO_EXPORT
+#  define VTKMDIY_MPI_NO_EXPORT VTKMDIY_MPI_NO_EXPORT_DEFINE
+#endif
+
+#endif // VTKMDIY_MPI_EXPORT_H
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/environment.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/environment.cpp
new file mode 100644
index 000000000..c056dd89a
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/environment.cpp
@@ -0,0 +1,62 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "environment.hpp"
+#endif
+
+bool diy::mpi::environment::initialized()
+{
+#if VTKMDIY_HAS_MPI
+  int flag;
+  MPI_Initialized(&flag);
+  return flag != 0;
+#else
+  return true;
+#endif
+}
+
+diy::mpi::environment::environment()
+{
+#if VTKMDIY_HAS_MPI
+  int argc = 0; char** argv = nullptr;
+  MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided_threading);
+#else
+  provided_threading = MPI_THREAD_FUNNELED;
+#endif
+}
+
+diy::mpi::environment::environment(int requested_threading)
+{
+#if VTKMDIY_HAS_MPI
+  int argc = 0; char** argv = nullptr;
+  MPI_Init_thread(&argc, &argv, requested_threading, &provided_threading);
+#else
+  provided_threading = requested_threading;
+#endif
+}
+
+diy::mpi::environment::environment(int argc, char* argv[])
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided_threading);
+#else
+  (void) argc; (void) argv;
+  provided_threading = MPI_THREAD_FUNNELED;
+#endif
+}
+
+diy::mpi::environment::environment(int argc, char* argv[], int requested_threading)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Init_thread(&argc, &argv, requested_threading, &provided_threading);
+#else
+  (void) argc; (void) argv;
+  provided_threading = requested_threading;
+#endif
+}
+
+diy::mpi::environment::
+~environment()
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Finalize();
+#endif
+}
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/environment.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/environment.hpp
new file mode 100644
index 000000000..a6efe8523
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/environment.hpp
@@ -0,0 +1,35 @@
+#ifndef VTKMDIY_MPI_ENVIRONMENT_HPP
+#define VTKMDIY_MPI_ENVIRONMENT_HPP
+
+#include "config.hpp"
+
+namespace diy
+{
+namespace mpi
+{
+
+//! \ingroup MPI
+struct environment
+{
+  VTKMDIY_MPI_EXPORT_FUNCTION static bool initialized();
+
+  VTKMDIY_MPI_EXPORT_FUNCTION environment();
+  VTKMDIY_MPI_EXPORT_FUNCTION environment(int requested_threading);
+  VTKMDIY_MPI_EXPORT_FUNCTION environment(int argc, char* argv[]);
+  VTKMDIY_MPI_EXPORT_FUNCTION environment(int argc, char* argv[], int requested_threading);
+
+  VTKMDIY_MPI_EXPORT_FUNCTION  ~environment();
+
+  int   threading() const           { return provided_threading; }
+
+  int   provided_threading;
+};
+
+}
+} // diy::mpi
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "environment.cpp"
+#endif
+
+#endif // VTKMDIY_MPI_ENVIRONMENT_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.cpp
new file mode 100644
index 000000000..3afab6e85
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.cpp
@@ -0,0 +1,222 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "io.hpp"
+#endif
+
+#include "status.hpp"
+
+#ifdef VTKMDIY_MPI_AS_LIB
+const int diy::mpi::io::file::rdonly          = MPI_MODE_RDONLY;
+const int diy::mpi::io::file::rdwr            = MPI_MODE_RDWR;
+const int diy::mpi::io::file::wronly          = MPI_MODE_WRONLY;
+const int diy::mpi::io::file::create          = MPI_MODE_CREATE;
+const int diy::mpi::io::file::exclusive       = MPI_MODE_EXCL;
+const int diy::mpi::io::file::delete_on_close = MPI_MODE_DELETE_ON_CLOSE;
+const int diy::mpi::io::file::unique_open     = MPI_MODE_UNIQUE_OPEN;
+const int diy::mpi::io::file::sequential      = MPI_MODE_SEQUENTIAL;
+const int diy::mpi::io::file::append          = MPI_MODE_APPEND;
+#endif
+
+diy::mpi::io::file::
+file(const communicator& comm__, const std::string& filename, int mode)
+: comm_(comm__)
+{
+#if VTKMDIY_HAS_MPI
+  int ret = MPI_File_open(diy::mpi::mpi_cast(comm__.handle()), const_cast<char*>(filename.c_str()), mode, MPI_INFO_NULL, &diy::mpi::mpi_cast(fh));
+  if (ret)
+      throw std::runtime_error("DIY cannot open file: " + filename);
+#else
+  (void)comm__; (void)filename; (void)mode;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_open);
+#endif
+}
+
+void
+diy::mpi::io::file::
+close()
+{
+#if VTKMDIY_HAS_MPI
+  if (diy::mpi::mpi_cast(fh) != MPI_FILE_NULL)
+    MPI_File_close(&diy::mpi::mpi_cast(fh));
+#endif
+}
+
+diy::mpi::io::offset
+diy::mpi::io::file::
+size() const
+{
+#if VTKMDIY_HAS_MPI
+  MPI_Offset sz;
+  MPI_File_get_size(diy::mpi::mpi_cast(fh), &sz);
+  return static_cast<offset>(sz);
+#else
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_get_size);
+#endif
+}
+
+void
+diy::mpi::io::file::
+resize(diy::mpi::io::offset size_)
+{
+#if VTKMDIY_HAS_MPI
+  MPI_File_set_size(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(size_));
+#else
+  (void)size_;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_set_size);
+#endif
+}
+
+void
+diy::mpi::io::file::
+read_at(offset o, char* buffer, size_t size_)
+{
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_File_read_at(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
+#else
+  (void)o; (void)buffer; (void)size_;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at);
+#endif
+}
+
+void
+diy::mpi::io::file::
+read_at_all(offset o, char* buffer, size_t size_)
+{
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_File_read_at_all(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
+#else
+  (void)o; (void)buffer; (void)size_;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at_all);
+#endif
+}
+
+void
+diy::mpi::io::file::
+write_at(offset o, const char* buffer, size_t size_)
+{
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_File_write_at(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), (void *)buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
+#else
+  (void)o; (void)buffer; (void)size_;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at);
+#endif
+}
+
+void
+diy::mpi::io::file::
+write_at_all(offset o, const char* buffer, size_t size_)
+{
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_File_write_at_all(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), (void *)buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
+#else
+  (void)o; (void)buffer; (void)size_;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at_all);
+#endif
+}
+
+void
+diy::mpi::io::file::
+read_bov(const DiscreteBounds& bounds, int ndims, const int dims[], char* buffer, size_t offset, const datatype& dt, bool collective, int chunk)
+{
+#if VTKMDIY_HAS_MPI
+  int total = 1;
+  std::vector<int> subsizes;
+  for (unsigned i = 0; i < static_cast<unsigned>(ndims); ++i)
+  {
+    subsizes.push_back(bounds.max[i] - bounds.min[i] + 1);
+    total *= subsizes.back();
+  }
+
+  MPI_Datatype T_type;
+  if (chunk == 1)
+  {
+    T_type = diy::mpi::mpi_cast(dt.handle);
+  }
+  else
+  {
+    // create an MPI struct of size chunk to read the data in those chunks
+    // (this works around an MPI-IO weirdness where crucial quantities
+    // are ints, which is too narrow a type)
+    int             array_of_blocklengths[]  = { chunk };
+    MPI_Aint        array_of_displacements[] = { 0 };
+    MPI_Datatype    array_of_types[]         = { diy::mpi::mpi_cast(dt.handle) };
+    MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
+    MPI_Type_commit(&T_type);
+  }
+
+  MPI_Datatype fileblk;
+  MPI_Type_create_subarray(ndims, dims, subsizes.data(), (int*) &bounds.min[0], MPI_ORDER_C, T_type, &fileblk);
+  MPI_Type_commit(&fileblk);
+
+  MPI_File_set_view(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(offset), T_type, fileblk, (char*)"native", MPI_INFO_NULL);
+
+  mpi::status s;
+  if (!collective)
+    MPI_File_read(diy::mpi::mpi_cast(fh), buffer, total, T_type, &mpi_cast(s.handle));
+  else
+    MPI_File_read_all(diy::mpi::mpi_cast(fh), buffer, total, T_type, &mpi_cast(s.handle));
+
+  if (chunk != 1)
+    MPI_Type_free(&T_type);
+  MPI_Type_free(&fileblk);
+#else
+  (void) bounds; (void) ndims; (void) dims, (void) buffer; (void) offset, (void) dt, (void) collective; (void) chunk;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::io::file::read_bov);
+#endif
+}
+
+void
+diy::mpi::io::file::
+write_bov(const DiscreteBounds& bounds, const DiscreteBounds& core, int ndims, const int dims[], const char* buffer, size_t offset, const datatype& dt, bool collective, int chunk)
+{
+#if VTKMDIY_HAS_MPI
+  std::vector<int> subsizes;
+  std::vector<int> buffer_shape, buffer_start;
+  for (unsigned i = 0; i < static_cast<unsigned>(ndims); ++i)
+  {
+    buffer_shape.push_back(bounds.max[i] - bounds.min[i] + 1);
+    buffer_start.push_back(core.min[i] - bounds.min[i]);
+    subsizes.push_back(core.max[i] - core.min[i] + 1);
+  }
+
+  MPI_Datatype T_type;
+  if (chunk == 1)
+  {
+    T_type = diy::mpi::mpi_cast(dt.handle);
+  }
+  else
+  {
+    // assume T is a binary block and create an MPI struct of appropriate size
+    int             array_of_blocklengths[]  = { chunk };
+    MPI_Aint        array_of_displacements[] = { 0 };
+    MPI_Datatype    array_of_types[]         = { diy::mpi::mpi_cast(dt.handle) };
+    MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
+    MPI_Type_commit(&T_type);
+  }
+
+  MPI_Datatype fileblk, subbuffer;
+  MPI_Type_create_subarray(ndims, dims, subsizes.data(), (int*) &core.min[0], MPI_ORDER_C, T_type, &fileblk);
+  MPI_Type_create_subarray(ndims, buffer_shape.data(), subsizes.data(), buffer_start.data(), MPI_ORDER_C, T_type, &subbuffer);
+  MPI_Type_commit(&fileblk);
+  MPI_Type_commit(&subbuffer);
+
+  MPI_File_set_view(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(offset), T_type, fileblk, (char*)"native", MPI_INFO_NULL);
+
+  mpi::status s;
+  if (!collective)
+    MPI_File_write(diy::mpi::mpi_cast(fh), (void*)buffer, 1, subbuffer, &mpi_cast(s.handle));
+  else
+    MPI_File_write_all(diy::mpi::mpi_cast(fh), (void*)buffer, 1, subbuffer, &mpi_cast(s.handle));
+
+  if (chunk != 1)
+    MPI_Type_free(&T_type);
+  MPI_Type_free(&fileblk);
+  MPI_Type_free(&subbuffer);
+#else
+  (void) bounds; (void) core, (void) ndims; (void) dims, (void) buffer; (void) offset, (void) dt, (void) collective; (void) chunk;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::io::file::write_bov);
+#endif
+}
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.hpp
index 6359f30ad..650184c27 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/io.hpp
@@ -1,139 +1,82 @@
 #ifndef VTKMDIY_MPI_IO_HPP
 #define VTKMDIY_MPI_IO_HPP
 
-#include "../constants.h"
+#include "config.hpp"
+#include "communicator.hpp"
+
+#include <vtkmdiy/types.hpp>
 
 #include <vector>
 #include <string>
+#include <stdexcept>
 
 namespace diy
 {
 namespace mpi
 {
+
 namespace io
 {
-  typedef               MPI_Offset              offset;
+#if !defined(VTKMDIY_MPI_AS_LIB) && VTKMDIY_HAS_MPI
+  using offset = MPI_Offset;
+#else
+  using offset = long long;
+#endif
 
   //! Wraps MPI file IO. \ingroup MPI
   class file
   {
     public:
-      enum
-      {
-        rdonly          = MPI_MODE_RDONLY,
-        rdwr            = MPI_MODE_RDWR,
-        wronly          = MPI_MODE_WRONLY,
-        create          = MPI_MODE_CREATE,
-        exclusive       = MPI_MODE_EXCL,
-        delete_on_close = MPI_MODE_DELETE_ON_CLOSE,
-        unique_open     = MPI_MODE_UNIQUE_OPEN,
-        sequential      = MPI_MODE_SEQUENTIAL,
-        append          = MPI_MODE_APPEND
-      };
+#ifndef VTKMDIY_MPI_AS_LIB
+      static constexpr int rdonly          = MPI_MODE_RDONLY;
+      static constexpr int rdwr            = MPI_MODE_RDWR;
+      static constexpr int wronly          = MPI_MODE_WRONLY;
+      static constexpr int create          = MPI_MODE_CREATE;
+      static constexpr int exclusive       = MPI_MODE_EXCL;
+      static constexpr int delete_on_close = MPI_MODE_DELETE_ON_CLOSE;
+      static constexpr int unique_open     = MPI_MODE_UNIQUE_OPEN;
+      static constexpr int sequential      = MPI_MODE_SEQUENTIAL;
+      static constexpr int append          = MPI_MODE_APPEND;
+#else
+      static const int rdonly, rdwr, wronly, create, exclusive, delete_on_close, unique_open, sequential, append;
+#endif
 
     public:
-      inline        file(const communicator& comm, const std::string& filename, int mode);
-                    ~file()                                 { close(); }
-      inline void   close();
+      VTKMDIY_MPI_EXPORT_FUNCTION        file(const communicator& comm, const std::string& filename, int mode);
+                                     ~file()                            { close(); }
+      VTKMDIY_MPI_EXPORT_FUNCTION void   close();
 
-      inline offset size() const;
-      inline void   resize(offset size);
+      VTKMDIY_MPI_EXPORT_FUNCTION offset size() const;
+      VTKMDIY_MPI_EXPORT_FUNCTION void   resize(offset size);
 
-      inline void   read_at(offset o, char* buffer, size_t size);
-      inline void   read_at_all(offset o, char* buffer, size_t size);
-      inline void   write_at(offset o, const char* buffer, size_t size);
-      inline void   write_at_all(offset o, const char* buffer, size_t size);
+      VTKMDIY_MPI_EXPORT_FUNCTION void   read_at(offset o, char* buffer, size_t size);
+      VTKMDIY_MPI_EXPORT_FUNCTION void   read_at_all(offset o, char* buffer, size_t size);
+      VTKMDIY_MPI_EXPORT_FUNCTION void   write_at(offset o, const char* buffer, size_t size);
+      VTKMDIY_MPI_EXPORT_FUNCTION void   write_at_all(offset o, const char* buffer, size_t size);
 
       template<class T>
-      inline void   read_at(offset o, std::vector<T>& data);
+      inline void           read_at(offset o, std::vector<T>& data);
 
       template<class T>
-      inline void   read_at_all(offset o, std::vector<T>& data);
+      inline void           read_at_all(offset o, std::vector<T>& data);
 
       template<class T>
-      inline void   write_at(offset o, const std::vector<T>& data);
+      inline void           write_at(offset o, const std::vector<T>& data);
 
       template<class T>
-      inline void   write_at_all(offset o, const std::vector<T>& data);
+      inline void           write_at_all(offset o, const std::vector<T>& data);
 
-      const communicator&
-                    comm() const                            { return comm_; }
+      VTKMDIY_MPI_EXPORT_FUNCTION void   read_bov(const DiscreteBounds& bounds, int ndims, const int dims[], char* buffer, size_t offset, const datatype& dt, bool collective, int chunk);
+      VTKMDIY_MPI_EXPORT_FUNCTION void   write_bov(const DiscreteBounds& bounds, const DiscreteBounds& core, int ndims, const int dims[], const char* buffer, size_t offset, const datatype& dt, bool collective, int chunk);
 
-      MPI_File&     handle()                                { return fh; }
+      const communicator&   comm() const   { return comm_; }
 
     private:
-      const communicator&   comm_;
-      MPI_File              fh;
+      communicator   comm_;
+    protected: // mark protected to avoid the "unused private field" warning
+      DIY_MPI_File   fh;
   };
 }
-}
-}
-
-diy::mpi::io::file::
-file(const communicator& comm__, const std::string& filename, int mode)
-: comm_(comm__)
-{
-#ifndef VTKM_DIY_NO_MPI
-  int ret = MPI_File_open(comm__, const_cast<char*>(filename.c_str()), mode, MPI_INFO_NULL, &fh);
-  if (ret)
-      throw std::runtime_error("DIY cannot open file: " + filename);
-#else
-  DIY_UNUSED(comm__);
-  DIY_UNUSED(filename);
-  DIY_UNUSED(mode);
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_open);
-#endif
-}
-
-void
-diy::mpi::io::file::
-close()
-{
-#ifndef VTKM_DIY_NO_MPI
-  if (fh != MPI_FILE_NULL)
-    MPI_File_close(&fh);
-#endif
-}
-
-diy::mpi::io::offset
-diy::mpi::io::file::
-size() const
-{
-#ifndef VTKM_DIY_NO_MPI
-  offset sz;
-  MPI_File_get_size(fh, &sz);
-  return sz;
-#else
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_get_size);
-#endif
-}
-
-void
-diy::mpi::io::file::
-resize(diy::mpi::io::offset size_)
-{
-#ifndef VTKM_DIY_NO_MPI
-  MPI_File_set_size(fh, size_);
-#else
-  DIY_UNUSED(size_);
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_set_size);
-#endif
-}
-
-void
-diy::mpi::io::file::
-read_at(offset o, char* buffer, size_t size_)
-{
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  MPI_File_read_at(fh, o, buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
-#else
-  DIY_UNUSED(o);
-  DIY_UNUSED(buffer);
-  DIY_UNUSED(size_);
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at);
-#endif
-}
 
 template<class T>
 void
@@ -143,21 +86,6 @@ read_at(offset o, std::vector<T>& data)
   read_at(o, &data[0], data.size()*sizeof(T));
 }
 
-void
-diy::mpi::io::file::
-read_at_all(offset o, char* buffer, size_t size_)
-{
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  MPI_File_read_at_all(fh, o, buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
-#else
-  DIY_UNUSED(o);
-  DIY_UNUSED(buffer);
-  DIY_UNUSED(size_);
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at_all);
-#endif
-}
-
 template<class T>
 void
 diy::mpi::io::file::
@@ -166,21 +94,6 @@ read_at_all(offset o, std::vector<T>& data)
   read_at_all(o, (char*) &data[0], data.size()*sizeof(T));
 }
 
-void
-diy::mpi::io::file::
-write_at(offset o, const char* buffer, size_t size_)
-{
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  MPI_File_write_at(fh, o, (void *)buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
-#else
-  DIY_UNUSED(o);
-  DIY_UNUSED(buffer);
-  DIY_UNUSED(size_);
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at);
-#endif
-}
-
 template<class T>
 void
 diy::mpi::io::file::
@@ -189,21 +102,6 @@ write_at(offset o, const std::vector<T>& data)
   write_at(o, (const char*) &data[0], data.size()*sizeof(T));
 }
 
-void
-diy::mpi::io::file::
-write_at_all(offset o, const char* buffer, size_t size_)
-{
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  MPI_File_write_at_all(fh, o, (void *)buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
-#else
-  DIY_UNUSED(o);
-  DIY_UNUSED(buffer);
-  DIY_UNUSED(size_);
-  DIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at_all);
-#endif
-}
-
 template<class T>
 void
 diy::mpi::io::file::
@@ -212,4 +110,11 @@ write_at_all(offset o, const std::vector<T>& data)
   write_at_all(o, &data[0], data.size()*sizeof(T));
 }
 
+}
+} // diy::mpi::io
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "io.cpp"
 #endif
+
+#endif // VTKMDIY_MPI_IO_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/mpi_cast.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/mpi_cast.hpp
new file mode 100644
index 000000000..960742ae1
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/mpi_cast.hpp
@@ -0,0 +1,34 @@
+#ifndef VTKMDIY_MPI_MPICAST_HPP
+#define VTKMDIY_MPI_MPICAST_HPP
+
+/// This header provides convinience functions to cast from diy's type erased MPI objects
+/// to thier correct types.
+
+#ifndef VTKMDIY_HAS_MPI
+#  include <mpi.h>
+#endif
+
+namespace diy
+{
+namespace mpi
+{
+
+#define DEFINE_MPI_CAST(mpitype)                                                                              \
+inline mpitype& mpi_cast(DIY_##mpitype& obj) { return *reinterpret_cast<mpitype*>(&obj); }                    \
+inline const mpitype& mpi_cast(const DIY_##mpitype& obj) { return *reinterpret_cast<const mpitype*>(&obj); }  \
+inline DIY_##mpitype make_DIY_##mpitype(const mpitype& obj) { DIY_##mpitype ret; mpi_cast(ret) = obj; return ret; }
+
+DEFINE_MPI_CAST(MPI_Comm)
+DEFINE_MPI_CAST(MPI_Datatype)
+DEFINE_MPI_CAST(MPI_Status)
+DEFINE_MPI_CAST(MPI_Request)
+DEFINE_MPI_CAST(MPI_Op)
+DEFINE_MPI_CAST(MPI_File)
+DEFINE_MPI_CAST(MPI_Win)
+
+#undef DEFINE_MPI_CAST
+
+}
+} // diy::mpi
+
+#endif // VTKMDIY_MPI_MPICAST_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/mpitypes.hpp.in b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/mpitypes.hpp.in
new file mode 100644
index 000000000..bea375fd8
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/mpitypes.hpp.in
@@ -0,0 +1,51 @@
+#ifndef VTKMDIY_MPI_MPITYPES_H
+#define VTKMDIY_MPI_MPITYPES_H
+
+#cmakedefine TYPESIZE_MPI_Comm     @TYPESIZE_MPI_Comm@
+#cmakedefine TYPESIZE_MPI_Datatype @TYPESIZE_MPI_Datatype@
+#cmakedefine TYPESIZE_MPI_Status   @TYPESIZE_MPI_Status@
+#cmakedefine TYPESIZE_MPI_Request  @TYPESIZE_MPI_Request@
+#cmakedefine TYPESIZE_MPI_Op       @TYPESIZE_MPI_Op@
+#cmakedefine TYPESIZE_MPI_File     @TYPESIZE_MPI_File@
+#cmakedefine TYPESIZE_MPI_Win      @TYPESIZE_MPI_Win@
+
+namespace diy
+{
+namespace mpi
+{
+
+#if defined(VTKMDIY_HAS_MPI)
+#  define ASSERT_MPI_TYPE_SIZE(mpitype) static_assert(sizeof(mpitype) <= sizeof(DIY_##mpitype), "");
+#else
+# define ASSERT_MPI_TYPE_SIZE(mpitype)
+#endif
+
+#define DEFINE_DIY_MPI_TYPE(mpitype)                                         \
+struct DIY_##mpitype {                                                       \
+  void* data[((TYPESIZE_##mpitype) + sizeof(void*) - 1)/sizeof(void*)];      \
+};                                                                           \
+ASSERT_MPI_TYPE_SIZE(mpitype)
+
+DEFINE_DIY_MPI_TYPE(MPI_Comm)
+DEFINE_DIY_MPI_TYPE(MPI_Datatype)
+DEFINE_DIY_MPI_TYPE(MPI_Status)
+DEFINE_DIY_MPI_TYPE(MPI_Request)
+DEFINE_DIY_MPI_TYPE(MPI_Op)
+DEFINE_DIY_MPI_TYPE(MPI_File)
+DEFINE_DIY_MPI_TYPE(MPI_Win)
+
+#undef DEFINE_DIY_MPI_TYPE
+#undef ASSERT_MPI_TYPE_SIZE
+
+}
+} // diy::mpi
+
+#undef TYPESIZE_MPI_Comm
+#undef TYPESIZE_MPI_Datatype
+#undef TYPESIZE_MPI_Status
+#undef TYPESIZE_MPI_Request
+#undef TYPESIZE_MPI_Op
+#undef TYPESIZE_MPI_File
+#undef TYPESIZE_MPI_Win
+
+#endif // VTKMDIY_MPI_MPITYPES_H
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/no-mpi.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/no-mpi.hpp
index 316168961..8e7af241b 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/no-mpi.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/no-mpi.hpp
@@ -22,18 +22,17 @@ static const int MPI_THREAD_MULTIPLE    = 3;
 /* define datatypes */
 using MPI_Datatype = size_t;
 
-#define VTKM_DIY_NO_MPI_DATATYPE(cpp_type, mpi_type) \
+#define VTKMDIY_NO_MPI_DATATYPE(cpp_type, mpi_type) \
   static const MPI_Datatype mpi_type = sizeof(cpp_type);
-VTKM_DIY_NO_MPI_DATATYPE(char,                  MPI_BYTE);
-VTKM_DIY_NO_MPI_DATATYPE(int,                   MPI_INT);
-VTKM_DIY_NO_MPI_DATATYPE(unsigned,              MPI_UNSIGNED);
-VTKM_DIY_NO_MPI_DATATYPE(long,                  MPI_LONG);
-VTKM_DIY_NO_MPI_DATATYPE(unsigned long,         MPI_UNSIGNED_LONG);
-VTKM_DIY_NO_MPI_DATATYPE(long long,             MPI_LONG_LONG_INT);
-VTKM_DIY_NO_MPI_DATATYPE(unsigned long long,    MPI_UNSIGNED_LONG_LONG);
-VTKM_DIY_NO_MPI_DATATYPE(float,                 MPI_FLOAT);
-VTKM_DIY_NO_MPI_DATATYPE(double,                MPI_DOUBLE);
-#endif
+VTKMDIY_NO_MPI_DATATYPE(char,                  MPI_BYTE);
+VTKMDIY_NO_MPI_DATATYPE(int,                   MPI_INT);
+VTKMDIY_NO_MPI_DATATYPE(unsigned,              MPI_UNSIGNED);
+VTKMDIY_NO_MPI_DATATYPE(long,                  MPI_LONG);
+VTKMDIY_NO_MPI_DATATYPE(unsigned long,         MPI_UNSIGNED_LONG);
+VTKMDIY_NO_MPI_DATATYPE(long long,             MPI_LONG_LONG_INT);
+VTKMDIY_NO_MPI_DATATYPE(unsigned long long,    MPI_UNSIGNED_LONG_LONG);
+VTKMDIY_NO_MPI_DATATYPE(float,                 MPI_FLOAT);
+VTKMDIY_NO_MPI_DATATYPE(double,                MPI_DOUBLE);
 
 /* status type */
 struct MPI_Status
@@ -48,10 +47,8 @@ struct MPI_Status
 /* define MPI_Request */
 using MPI_Request = int;
 
-#ifndef DIY_UNSUPPORTED_MPI_CALL
-#define DIY_UNSUPPORTED_MPI_CALL(name) \
-  throw std::runtime_error("`" #name "` not supported when VTKM_DIY_NO_MPI is defined.");
-#endif
+#define VTKMDIY_UNSUPPORTED_MPI_CALL(name) \
+  throw std::runtime_error("`" #name "` not supported when VTKMDIY_HAS_MPI is false.");
 
 /* define operations */
 using MPI_Op = int;
@@ -63,7 +60,7 @@ static const MPI_Op MPI_LAND = 0;
 static const MPI_Op MPI_LOR = 0;
 
 /* mpi i/o stuff */
-using MPI_Offset = size_t;
+using MPI_Offset = long long;
 using MPI_File = int;
 static const MPI_File MPI_FILE_NULL = 0;
 
@@ -78,7 +75,7 @@ static const int MPI_MODE_APPEND          = 128;
 static const int MPI_MODE_SEQUENTIAL      = 256;
 
 /* define window type */
-using MPI_Win = int;
+using MPI_Win = void*;
 
 /* window fence assertions */
 static const int MPI_MODE_NOSTORE       = 1;
@@ -90,3 +87,5 @@ static const int MPI_MODE_NOCHECK       = 16;
 /* window lock types */
 static const int MPI_LOCK_SHARED        = 1;
 static const int MPI_LOCK_EXCLUSIVE     = 2;
+
+#endif // VTKMDIY_MPI_NO_MPI_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.cpp
new file mode 100644
index 000000000..fa282d0b6
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.cpp
@@ -0,0 +1,33 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "operations.hpp"
+#endif
+
+#include <functional>
+
+namespace diy
+{
+namespace mpi
+{
+
+namespace detail
+{
+
+operation get_builtin_operation(BuiltinOperation id)
+{ // Returns an operation whose handle wraps the MPI_Op constant selected by `id`.
+  operation op{}; // value-initialized; returned as-is for an id not listed below
+  switch(id)
+  {
+    case OP_MAXIMUM:     op.handle = make_DIY_MPI_Op(MPI_MAX);  break;
+    case OP_MINIMUM:     op.handle = make_DIY_MPI_Op(MPI_MIN);  break;
+    case OP_PLUS:        op.handle = make_DIY_MPI_Op(MPI_SUM);  break;
+    case OP_MULTIPLIES:  op.handle = make_DIY_MPI_Op(MPI_PROD); break;
+    case OP_LOGICAL_AND: op.handle = make_DIY_MPI_Op(MPI_LAND); break;
+    case OP_LOGICAL_OR:  op.handle = make_DIY_MPI_Op(MPI_LOR);  break;
+    default: break; // unlisted id: leave op.handle value-initialized
+  }
+  return op;
+}
+
+}
+}
+} // diy::mpi::detail
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.hpp
index 822113703..dec31bad2 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/operations.hpp
@@ -1,3 +1,8 @@
+#ifndef VTKMDIY_MPI_OPERATIONS_HPP
+#define VTKMDIY_MPI_OPERATIONS_HPP
+
+#include "config.hpp"
+
 #include <algorithm> // for std::min/max
 #include <functional>
 
@@ -7,6 +12,19 @@ namespace mpi
 {
   //! \addtogroup MPI
   //!@{
+  struct operation // Value wrapper around a reduction-operation handle (DIY_MPI_Op).
+  {
+    operation() = default;
+    operation(const DIY_MPI_Op& op) : handle(op) {} // implicit: wrap an existing DIY handle
+
+#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
+    operation(const MPI_Op& op) : handle(op) {} // implicit: wrap a raw MPI_Op
+    operator MPI_Op() { return handle; } // implicit conversion back to a raw MPI_Op (non-const objects only)
+#endif
+
+    DIY_MPI_Op handle; // the wrapped handle; public so the detail:: functions can read it
+  };
+
   template<class U>
   struct maximum { const U& operator()(const U& x, const U& y) const { return std::max(x,y); } };
   template<class U>
@@ -15,13 +33,32 @@ namespace mpi
 
 namespace detail
 {
-  template<class T> struct mpi_op                           { static MPI_Op  get(); };
-  template<class U> struct mpi_op< maximum<U> >             { static MPI_Op  get() { return MPI_MAX; }  };
-  template<class U> struct mpi_op< minimum<U> >             { static MPI_Op  get() { return MPI_MIN; }  };
-  template<class U> struct mpi_op< std::plus<U> >           { static MPI_Op  get() { return MPI_SUM; }  };
-  template<class U> struct mpi_op< std::multiplies<U> >     { static MPI_Op  get() { return MPI_PROD; }  };
-  template<class U> struct mpi_op< std::logical_and<U> >    { static MPI_Op  get() { return MPI_LAND; }  };
-  template<class U> struct mpi_op< std::logical_or<U> >     { static MPI_Op  get() { return MPI_LOR; }  };
-}
+  enum BuiltinOperation { // ids of the reductions that get_builtin_operation() can resolve
+    OP_MAXIMUM = 0, // resolved to MPI_MAX
+    OP_MINIMUM, // resolved to MPI_MIN
+    OP_PLUS, // resolved to MPI_SUM
+    OP_MULTIPLIES, // resolved to MPI_PROD
+    OP_LOGICAL_AND, // resolved to MPI_LAND
+    OP_LOGICAL_OR // resolved to MPI_LOR
+  };
+
+  VTKMDIY_MPI_EXPORT_FUNCTION operation get_builtin_operation(BuiltinOperation id);
+  // Trait mapping a functor type to an operation. The primary template is only declared, so using a functor without a specialization below fails to compile.
+  template<class T> struct mpi_op;
+  // One specialization per supported functor; get() passes the matching id to get_builtin_operation().
+  template<class U> struct mpi_op< maximum<U> >          { static operation get() { return get_builtin_operation(OP_MAXIMUM); } };
+  template<class U> struct mpi_op< minimum<U> >          { static operation get() { return get_builtin_operation(OP_MINIMUM); } };
+  template<class U> struct mpi_op< std::plus<U> >        { static operation get() { return get_builtin_operation(OP_PLUS); } };
+  template<class U> struct mpi_op< std::multiplies<U> >  { static operation get() { return get_builtin_operation(OP_MULTIPLIES); } };
+  template<class U> struct mpi_op< std::logical_and<U> > { static operation get() { return get_builtin_operation(OP_LOGICAL_AND); } };
+  template<class U> struct mpi_op< std::logical_or<U> >  { static operation get() { return get_builtin_operation(OP_LOGICAL_OR); } };
 }
+
 }
+} // diy::mpi
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "operations.cpp"
+#endif
+
+#endif // VTKMDIY_MPI_OPERATIONS_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/optional.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/optional.hpp
index ab58aaf81..aee4d269a 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/optional.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/optional.hpp
@@ -1,3 +1,6 @@
+#ifndef VTKMDIY_MPI_OPTIONAL_HPP
+#define VTKMDIY_MPI_OPTIONAL_HPP
+
 namespace diy
 {
 namespace mpi
@@ -53,3 +56,5 @@ operator=(const optional& o)
 
   return *this;
 }
+
+#endif // VTKMDIY_MPI_OPTIONAL_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.cpp
new file mode 100644
index 000000000..6ca86a249
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.cpp
@@ -0,0 +1,96 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "point-to-point.hpp"
+#endif
+
+namespace diy
+{
+namespace mpi
+{
+
+#ifdef VTKMDIY_MPI_AS_LIB // library build: define the constants here (header-only builds get them as constexpr in the header)
+#  ifdef _MSC_VER // only MSVC gets the export attribute on the definitions
+#    define EXPORT_MACRO VTKMDIY_MPI_EXPORT
+#  else
+#    define EXPORT_MACRO
+#  endif
+EXPORT_MACRO const int any_source  = MPI_ANY_SOURCE; // wildcard source rank
+EXPORT_MACRO const int any_tag     = MPI_ANY_TAG; // wildcard message tag
+#  undef EXPORT_MACRO
+#endif
+
+namespace detail
+{
+
+void send(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type)
+{ // Untyped send: passes the buffer, count and datatype handle to MPI_Send; the MPI return code is not checked.
+#if VTKMDIY_HAS_MPI
+  MPI_Send(data, count, mpi_cast(type.handle), dest, tag, mpi_cast(comm));
+#else // built without MPI: silence unused-parameter warnings, then throw std::runtime_error
+  (void) comm; (void) dest; (void) tag; (void) data; (void) count; (void) type;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Send);
+#endif
+}
+
+status probe(DIY_MPI_Comm comm, int source, int tag)
+{ // Calls MPI_Probe for a message matching (source, tag) and returns the status it fills in.
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_Probe(source, tag, mpi_cast(comm), &mpi_cast(s.handle));
+  return s;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  (void) comm; (void) source; (void) tag;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Probe);
+#endif
+}
+
+status recv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type)
+{ // Untyped receive: passes the buffer, count and datatype handle to MPI_Recv and returns the resulting status.
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_Recv(data, count, mpi_cast(type.handle), source, tag, mpi_cast(comm), &mpi_cast(s.handle));
+  return s;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  (void) comm; (void) source; (void) tag; (void) data; (void) count; (void) type;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
+#endif
+}
+
+request isend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type)
+{ // Untyped MPI_Isend wrapper; returns the request whose handle MPI filled in.
+#if VTKMDIY_HAS_MPI
+  request r;
+  MPI_Isend(data, count, mpi_cast(type.handle), dest, tag, mpi_cast(comm), &mpi_cast(r.handle));
+  return r;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  (void) comm; (void) dest; (void) tag; (void) data; (void) count; (void) type;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Isend);
+#endif
+}
+
+request issend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type)
+{ // Untyped MPI_Issend wrapper; returns the request whose handle MPI filled in.
+#if VTKMDIY_HAS_MPI
+  request r;
+  MPI_Issend(data, count, mpi_cast(type.handle), dest, tag, mpi_cast(comm), &mpi_cast(r.handle));
+  return r;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  (void) comm; (void) dest; (void) tag; (void) data; (void) count; (void) type;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Issend);
+#endif
+}
+
+request irecv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type)
+{ // Untyped MPI_Irecv wrapper; returns the request whose handle MPI filled in.
+#if VTKMDIY_HAS_MPI
+  request r;
+  MPI_Irecv(data, count, mpi_cast(type.handle), source, tag, mpi_cast(comm), &mpi_cast(r.handle));
+  return r;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  (void) comm; (void) source; (void) tag; (void) data; (void) count; (void) type;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Irecv);
+#endif
+}
+
+}
+}
+} // diy::mpi::detail
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.hpp
index cd2063b4a..8ada5b634 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/point-to-point.hpp
@@ -1,147 +1,84 @@
+#ifndef VTKMDIY_MPI_POINT_TO_POINT_HPP
+#define VTKMDIY_MPI_POINT_TO_POINT_HPP
+
+#include "config.hpp"
+#include "datatypes.hpp"
+#include "request.hpp"
+#include "status.hpp"
+
 #include <vector>
 
 namespace diy
 {
 namespace mpi
 {
+
+#ifndef VTKMDIY_MPI_AS_LIB // header-only build: the MPI macros are visible, so expose the wildcards as constexpr
+constexpr int any_source  = MPI_ANY_SOURCE;
+constexpr int any_tag     = MPI_ANY_TAG;
+#else // library build: only declared here; the values are defined in point-to-point.cpp
+VTKMDIY_MPI_EXPORT extern const int any_source;
+VTKMDIY_MPI_EXPORT extern const int any_tag;
+#endif
+
 namespace detail
 {
-  // send
-  template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
-  struct send;
+  VTKMDIY_MPI_EXPORT_FUNCTION void send(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type); // wraps MPI_Send
+  VTKMDIY_MPI_EXPORT_FUNCTION request isend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type); // wraps MPI_Isend
+  VTKMDIY_MPI_EXPORT_FUNCTION request issend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type); // wraps MPI_Issend
+  VTKMDIY_MPI_EXPORT_FUNCTION status probe(DIY_MPI_Comm comm, int source, int tag); // wraps MPI_Probe
+  VTKMDIY_MPI_EXPORT_FUNCTION status recv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type); // wraps MPI_Recv
+  VTKMDIY_MPI_EXPORT_FUNCTION request irecv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type); // wraps MPI_Irecv
 
-  template<class T>
-  struct send<T, true_type>
+  template <class T> // Typed overload: sends x by handing its address, count and datatype to the untyped send().
+  inline void send(DIY_MPI_Comm comm, int dest, int tag, const T& x)
   {
-    void operator()(MPI_Comm comm, int dest, int tag, const T& x) const
-    {
-#ifndef VTKM_DIY_NO_MPI
-      typedef       mpi_datatype<T>     Datatype;
-      MPI_Send((void*) Datatype::address(x),
-               Datatype::count(x),
-               Datatype::datatype(),
-               dest, tag, comm);
-#else
-      (void) comm; (void) dest; (void) tag; (void) x;
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Send);
-#endif
-    }
-  };
+    static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
+    send(comm, dest, tag, address(x), count(x), datatype_of(x)); // the assert above rejects types without an MPI datatype at compile time
+  }
 
-  // recv
-  template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
-  struct recv;
-
-  template<class T>
-  struct recv<T, true_type>
+  template <class T> // Typed overload: receives into x by handing its address, count and datatype to the untyped recv().
+  status recv(DIY_MPI_Comm comm, int source, int tag, T& x)
   {
-    status operator()(MPI_Comm comm, int source, int tag, T& x) const
-    {
-#ifndef VTKM_DIY_NO_MPI
-      typedef       mpi_datatype<T>     Datatype;
-      status s;
-      MPI_Recv((void*) Datatype::address(x),
-                Datatype::count(x),
-                Datatype::datatype(),
-                source, tag, comm, &s.s);
-      return s;
-#else
-      (void) comm; (void) source; (void) tag; (void) x;
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
-#endif
-    }
-  };
+    static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
+    return recv(comm, source, tag, address(x), count(x), datatype_of(x)); // the assert above rejects types without an MPI datatype at compile time
+  }
 
-  template<class U>
-  struct recv<std::vector<U>, true_type>
+  template <class T> // Vector overload: the receiver does not need to know the message length in advance.
+  status recv(DIY_MPI_Comm comm, int source, int tag, std::vector<T>& x)
   {
-    status operator()(MPI_Comm comm, int source, int tag, std::vector<U>& x) const
-    {
-#ifndef VTKM_DIY_NO_MPI
-      status s;
+    auto s = probe(comm, source, tag); // probe first so the element count is known before receiving
+    x.resize(static_cast<size_t>(s.count<T>())); // size the vector to hold exactly the probed number of T elements
+    return recv(comm, source, tag, address(x), count(x), datatype_of(x)); // receive with the same (source, tag) that was probed
+  }
 
-      MPI_Probe(source, tag, comm, &s.s);
-      x.resize(s.count<U>());
-      MPI_Recv(&x[0], static_cast<int>(x.size()), get_mpi_datatype<U>(), source, tag, comm, &s.s);
-      return s;
-#else
-      (void) comm; (void) source; (void) tag; (void) x;
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
-#endif
-    }
-  };
-
-  // isend
-  template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
-  struct isend;
-
-  template<class T>
-  struct isend<T, true_type>
+  template <class T> // Typed overload: starts a send of x through the untyped isend() and returns its request.
+  request isend(DIY_MPI_Comm comm, int dest, int tag, const T& x)
   {
-    request operator()(MPI_Comm comm, int dest, int tag, const T& x) const
-    {
-#ifndef VTKM_DIY_NO_MPI
-      request r;
-      typedef       mpi_datatype<T>     Datatype;
-      MPI_Isend((void*) Datatype::address(x),
-                Datatype::count(x),
-                Datatype::datatype(),
-                dest, tag, comm, &r.r);
-      return r;
-#else
-      (void) comm; (void) dest; (void) tag; (void) x;
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Isend);
-#endif
-    }
-  };
+    static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
+    return isend(comm, dest, tag, address(x), count(x), datatype_of(x)); // the assert above rejects types without an MPI datatype at compile time
+  }
 
-  // issend
-  template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
-  struct issend;
-
-  template<class T>
-  struct issend<T, true_type>
+  template <class T> // Typed overload: starts a send of x through the untyped issend() and returns its request.
+  request issend(DIY_MPI_Comm comm, int dest, int tag, const T& x)
   {
-    request operator()(MPI_Comm comm, int dest, int tag, const T& x) const
-    {
-#ifndef VTKM_DIY_NO_MPI
-      request r;
-      typedef       mpi_datatype<T>     Datatype;
-      MPI_Issend((void*) Datatype::address(x),
-                Datatype::count(x),
-                Datatype::datatype(),
-                dest, tag, comm, &r.r);
-      return r;
-#else
-      (void) comm; (void) dest; (void) tag; (void) x;
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Issend);
-#endif
-    }
-  };
+    static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
+    return issend(comm, dest, tag, address(x), count(x), datatype_of(x)); // the assert above rejects types without an MPI datatype at compile time
+  }
 
-  // irecv
-  template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
-  struct irecv;
-
-  template<class T>
-  struct irecv<T, true_type>
+  template <class T> // Typed overload: starts a receive into x through the untyped irecv() and returns its request.
+  request irecv(DIY_MPI_Comm comm, int source, int tag, T& x)
   {
-    request operator()(MPI_Comm comm, int source, int tag, T& x) const
-    {
-#ifndef VTKM_DIY_NO_MPI
-      request r;
-      typedef       mpi_datatype<T>     Datatype;
-      MPI_Irecv(Datatype::address(x),
-                Datatype::count(x),
-                Datatype::datatype(),
-                source, tag, comm, &r.r);
-      return r;
-#else
-      (void) comm; (void) source; (void) tag; (void) x;
-      DIY_UNSUPPORTED_MPI_CALL(MPI_Irecv);
-#endif
-    }
-  };
-}
+    static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
+    return irecv(comm, source, tag, address(x), count(x), datatype_of(x)); // the assert above rejects types without an MPI datatype at compile time
+  }
+
 }
 }
+} // diy::mpi::detail
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "point-to-point.cpp"
+#endif
+
+#endif // VTKMDIY_MPI_POINT_TO_POINT_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.cpp
new file mode 100644
index 000000000..3dc4b5719
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.cpp
@@ -0,0 +1,45 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "request.hpp"
+#endif
+
+#include <algorithm>
+#include <iterator>
+
+#if defined(VTKMDIY_MPI_AS_LIB) && !VTKMDIY_HAS_MPI // library build without MPI: the handle is initialized by hand
+diy::mpi::request::request()
+{
+  std::fill(std::begin(this->handle.data), std::end(this->handle.data), nullptr); // null out every element of handle.data
+}
+#else // every other configuration: compiler-generated default constructor
+diy::mpi::request::request() = default;
+#endif
+
+diy::mpi::status diy::mpi::request::wait()
+{ // Calls MPI_Wait on this request and returns the status it fills in.
+#if VTKMDIY_HAS_MPI
+  status s;
+  MPI_Wait(&mpi_cast(handle), &mpi_cast(s.handle));
+  return s;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::request::wait);
+#endif
+}
+
+diy::mpi::optional<diy::mpi::status> diy::mpi::request::test()
+{ // Polls this request with MPI_Test; the optional holds a status only when MPI sets the completion flag.
+#if VTKMDIY_HAS_MPI
+  status s;
+  int flag;
+  MPI_Test(&mpi_cast(handle), &flag, &mpi_cast(s.handle));
+  if (flag) // non-zero: MPI_Test reported completion
+    return s;
+#endif
+  return optional<status>(); // flag was zero, or built without MPI (then the result is always empty)
+}
+
+void diy::mpi::request::cancel()
+{ // Calls MPI_Cancel on this request; compiles to an empty function in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Cancel(&mpi_cast(handle));
+#endif
+}
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.hpp
index 1a9639bbd..defa26a29 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/request.hpp
@@ -1,50 +1,29 @@
+#ifndef VTKMDIY_MPI_REQUEST_HPP
+#define VTKMDIY_MPI_REQUEST_HPP
+
+#include "config.hpp"
+#include "status.hpp"
+#include "optional.hpp"
+
 namespace diy
 {
 namespace mpi
 {
   struct request
   {
-    inline
-    status              wait();
-    inline
-    optional<status>    test();
-    inline
-    void                cancel();
+    VTKMDIY_MPI_EXPORT_FUNCTION                  request(); // defined in request.cpp
+    VTKMDIY_MPI_EXPORT_FUNCTION status           wait(); // wraps MPI_Wait; throws std::runtime_error in builds without MPI
+    VTKMDIY_MPI_EXPORT_FUNCTION optional<status> test(); // wraps MPI_Test; always an empty optional in builds without MPI
+    VTKMDIY_MPI_EXPORT_FUNCTION void             cancel(); // wraps MPI_Cancel; does nothing in builds without MPI
 
-    MPI_Request         r;
+    DIY_MPI_Request handle;
   };
-}
-}
 
-diy::mpi::status
-diy::mpi::request::wait()
-{
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  MPI_Wait(&r, &s.s);
-  return s;
-#else
-  DIY_UNSUPPORTED_MPI_CALL(diy::mpi::request::wait);
-#endif
 }
+} // diy::mpi
 
-diy::mpi::optional<diy::mpi::status>
-diy::mpi::request::test()
-{
-#ifndef VTKM_DIY_NO_MPI
-  status s;
-  int flag;
-  MPI_Test(&r, &flag, &s.s);
-  if (flag)
-    return s;
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "request.cpp"
 #endif
-  return optional<status>();
-}
 
-void
-diy::mpi::request::cancel()
-{
-#ifndef VTKM_DIY_NO_MPI
-  MPI_Cancel(&r);
-#endif
-}
+#endif // VTKMDIY_MPI_REQUEST_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.cpp
new file mode 100644
index 000000000..a57cc0aa7
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.cpp
@@ -0,0 +1,30 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "status.hpp"
+#endif
+
+int diy::mpi::status::source() const { return mpi_cast(handle).MPI_SOURCE; } // reads the MPI_SOURCE field of the wrapped status
+int diy::mpi::status::tag() const    { return mpi_cast(handle).MPI_TAG; } // reads the MPI_TAG field of the wrapped status
+int diy::mpi::status::error() const  { return mpi_cast(handle).MPI_ERROR; } // reads the MPI_ERROR field of the wrapped status
+
+bool diy::mpi::status::cancelled() const
+{ // Returns true when MPI_Test_cancelled sets its flag for the wrapped status.
+#if VTKMDIY_HAS_MPI
+  int flag = 0; // start from a defined value instead of reading an uninitialized int
+  MPI_Test_cancelled(&mpi_cast(handle), &flag);
+  return flag != 0; // explicit int -> bool, avoids the implicit-conversion warning (MSVC C4800)
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::cancelled);
+#endif
+}
+
+int diy::mpi::status::count(const diy::mpi::datatype& type) const
+{ // Returns the element count MPI_Get_count reports for the wrapped status, measured in `type`.
+#if VTKMDIY_HAS_MPI
+  int c;
+  MPI_Get_count(&mpi_cast(handle), mpi_cast(type.handle), &c);
+  return c;
+#else // built without MPI: throws std::runtime_error, so no return statement is needed
+  (void) type;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::count);
+#endif
+}
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.hpp
index 6b390b180..9828c2b36 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/status.hpp
@@ -1,49 +1,42 @@
+#ifndef VTKMDIY_MPI_STATUS_HPP
+#define VTKMDIY_MPI_STATUS_HPP
+
+#include "config.hpp"
+#include "datatypes.hpp"
+
 namespace diy
 {
 namespace mpi
 {
   struct status
   {
-    int             source() const          { return s.MPI_SOURCE; }
-    int             tag() const             { return s.MPI_TAG; }
-    int             error() const           { return s.MPI_ERROR; }
+    status() = default; // leaves `handle` to be filled in by an MPI call
+    status(const DIY_MPI_Status& s) : handle(s) {} // implicit: wrap an existing DIY status handle
 
-    inline
-    bool            cancelled() const;
+#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
+    status(const MPI_Status& s) : handle(s) {} // implicit: wrap a raw MPI_Status
+    operator MPI_Status() { return handle; } // converts to a raw MPI_Status by value (a copy, not a reference)
+#endif
 
-    template<class T>
-    int             count() const;
+    VTKMDIY_MPI_EXPORT_FUNCTION int  source() const; // MPI_SOURCE field of the wrapped status
+    VTKMDIY_MPI_EXPORT_FUNCTION int  tag() const; // MPI_TAG field of the wrapped status
+    VTKMDIY_MPI_EXPORT_FUNCTION int  error() const; // MPI_ERROR field of the wrapped status
+    VTKMDIY_MPI_EXPORT_FUNCTION bool cancelled() const; // wraps MPI_Test_cancelled; throws std::runtime_error in builds without MPI
+    VTKMDIY_MPI_EXPORT_FUNCTION int  count(const datatype& type) const; // wraps MPI_Get_count; throws std::runtime_error in builds without MPI
 
-                    operator MPI_Status&()              { return s; }
-                    operator const MPI_Status&() const  { return s; }
+    template<class T>       int count() const
+    { // Typed convenience form: looks up the datatype for T and defers to count(const datatype&).
+      return this->count(detail::get_mpi_datatype<T>());
+    }
 
-    MPI_Status      s;
+    DIY_MPI_Status handle;
   };
-}
-}
 
+}
+} // diy::mpi
 
-bool
-diy::mpi::status::cancelled() const
-{
-#ifndef VTKM_DIY_NO_MPI
-  int flag;
-  MPI_Test_cancelled(const_cast<MPI_Status*>(&s), &flag);
-  return flag;
-#else
-  DIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::cancelled);
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "status.cpp"
 #endif
-}
 
-template<class T>
-int
-diy::mpi::status::count() const
-{
-#ifndef VTKM_DIY_NO_MPI
-  int c;
-  MPI_Get_count(const_cast<MPI_Status*>(&s), detail::get_mpi_datatype<T>(), &c);
-  return c;
-#else
-  DIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::count);
-#endif
-}
+#endif // VTKMDIY_MPI_STATUS_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.cpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.cpp
new file mode 100644
index 000000000..12b97bf7f
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.cpp
@@ -0,0 +1,208 @@
+#ifdef VTKMDIY_MPI_AS_LIB
+#include "window.hpp"
+#endif
+
+#include <algorithm>
+
+namespace diy
+{
+namespace mpi
+{
+
+#ifdef VTKMDIY_MPI_AS_LIB // library build: define the constant here (header-only builds get it as constexpr in the header)
+#  ifdef _MSC_VER // only MSVC gets the export attribute on the definition
+#    define EXPORT_MACRO VTKMDIY_MPI_EXPORT
+#  else
+#    define EXPORT_MACRO
+#  endif
+EXPORT_MACRO const int nocheck  = MPI_MODE_NOCHECK; // value of the MPI_MODE_NOCHECK assertion flag
+#  undef EXPORT_MACRO
+#endif
+
+namespace detail
+{
+
+DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int disp)
+{ // Creates a window handle over the memory at `base`; `size` and `disp` are passed to MPI_Win_create unchanged.
+#if VTKMDIY_HAS_MPI
+  DIY_MPI_Win win;
+  MPI_Win_create(base, size, disp, MPI_INFO_NULL, mpi_cast(comm.handle()), &mpi_cast(win));
+  return win;
+#else // built without MPI: the handle simply stores the base pointer
+  (void)comm; (void)size; (void)disp;
+  auto win = make_DIY_MPI_Win(base);
+  return win;
+#endif
+}
+
+void win_free(DIY_MPI_Win& win)
+{ // Releases the window with MPI_Win_free; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_free(&mpi_cast(win));
+#else
+  (void)win;
+#endif
+}
+
+void put(const DIY_MPI_Win& win, const void* data, int count, const datatype& type, int rank, unsigned offset)
+{ // Writes `count` elements of `type` from `data` into the window at displacement `offset`.
+#if VTKMDIY_HAS_MPI
+  MPI_Put(data, count, mpi_cast(type.handle), rank, offset, count, mpi_cast(type.handle), mpi_cast(win));
+#else // built without MPI: the window handle is the local buffer, so copy the bytes directly
+  void* buffer = mpi_cast(win);
+  size_t size = mpi_cast(type.handle); // without MPI a datatype handle holds sizeof(element)
+  std::copy_n(static_cast<const char*>(data), // plain char for the byte copy: no <cstdint> dependency
+              size * static_cast<size_t>(count),
+              static_cast<char*>(buffer) + (offset * size));
+  (void)rank;
+#endif
+}
+
+void get(const DIY_MPI_Win& win, void* data, int count, const datatype& type, int rank, unsigned offset)
+{ // Reads `count` elements of `type` from the window at displacement `offset` into `data`.
+#if VTKMDIY_HAS_MPI
+  MPI_Get(data, count, mpi_cast(type.handle), rank, offset, count, mpi_cast(type.handle), mpi_cast(win));
+#else // built without MPI: the window handle is the local buffer, so copy the bytes directly
+  const void* buffer = mpi_cast(win);
+  size_t size = mpi_cast(type.handle); // without MPI a datatype handle holds sizeof(element)
+  std::copy_n(static_cast<const char*>(buffer) + (offset * size), // plain char: no <cstdint> dependency
+              size * static_cast<size_t>(count),
+              static_cast<char*>(data));
+  (void)rank;
+#endif
+}
+
+void fence(const DIY_MPI_Win& win, int assert)
+{ // Forwards to MPI_Win_fence; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_fence(assert, mpi_cast(win));
+#else
+  (void) win; (void) assert;
+#endif
+}
+
+void lock(const DIY_MPI_Win& win, int lock_type, int rank, int assert)
+{ // Forwards to MPI_Win_lock; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_lock(lock_type, rank, assert, mpi_cast(win));
+#else
+  (void) win; (void) lock_type; (void) rank; (void) assert;
+#endif
+}
+
+void unlock(const DIY_MPI_Win& win, int rank)
+{ // Forwards to MPI_Win_unlock; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_unlock(rank, mpi_cast(win));
+#else
+  (void) win; (void) rank;
+#endif
+}
+
+void lock_all(const DIY_MPI_Win& win, int assert)
+{ // Forwards to MPI_Win_lock_all; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_lock_all(assert, mpi_cast(win));
+#else
+  (void) win; (void) assert;
+#endif
+}
+
+void unlock_all(const DIY_MPI_Win& win)
+{ // Forwards to MPI_Win_unlock_all; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_unlock_all(mpi_cast(win));
+#else
+  (void) win;
+#endif
+}
+
+void fetch_and_op(const DIY_MPI_Win& win,
+                  const void* origin, void* result, const datatype& type,
+                  int rank, unsigned offset,
+                  const operation& op)
+{ // Forwards all arguments to MPI_Fetch_and_op, unwrapping the datatype, operation and window handles.
+#if VTKMDIY_HAS_MPI
+  MPI_Fetch_and_op(origin, result, mpi_cast(type.handle), rank, offset, mpi_cast(op.handle), mpi_cast(win));
+#else // built without MPI: no local emulation exists, so this throws std::runtime_error
+  (void) win; (void) origin; (void) result; (void) type; (void) rank; (void) offset; (void) op;
+  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Fetch_and_op);
+#endif
+}
+
+void fetch(const DIY_MPI_Win& win, void* result, const datatype& type, int rank, unsigned offset)
+{ // Reads one element of `type` from the window at displacement `offset` into `result`.
+#if VTKMDIY_HAS_MPI
+  MPI_Fetch_and_op(nullptr, result, mpi_cast(type.handle), rank, offset, MPI_NO_OP, mpi_cast(win)); // MPI_NO_OP: read only, so no origin buffer is passed
+#else // built without MPI: the window handle is the local buffer, so copy the bytes directly
+  (void) rank;
+  const void* buffer = mpi_cast(win);
+  size_t size = mpi_cast(type.handle); // without MPI a datatype handle holds sizeof(element)
+  std::copy_n(static_cast<const char*>(buffer) + (offset * size), // plain char: no <cstdint> dependency
+              size,
+              static_cast<char*>(result));
+#endif
+}
+
+void replace(const DIY_MPI_Win& win, const void* value, const datatype& type, int rank, unsigned offset)
+{ // Overwrites one element of `type` in the window at displacement `offset` with `*value`.
+#if VTKMDIY_HAS_MPI
+  MPI_Fetch_and_op(value, nullptr, mpi_cast(type.handle), rank, offset, MPI_REPLACE, mpi_cast(win)); // NOTE(review): result buffer is nullptr; confirm the MPI implementation accepts that
+#else // built without MPI: the window handle is the local buffer, so copy the bytes directly
+  (void) rank;
+  void* buffer = mpi_cast(win);
+  size_t size = mpi_cast(type.handle); // without MPI a datatype handle holds sizeof(element)
+  std::copy_n(static_cast<const char*>(value), // plain char for the byte copy: no <cstdint> dependency
+              size,
+              static_cast<char*>(buffer) + (offset * size));
+#endif
+}
+
+void sync(const DIY_MPI_Win& win)
+{ // Forwards to MPI_Win_sync; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_sync(mpi_cast(win));
+#else
+  (void) win;
+#endif
+}
+
+void flush(const DIY_MPI_Win& win, int rank)
+{ // Forwards to MPI_Win_flush; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_flush(rank, mpi_cast(win));
+#else
+  (void) win; (void) rank;
+#endif
+}
+
+void flush_all(const DIY_MPI_Win& win)
+{ // Forwards to MPI_Win_flush_all; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_flush_all(mpi_cast(win));
+#else
+  (void) win;
+#endif
+}
+
+void flush_local(const DIY_MPI_Win& win, int rank)
+{ // Forwards to MPI_Win_flush_local; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_flush_local(rank, mpi_cast(win));
+#else
+  (void) win; (void) rank;
+#endif
+}
+
+void flush_local_all(const DIY_MPI_Win& win)
+{ // Forwards to MPI_Win_flush_local_all; does nothing in builds without MPI.
+#if VTKMDIY_HAS_MPI
+  MPI_Win_flush_local_all(mpi_cast(win));
+#else
+  (void) win;
+#endif
+}
+
+}
+}
+} // diy::mpi::detail
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.hpp
index 69056f5b0..730d7c439 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/mpi/window.hpp
@@ -1,10 +1,89 @@
+#ifndef VTKMDIY_MPI_WINODW_HPP
+#define VTKMDIY_MPI_WINODW_HPP
+
+#include "config.hpp"
+#include "communicator.hpp"
+#include "operations.hpp"
+
 #include <type_traits>
+#include <vector>
 
 namespace diy
 {
 namespace mpi
 {
 
+#ifndef VTKMDIY_MPI_AS_LIB
+constexpr int nocheck  = MPI_MODE_NOCHECK;
+#else
+VTKMDIY_MPI_EXPORT extern const int nocheck;
+#endif
+
+namespace detail
+{
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int disp);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void win_free(DIY_MPI_Win& win);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void put(const DIY_MPI_Win& win,
+         const void* data, int count, const datatype& type,
+         int rank, unsigned offset);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void get(const DIY_MPI_Win& win,
+         void* data, int count, const datatype& type,
+         int rank, unsigned offset);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void fence(const DIY_MPI_Win& win, int assert);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void lock(const DIY_MPI_Win& win, int lock_type, int rank, int assert);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void unlock(const DIY_MPI_Win& win, int rank);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void lock_all(const DIY_MPI_Win& win, int assert);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void unlock_all(const DIY_MPI_Win& win);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void fetch_and_op(const DIY_MPI_Win& win,
+                  const void* origin, void* result, const datatype& type,
+                  int rank, unsigned offset,
+                  const operation& op);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void fetch(const DIY_MPI_Win& win, void* result, const datatype& type, int rank, unsigned offset);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void replace(const DIY_MPI_Win& win,
+             const void* value, const datatype& type,
+             int rank, unsigned offset);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void sync(const DIY_MPI_Win& win);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void flush(const DIY_MPI_Win& win, int rank);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void flush_all(const DIY_MPI_Win& win);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void flush_local(const DIY_MPI_Win& win, int rank);
+
+VTKMDIY_MPI_EXPORT_FUNCTION
+void flush_local_all(const DIY_MPI_Win& win);
+
+} // detail
+
     //! \ingroup MPI
     //! Simple wrapper around MPI window functions.
     template<class T>
@@ -38,7 +117,7 @@ namespace mpi
             inline void lock_all(int assert = 0);
             inline void unlock_all();
 
-            inline void fetch_and_op(const T* origin, T* result, int rank, unsigned offset, MPI_Op op);
+            inline void fetch_and_op(const T* origin, T* result, int rank, unsigned offset, const operation& op);
             inline void fetch(T& result, int rank, unsigned offset);
             inline void replace(const T& value, int rank, unsigned offset);
 
@@ -52,30 +131,25 @@ namespace mpi
         private:
             std::vector<T>      buffer_;
             int                 rank_;
-#ifndef VTKM_DIY_NO_MPI
-            MPI_Win             window_;
-#endif
+            DIY_MPI_Win         window_;
     };
+
 } // mpi
 } // diy
 
 template<class T>
 diy::mpi::window<T>::
-window(const communicator& comm, unsigned size):
+window(const diy::mpi::communicator& comm, unsigned size):
   buffer_(size), rank_(comm.rank())
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_create(buffer_.data(), buffer_.size()*sizeof(T), sizeof(T), MPI_INFO_NULL, comm, &window_);
-#endif
+  window_ = detail::win_create(comm, buffer_.data(), static_cast<unsigned>(buffer_.size()*sizeof(T)), static_cast<int>(sizeof(T)));
 }
 
 template<class T>
 diy::mpi::window<T>::
 ~window()
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_free(&window_);
-#endif
+  detail::win_free(window_);
 }
 
 template<class T>
@@ -83,15 +157,7 @@ void
 diy::mpi::window<T>::
 put(const T& x, int rank, unsigned offset)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Put(address(x), count(x), datatype(x),
-            rank,
-            offset,
-            count(x), datatype(x),
-            window_);
-#else
-    buffer_[offset] = x;
-#endif
+  detail::put(window_, address(x), count(x), datatype_of(x), rank, offset);
 }
 
 template<class T>
@@ -99,16 +165,7 @@ void
 diy::mpi::window<T>::
 put(const std::vector<T>& x, int rank, unsigned offset)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Put(address(x), count(x), datatype(x),
-            rank,
-            offset,
-            count(x), datatype(x),
-            window_);
-#else
-    for (size_t i = 0; i < x.size(); ++i)
-        buffer_[offset + i] = x[i];
-#endif
+  detail::put(window_, address(x), count(x), datatype_of(x), rank, offset);
 }
 
 template<class T>
@@ -116,15 +173,7 @@ void
 diy::mpi::window<T>::
 get(T& x, int rank, unsigned offset)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Get(address(x), count(x), datatype(x),
-            rank,
-            offset,
-            count(x), datatype(x),
-            window_);
-#else
-    x = buffer_[offset];
-#endif
+  detail::get(window_, address(x), count(x), datatype_of(x), rank, offset);
 }
 
 template<class T>
@@ -132,16 +181,7 @@ void
 diy::mpi::window<T>::
 get(std::vector<T>& x, int rank, unsigned offset)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Get(address(x), count(x), datatype(x),
-            rank,
-            offset,
-            count(x), datatype(x),
-            window_);
-#else
-    for (size_t i = 0; i < x.size(); ++i)
-        x[i] = buffer_[offset + i];
-#endif
+  detail::get(window_, address(x), count(x), datatype_of(x), rank, offset);
 }
 
 template<class T>
@@ -149,9 +189,7 @@ void
 diy::mpi::window<T>::
 fence(int assert)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_fence(assert, window_);
-#endif
+  detail::fence(window_, assert);
 }
 
 template<class T>
@@ -159,9 +197,7 @@ void
 diy::mpi::window<T>::
 lock(int lock_type, int rank, int assert)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_lock(lock_type, rank, assert, window_);
-#endif
+  detail::lock(window_, lock_type, rank, assert);
 }
 
 template<class T>
@@ -169,9 +205,7 @@ void
 diy::mpi::window<T>::
 unlock(int rank)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_unlock(rank, window_);
-#endif
+  detail::unlock(window_, rank);
 }
 
 template<class T>
@@ -179,9 +213,7 @@ void
 diy::mpi::window<T>::
 lock_all(int assert)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_lock_all(assert, window_);
-#endif
+  detail::lock_all(window_, assert);
 }
 
 template<class T>
@@ -189,20 +221,15 @@ void
 diy::mpi::window<T>::
 unlock_all()
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_unlock_all(window_);
-#endif
+  detail::unlock_all(window_);
 }
+
 template<class T>
 void
 diy::mpi::window<T>::
-fetch_and_op(const T* origin, T* result, int rank, unsigned offset, MPI_Op op)
+fetch_and_op(const T* origin, T* result, int rank, unsigned offset, const diy::mpi::operation& op)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Fetch_and_op(origin, result, datatype(*origin), rank, offset, op, window_);
-#else
-    DIY_UNSUPPORTED_MPI_CALL(MPI_Fetch_and_op);
-#endif
+  detail::fetch_and_op(window_, origin, result, datatype_of(*origin), rank, offset, op);
 }
 
 template<class T>
@@ -210,12 +237,7 @@ void
 diy::mpi::window<T>::
 fetch(T& result, int rank, unsigned offset)
 {
-#ifndef VTKM_DIY_NO_MPI
-    T unused;
-    fetch_and_op(&unused, &result, rank, offset, MPI_NO_OP);
-#else
-    result = buffer_[offset];
-#endif
+  detail::fetch(window_, &result, datatype_of(result), rank, offset);
 }
 
 template<class T>
@@ -223,12 +245,7 @@ void
 diy::mpi::window<T>::
 replace(const T& value, int rank, unsigned offset)
 {
-#ifndef VTKM_DIY_NO_MPI
-    T unused;
-    fetch_and_op(&value, &unused, rank, offset, MPI_REPLACE);
-#else
-    buffer_[offset] = value;
-#endif
+  detail::replace(window_, &value, datatype_of(value), rank, offset);
 }
 
 template<class T>
@@ -236,9 +253,7 @@ void
 diy::mpi::window<T>::
 sync()
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_sync(window_);
-#endif
+  detail::sync(window_);
 }
 
 template<class T>
@@ -246,9 +261,7 @@ void
 diy::mpi::window<T>::
 flush(int rank)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_flush(rank, window_);
-#endif
+  detail::flush(window_, rank);
 }
 
 template<class T>
@@ -256,9 +269,7 @@ void
 diy::mpi::window<T>::
 flush_all()
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_flush_all(window_);
-#endif
+  detail::flush_all(window_);
 }
 
 template<class T>
@@ -266,9 +277,7 @@ void
 diy::mpi::window<T>::
 flush_local(int rank)
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_flush_local(rank, window_);
-#endif
+  detail::flush_local(window_, rank);
 }
 
 template<class T>
@@ -276,7 +285,11 @@ void
 diy::mpi::window<T>::
 flush_local_all()
 {
-#ifndef VTKM_DIY_NO_MPI
-    MPI_Win_flush_local_all(window_);
-#endif
+  detail::flush_local_all(window_);
 }
+
+#ifndef VTKMDIY_MPI_AS_LIB
+#include "window.cpp"
+#endif
+
+#endif // VTKMDIY_MPI_WINODW_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/no-thread.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/no-thread.hpp
index 0d7bb3ca0..132612c3e 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/no-thread.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/no-thread.hpp
@@ -18,6 +18,8 @@ namespace diy
     template<class Function, class... Args>
     explicit            thread(Function&& f, Args&&... args)      { f(args...); }       // not ideal, since it doesn't support member functions
 
+    thread&             operator=(thread&&)                       = default;
+
     void                join()                                    {}
 
     static unsigned     hardware_concurrency()                    { return 1; }
@@ -31,8 +33,13 @@ namespace diy
   struct lock_guard
   {
       lock_guard(T&)        {}
+      void lock()           {}
+      void unlock()         {}
   };
 
+  template<class T, class U>
+  using concurrent_map = std::map<T,U>;
+
   namespace this_thread
   {
       inline unsigned long int  get_id()    { return 0; }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/pick.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/pick.hpp
index 0b8de11fc..b10569de8 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/pick.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/pick.hpp
@@ -2,63 +2,74 @@
 #define VTKMDIY_PICK_HPP
 
 #include "link.hpp"
+#include "constants.h"      // for DEPRECATED
 
 namespace diy
 {
-    template<class Bounds, class Point, class OutIter>
-    void near(const RegularLink<Bounds>& link, const Point& p, float r, OutIter out,
-              const Bounds& domain);
+    template<class Bounds, class Point, class OutIter, class Coordinate>
+    void near(const RegularLink<Bounds>& link, const Point& p, Coordinate r, OutIter out, const Bounds& domain);
 
     template<class Bounds, class Point, class OutIter>
-    void in(const RegularLink<Bounds>& link, const Point& p, OutIter out, const Bounds& domain);
+    void in(const RegularLink<Bounds>& link, const Point& p, OutIter out, const Bounds& domain, bool core = true);
 
-    template<class Point, class Bounds>
-    float distance(int dim, const Bounds& bounds, const Point& p);
+    template<class Point, class Bounds, class Out = double>
+    Out distance(const Bounds& bounds, const Point& p);
+
+    template<class Point, class Bounds, class Out = double>
+    DEPRECATED("Use distance(const Bounds& bounds, const Point& p) instead.")
+    Out distance(int dim, const Bounds& bounds, const Point& p);
+
+    template<class Bounds, class Out = double>
+    Out distance(const Bounds& bounds1, const Bounds& bounds2);
+
+    template<class Bounds, class Out = double>
+    DEPRECATED("Use distance(const Bounds& bounds1, const Bounds& bounds2) instead.")
+    Out distance(int dim, const Bounds& bounds1, const Bounds& bounds2);
 
     template<class Bounds>
-    inline
-    float distance(int dim, const Bounds& bounds1, const Bounds& bounds2);
+    void wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain);
 
     template<class Bounds>
+    DEPRECATED("Use wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain) instead.")
     void wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain, int dim);
 }
 
 //! Finds the neighbors within radius r of a target point.
-template<class Bounds, class Point, class OutIter>
+template<class Bounds, class Point, class OutIter, class Coordinate>
 void
 diy::
 near(const RegularLink<Bounds>& link,  //!< neighbors
      const Point& p,                   //!< target point (must be in current block)
-     float r,                          //!< target radius (>= 0.0)
+     Coordinate r,                     //!< target radius (>= 0.0)
      OutIter out,                      //!< insert iterator for output set of neighbors
      const Bounds& domain)             //!< global domain bounds
 {
-  Bounds neigh_bounds; // neighbor block bounds
+  Bounds neigh_bounds {0}; // neighbor block bounds
 
   // for all neighbors of this block
   for (int n = 0; n < link.size(); n++)
   {
     // wrap neighbor bounds, if necessary, otherwise bounds will be unchanged
     neigh_bounds = link.bounds(n);
-    wrap_bounds(neigh_bounds, link.wrap(n), domain, link.dimension());
+    wrap_bounds(neigh_bounds, link.wrap(n), domain);
 
-    if (distance(link.dimension(), neigh_bounds, p) <= r)
+    if (distance(neigh_bounds, p) <= r)
         *out++ = n;
   } // for all neighbors
 }
 
 //! Find the distance between point `p` and box `bounds`.
-template<class Point, class Bounds>
-float
+template<class Point, class Bounds, class Out>
+Out
 diy::
-distance(int dim, const Bounds& bounds, const Point& p)
+distance(const Bounds& bounds, const Point& p)
 {
-    float res = 0;
-    for (int i = 0; i < dim; ++i)
+    Out res = 0;
+    for (int i = 0; i < p.size(); ++i)
     {
         // avoids all the annoying case logic by finding
         // diff = max(bounds.min[i] - p[i], 0, p[i] - bounds.max[i])
-        float diff = 0, d;
+        Out diff = 0, d;
 
         d = bounds.min[i] - p[i];
         if (d > diff) diff = d;
@@ -70,18 +81,68 @@ distance(int dim, const Bounds& bounds, const Point& p)
     return sqrt(res);
 }
 
-template<class Bounds>
-float
+// DEPRECATED: superseded by distance(const Bounds& bounds, const Point& p)
+//! Find the Euclidean distance between point `p` and box `bounds` over the first `dim` coordinates (0 if `p` is inside).
+template<class Point, class Bounds, class Out>
+Out
+diy::
+distance(int dim, const Bounds& bounds, const Point& p)
+{
+    Out res = 0;
+    for (int i = 0; i < dim; ++i)
+    {
+        // avoids all the annoying case logic by finding
+        // diff = max(bounds.min[i] - p[i], 0, p[i] - bounds.max[i])
+        Out diff = 0, d;
+
+        d = bounds.min[i] - p[i];
+        if (d > diff) diff = d;
+        d = p[i] - bounds.max[i];
+        if (d > diff) diff = d;
+
+        res += diff*diff;       // accumulate the squared per-axis gap
+    }
+    return sqrt(res);
+}
+
+//! Find the Euclidean distance between boxes `bounds1` and `bounds2` (0 if they overlap or touch).
+template<class Bounds, class Out>
+Out
+diy::
+distance(const Bounds& bounds1, const Bounds& bounds2)
+{
+    Out res = 0;
+    for (int i = 0; i < bounds1.min.size(); ++i)   // assume min, max of both bounds have same size
+    {
+        Out diff = 0;                              // gap between the two boxes along axis i
+        // d1 <= 0: bounds1 lies at or below bounds2 on this axis; d2 <= 0: the reverse
+        Out d1 = bounds1.max[i] - bounds2.min[i];
+        Out d2 = bounds2.max[i] - bounds1.min[i];
+        if (d1 > 0 && d2 > 0)
+            diff = 0;
+        else if (d1 <= 0)
+            diff = -d1;
+        else if (d2 <= 0)
+            diff = -d2;
+
+        res += diff*diff;
+    }
+    return sqrt(res);
+}
+
+// DEPRECATED
+template<class Bounds, class Out>
+Out
 diy::
 distance(int dim, const Bounds& bounds1, const Bounds& bounds2)
 {
-    float res = 0;
+    Out res = 0;
     for (int i = 0; i < dim; ++i)
     {
-        float diff = 0, d;
+        Out diff = 0, d;
 
-        float d1 = bounds1.max[i] - bounds2.min[i];
-        float d2 = bounds2.max[i] - bounds1.min[i];
+        Out d1 = bounds1.max[i] - bounds2.min[i];
+        Out d2 = bounds2.max[i] - bounds1.min[i];
 
         if (d1 > 0 && d2 > 0)
             diff = 0;
@@ -102,22 +163,43 @@ diy::
 in(const RegularLink<Bounds>& link,  //!< neighbors
    const Point& p,                   //!< target point
    OutIter out,                      //!< insert iterator for output set of neighbors
-   const Bounds& domain)             //!< global domain bounds
+   const Bounds& domain,             //!< global domain bounds
+   bool core)                        //!< check against core (or bounds, if false)
 {
-  Bounds neigh_bounds; // neighbor block bounds
+  Bounds neigh_bounds {0}; // neighbor block bounds
 
   // for all neighbors of this block
   for (int n = 0; n < link.size(); n++)
   {
-    // wrap neighbor bounds, if necessary, otherwise bounds will be unchanged
-    neigh_bounds = link.bounds(n);
-    wrap_bounds(neigh_bounds, link.wrap(n), domain, link.dimension());
+    if (core)
+        neigh_bounds = link.core(n);
+    else
+        neigh_bounds = link.bounds(n);
 
-    if (distance(link.dimension(), neigh_bounds, p) == 0)
+    // wrap neighbor bounds, if necessary, otherwise bounds will be unchanged
+    wrap_bounds(neigh_bounds, link.wrap(n), domain);
+
+    if (distance(neigh_bounds, p) == 0)
         *out++ = n;
   } // for all neighbors
 }
 
+// wraps block bounds in place: shifts min and max by wrap_dir[i] domain extents along each axis
+// wrap dir is the wrapping direction from original block to wrapped neighbor block
+// overall domain bounds are also needed; the dimension is taken from bounds.min.size()
+template<class Bounds>
+void
+diy::
+wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain)
+{
+  for (int i = 0; i < bounds.min.size(); ++i)      // assume min, max of bounds, domain have same size
+  {
+    bounds.min[i] += wrap_dir[i] * (domain.max[i] - domain.min[i]);
+    bounds.max[i] += wrap_dir[i] * (domain.max[i] - domain.min[i]);
+  }
+}
+
+// DEPRECATED
 // wraps block bounds
 // wrap dir is the wrapping direction from original block to wrapped neighbor block
 // overall domain bounds and dimensionality are also needed
@@ -133,5 +215,4 @@ wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain, int dim)
   }
 }
 
-
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/point.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/point.hpp
index e0465d83f..a18d1b602 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/point.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/point.hpp
@@ -8,6 +8,8 @@
 
 #include <array>
 
+#include "constants.h"      // for DEPRECATED
+
 namespace diy
 {
 
@@ -53,7 +55,9 @@ class Point: public std::array<Coordinate_, D>
         Point&              operator*=(Coordinate a)                { for (unsigned i = 0; i < D; ++i) (*this)[i] *= a;     return *this; }
         Point&              operator/=(Coordinate a)                { for (unsigned i = 0; i < D; ++i) (*this)[i] /= a;     return *this; }
 
-        Coordinate          norm() const                            { return (*this)*(*this); }
+        DEPRECATED("Use norm2 instead")
+        Coordinate          norm() const                            { return norm2(); }
+        Coordinate          norm2() const                           { return (*this)*(*this); }
 
         std::ostream&       operator<<(std::ostream& out) const     { out << (*this)[0]; for (unsigned i = 1; i < D; ++i) out << " " << (*this)[i]; return out; }
         std::istream&       operator>>(std::istream& in);
@@ -117,4 +121,4 @@ operator>>(std::istream& in, Point<C,D>& p)
 
 }
 
-#endif // DIY_POINT_HPP
+#endif // VTKMDIY_POINT_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/proxy.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/proxy.hpp
index 30fa662a5..4de89d1cf 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/proxy.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/proxy.hpp
@@ -10,29 +10,99 @@ namespace diy
     template <class T>
     struct EnqueueIterator;
 
-                        Proxy(Master* master__, int gid__):
+    using IncomingQueues = std::map<int,     MemoryBuffer>;
+    using OutgoingQueues = std::map<BlockID, MemoryBuffer>;
+
+                        Proxy(Master* master__, int gid__,
+                              IExchangeInfo*  iexchange__ = 0):
                           gid_(gid__),
                           master_(master__),
-                          incoming_(&master__->incoming(gid__)),
-                          outgoing_(&master__->outgoing(gid__)),
-                          collectives_(&master__->collectives(gid__))   {}
+                          iexchange_(iexchange__),
+                          collectives_(&master__->collectives(gid__))
+    {
+        fill_incoming();
+
+        // move outgoing_ back into proxy, in case it's a multi-foreach round
+        if (!iexchange_)
+            for (auto& x : master_->outgoing(gid_))
+            {
+                auto access = x.second.access();
+                if (!access->empty())
+                {
+                    outgoing_.emplace(x.first, access->back().move());
+                    access->pop_back();
+                }
+            }
+    }
+
+    // delete copy constructor to avoid copying incoming_ and outgoing_ (plus it
+    // won't work otherwise because MemoryBuffer has a deleted copy
+    // constructor)
+                        Proxy(const Proxy&)     =delete;
+                        Proxy(Proxy&&)          =default;
+    Proxy&              operator=(const Proxy&) =delete;
+    Proxy&              operator=(Proxy&&)      =default;
+
+                        ~Proxy()    // hands this proxy's locally held queues back to master
+    {
+        auto& outgoing = master_->outgoing(gid_);
+        auto& incoming = master_->incoming(gid_);
+        // NOTE(review): also runs for moved-from proxies (move ctor is defaulted); presumably a no-op since the moved-from maps are empty — confirm
+        // move each outgoing_ buffer to the back of master's matching outgoing queue
+        for (auto& x : outgoing_)
+        {
+            outgoing[x.first].access()->emplace_back(std::move(x.second));
+            if (iexchange_)
+                iexchange_->inc_work();     // one inc_work() per buffer handed over
+        }
+
+        // move incoming_ back into master, in case it's a multi-foreach round
+        if (!iexchange_)
+            for (auto& x : incoming_)
+                incoming[x.first].access()->emplace_front(std::move(x.second));     // goes back to the front, where fill_incoming() took it from
+    }
 
     int                 gid() const                                     { return gid_; }
 
+    bool                fill_incoming() const   // refills incoming_ from master; returns true if any queue had a message
+    {
+        bool exists = false;
+
+        incoming_.clear();      // drops whatever incoming_ still held (it is a mutable member, hence allowed in a const method)
+
+        // take at most one buffer (the front one) from each of master's incoming queues for this gid
+        for (auto& x : master_->incoming(gid_))
+        {
+            auto access = x.second.access();
+            if (!access->empty())
+            {
+                exists = true;
+                incoming_.emplace(x.first, access->front().move());
+                access->pop_front();
+                if (iexchange_)
+                    iexchange_->dec_work();     // one dec_work() per buffer taken
+            }
+        }
+
+        return exists;
+    }
+
     //! Enqueue data whose size can be determined automatically, e.g., an STL vector.
     template<class T>
     void                enqueue(const BlockID&  to,                                     //!< target block (gid,proc)
                                 const T&        x,                                      //!< data (eg. STL vector)
-                                void (*save)(BinaryBuffer&, const T&) = &::diy::save<T> //!< optional serialization function
+                                void (*save)(BinaryBuffer&, const T&) = &::diy::save    //!< optional serialization function
                                ) const
-    { OutgoingQueues& out = *outgoing_; save(out[to], x); }
+    {
+        save(outgoing_[to], x);
+    }
 
     //! Enqueue data whose size is given explicitly by the user, e.g., an array.
     template<class T>
     void                enqueue(const BlockID&  to,                                     //!< target block (gid,proc)
                                 const T*        x,                                      //!< pointer to the data (eg. address of start of vector)
                                 size_t          n,                                      //!< size in data elements (eg. ints)
-                                void (*save)(BinaryBuffer&, const T&) = &::diy::save<T> //!< optional serialization function
+                                void (*save)(BinaryBuffer&, const T&) = &::diy::save    //!< optional serialization function
                                ) const;
 
     //! Dequeue data whose size can be determined automatically (e.g., STL vector) and that was
@@ -41,9 +111,9 @@ namespace diy
     template<class T>
     void                dequeue(int             from,                                   //!< target block gid
                                 T&              x,                                      //!< data (eg. STL vector)
-                                void (*load)(BinaryBuffer&, T&) = &::diy::load<T>       //!< optional serialization function
+                                void (*load)(BinaryBuffer&, T&) = &::diy::load          //!< optional serialization function
                                ) const
-    { IncomingQueues& in  = *incoming_; load(in[from], x); }
+    { load(incoming_[from], x); }
 
     //! Dequeue an array of data whose size is given explicitly by the user.
     //! In this case, the user needs to allocate the receive buffer prior to calling dequeue.
@@ -51,7 +121,7 @@ namespace diy
     void                dequeue(int             from,                                   //!< target block gid
                                 T*              x,                                      //!< pointer to the data (eg. address of start of vector)
                                 size_t          n,                                      //!< size in data elements (eg. ints)
-                                void (*load)(BinaryBuffer&, T&) = &::diy::load<T>       //!< optional serialization function
+                                void (*load)(BinaryBuffer&, T&) = &::diy::load          //!< optional serialization function
                                ) const;
 
     //! Dequeue data whose size can be determined automatically (e.g., STL vector) and that was
@@ -60,7 +130,7 @@ namespace diy
     template<class T>
     void                dequeue(const BlockID&  from,                                   //!< target block (gid,proc)
                                 T&              x,                                      //!< data (eg. STL vector)
-                                void (*load)(BinaryBuffer&, T&) = &::diy::load<T>       //!< optional serialization function
+                                void (*load)(BinaryBuffer&, T&) = &::diy::load          //!< optional serialization function
                                ) const                                  { dequeue(from.gid, x, load); }
 
     //! Dequeue an array of data whose size is given explicitly by the user.
@@ -69,20 +139,24 @@ namespace diy
     void                dequeue(const BlockID&  from,                                   //!< target block (gid,proc)
                                 T*              x,                                      //!< pointer to the data (eg. address of start of vector)
                                 size_t          n,                                      //!< size in data elements (eg. ints)
-                                void (*load)(BinaryBuffer&, T&) = &::diy::load<T>       //!< optional serialization function
+                                void (*load)(BinaryBuffer&, T&) = &::diy::load          //!< optional serialization function
                                ) const                                  { dequeue(from.gid, x, n, load); }
 
     template<class T>
     EnqueueIterator<T>  enqueuer(const T& x,
-                                 void (*save)(BinaryBuffer&, const T&) = &::diy::save<T>) const
+                                 void (*save)(BinaryBuffer&, const T&) = &::diy::save   ) const
     { return EnqueueIterator<T>(this, x, save); }
 
-    IncomingQueues*     incoming() const                                { return incoming_; }
-    MemoryBuffer&       incoming(int from) const                        { return (*incoming_)[from]; }
+    IncomingQueues*     incoming() const                                { return &incoming_; }
+    MemoryBuffer&       incoming(int from) const                        { return incoming_[from]; }
     inline void         incoming(std::vector<int>& v) const;            // fill v with every gid from which we have a message
 
-    OutgoingQueues*     outgoing() const                                { return outgoing_; }
-    MemoryBuffer&       outgoing(const BlockID& to) const               { return (*outgoing_)[to]; }
+    OutgoingQueues*     outgoing() const                                { return &outgoing_; }
+    MemoryBuffer&       outgoing(const BlockID& to) const               { return outgoing_[to]; }
+
+    inline bool         empty_incoming_queues() const;
+    inline bool         empty_outgoing_queues() const;
+    inline bool         empty_queues() const;
 
 /**
  * \ingroup Communication
@@ -118,12 +192,18 @@ namespace diy
     CollectivesList*    collectives() const                             { return collectives_; }
 
     Master*             master() const                                  { return master_; }
+    IExchangeInfo*      iexchange() const                               { return iexchange_; }
 
     private:
       int               gid_;
       Master*           master_;
-      IncomingQueues*   incoming_;
-      OutgoingQueues*   outgoing_;
+      IExchangeInfo*    iexchange_;
+
+      // TODO: these are marked mutable to not have to undo consts on enqueue/dequeue, in case it breaks things;
+      //       eventually, implement this change
+      mutable IncomingQueues    incoming_;
+      mutable OutgoingQueues    outgoing_;
+
       CollectivesList*  collectives_;
   };
 
@@ -151,14 +231,12 @@ namespace diy
 
   struct Master::ProxyWithLink: public Master::Proxy
   {
-            ProxyWithLink(const Proxy&    proxy,
+            ProxyWithLink(Proxy&&         proxy,
                           void*           block__,
-                          Link*           link__,
-                          IExchangeInfo*  iexchange__ = 0):
-              Proxy(proxy),
+                          Link*           link__):
+              Proxy(std::move(proxy)),
               block_(block__),
-              link_(link__),
-              iexchange_(iexchange__)                               {}
+              link_(link__)                                         {}
 
       Link*   link() const                                          { return link_; }
       void*   block() const                                         { return block_; }
@@ -166,52 +244,6 @@ namespace diy
     private:
       void*             block_;
       Link*             link_;
-      IExchangeInfo*    iexchange_;         // not used for iexchange presently, but later could trigger some special behavior
-
-    public:
-      template<class T>
-      void enqueue(const BlockID&     to,
-              const T&                x,
-              void (*save)(BinaryBuffer&, const T&) = &::diy::save<T>) const
-      {
-          diy::Master::Proxy::enqueue(to, x, save);
-          if (iexchange_)
-              master()->icommunicate(iexchange_);
-      }
-
-      template<class T>
-      void enqueue(const BlockID&     to,
-              const T*                x,
-              size_t                  n,
-              void (*save)(BinaryBuffer&, const T&) = &::diy::save<T>) const
-      {
-          diy::Master::Proxy::enqueue(to, x, n, save);
-          if (iexchange_)
-              master()->icommunicate(iexchange_);
-      }
-
-      template<class T>
-      void dequeue(int                from,
-              T&                      x,
-              void (*load)(BinaryBuffer&, T&) = &::diy::load<T>) const
-      {
-          // TODO: uncomment if necessary, try first without icommunicating on dequeue
-//           if (iexchange_)
-//               master()->icommunicate(iexchange_);
-          diy::Master::Proxy::dequeue(from, x, load);
-      }
-
-      template<class T>
-      void dequeue(int                from,
-              T*                      x,
-              size_t                  n,
-              void (*load)(BinaryBuffer&, T&) = &::diy::load<T>) const
-      {
-          // TODO: uncomment if necessary, try first without icommunicating on dequeue
-//           if (iexchange_)
-//               master()->icommunicate(iexchange_);
-          diy::Master::Proxy::dequeue(from, x, n, load);
-      }
   };
 }                                           // diy namespace
 
@@ -219,10 +251,38 @@ void
 diy::Master::Proxy::
 incoming(std::vector<int>& v) const
 {
-  for (IncomingQueues::const_iterator it = incoming_->begin(); it != incoming_->end(); ++it)
-    v.push_back(it->first);
+  for (auto& x : incoming_)
+    v.push_back(x.first);
 }
 
+bool
+diy::Master::Proxy::
+empty_incoming_queues() const      // true when no incoming buffer tests true in a boolean context
+{
+    for (auto& x : *incoming())
+        if (x.second)              // NOTE(review): relies on MemoryBuffer's bool conversion, presumably "has unread data" — confirm
+            return false;
+    return true;
+}
+
+bool
+diy::Master::Proxy::
+empty_outgoing_queues() const      // true when every outgoing buffer reports size() == 0
+{
+    for (auto& x : *outgoing())
+        if (x.second.size())       // a single buffer with non-zero size() makes the result false
+            return false;
+    return true;
+}
+
+bool
+diy::Master::Proxy::
+empty_queues() const               // true only when both the incoming and the outgoing check report empty
+{
+    return empty_incoming_queues() && empty_outgoing_queues();
+}
+
+
 template<class T, class Op>
 void
 diy::Master::Proxy::
@@ -265,8 +325,7 @@ diy::Master::Proxy::
 enqueue(const BlockID& to, const T* x, size_t n,
         void (*save)(BinaryBuffer&, const T&)) const
 {
-    OutgoingQueues& out = *outgoing_;
-    BinaryBuffer&   bb  = out[to];
+    BinaryBuffer&   bb  = outgoing_[to];
     if (save == (void (*)(BinaryBuffer&, const T&)) &::diy::save<T>)
         diy::save(bb, x, n);       // optimized for unspecialized types
     else
@@ -280,8 +339,7 @@ diy::Master::Proxy::
 dequeue(int from, T* x, size_t n,
         void (*load)(BinaryBuffer&, T&)) const
 {
-    IncomingQueues& in = *incoming_;
-    BinaryBuffer&   bb = in[from];
+    BinaryBuffer&   bb = incoming_[from];
     if (load == (void (*)(BinaryBuffer&, T&)) &::diy::load<T>)
         diy::load(bb, x, n);       // optimized for unspecialized types
     else
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/reduce.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/reduce.hpp
index 39fdc61ce..8f106ed49 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/reduce.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/reduce.hpp
@@ -16,13 +16,13 @@ struct ReduceProxy: public Master::Proxy
 {
     typedef     std::vector<int>                            GIDVector;
 
-    ReduceProxy(const Master::Proxy&    proxy, //!< parent proxy
+    ReduceProxy(Master::Proxy&&         proxy, //!< parent proxy
                 void*                   block, //!< diy block
                 unsigned                round, //!< current round
                 const Assigner&         assigner, //!< assigner
                 const GIDVector&        incoming_gids, //!< incoming gids in this group
                 const GIDVector&        outgoing_gids): //!< outgoing gids in this group
-      Master::Proxy(proxy),
+      Master::Proxy(std::move(proxy)),
       block_(block),
       round_(round),
       assigner_(assigner)
@@ -46,13 +46,13 @@ struct ReduceProxy: public Master::Proxy
       }
     }
 
-    ReduceProxy(const Master::Proxy&    proxy, //!< parent proxy
+    ReduceProxy(Master::Proxy&&         proxy, //!< parent proxy
                 void*                   block, //!< diy block
                 unsigned                round, //!< current round
                 const Assigner&         assigner,
                 const Link&             in_link,
                 const Link&             out_link):
-      Master::Proxy(proxy),
+      Master::Proxy(std::move(proxy)),
       block_(block),
       round_(round),
       assigner_(assigner),
@@ -170,7 +170,7 @@ namespace detail
   {
     using Callback = std::function<void(Block*, const ReduceProxy&, const Partners&)>;
 
-                ReductionFunctor(unsigned round_, const Callback& reduce_, const Partners& partners_, const Assigner& assigner_):
+                ReductionFunctor(int round_, const Callback& reduce_, const Partners& partners_, const Assigner& assigner_):
                     round(round_), reduce(reduce_), partners(partners_), assigner(assigner_)        {}
 
     void        operator()(Block* b, const Master::ProxyWithLink& cp) const
@@ -180,20 +180,20 @@ namespace detail
       std::vector<int> incoming_gids, outgoing_gids;
       if (round > 0)
           partners.incoming(round, cp.gid(), incoming_gids, *cp.master());        // receive from the previous round
-      if (round < partners.rounds())
+      if (round < static_cast<int>(partners.rounds()))
           partners.outgoing(round, cp.gid(), outgoing_gids, *cp.master());        // send to the next round
 
-      ReduceProxy   rp(cp, b, round, assigner, incoming_gids, outgoing_gids);
+      ReduceProxy   rp(std::move(const_cast<Master::ProxyWithLink&>(cp)), b, round, assigner, incoming_gids, outgoing_gids);
       reduce(b, rp, partners);
 
       // touch the outgoing queues to make sure they exist
-      Master::OutgoingQueues& outgoing = *cp.outgoing();
-      if (outgoing.size() < (size_t) rp.out_link().size())
-        for (int j = 0; j < rp.out_link().size(); ++j)
-          outgoing[rp.out_link().target(j)];       // touch the outgoing queue, creating it if necessary
+      Master::Proxy::OutgoingQueues& outgoing = *rp.outgoing();
+      if (outgoing.size() < static_cast<size_t>(rp.out_link().size()))
+        for (BlockID target : rp.out_link().neighbors())
+          outgoing[target];       // touch the outgoing queue, creating it if necessary
     }
 
-    unsigned        round;
+    int             round;
     Callback        reduce;
     Partners        partners;
     const Assigner& assigner;
@@ -213,4 +213,4 @@ namespace detail
 
 } // diy
 
-#endif // DIY_REDUCE_HPP
+#endif // VTKMDIY_REDUCE_HPP
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/serialization.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/serialization.hpp
index 5f5894b0a..992517608 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/serialization.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/serialization.hpp
@@ -13,6 +13,8 @@
 #include <unordered_set>
 #include <type_traits>              // this is used for a safety check for default serialization
 
+#include <cassert>
+
 namespace diy
 {
   //! A serialization buffer. \ingroup Serialization
@@ -30,6 +32,11 @@ namespace diy
                         MemoryBuffer(size_t position_ = 0):
                           position(position_)                       {}
 
+                        MemoryBuffer(MemoryBuffer&&)                =default;
+                        MemoryBuffer(const MemoryBuffer&)           =delete;
+    MemoryBuffer&       operator=(MemoryBuffer&&)                   =default;
+    MemoryBuffer&       operator=(const MemoryBuffer&)              =delete;
+
     virtual inline void save_binary(const char* x, size_t count) override;   //!< copy `count` bytes from `x` into the buffer
     virtual inline void append_binary(const char* x, size_t count) override; //!< append `count` bytes from `x` to end of buffer
     virtual inline void load_binary(char* x, size_t count) override;         //!< copy `count` bytes into `x` from the buffer
@@ -52,7 +59,7 @@ namespace diy
     static float        growth_multiplier()                         { return 1.5; }
 
     // simple file IO
-    void                write(const std::string& fn) const          { std::ofstream out(fn.c_str()); out.write(&buffer[0], size()); }
+    void                write(const std::string& fn) const          { std::ofstream out(fn.c_str(), std::ios::binary); if (!buffer.empty()) out.write(&buffer[0], static_cast<std::streamsize>(size())); } // binary mode mirrors read(); the guard avoids &buffer[0] on an empty buffer
     void                read(const std::string& fn)
     {
         std::ifstream in(fn.c_str(), std::ios::binary | std::ios::ate);
@@ -99,22 +106,16 @@ namespace diy
 // 20150422 == 5.1
 // 20141030 == 4.9.2
 // See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning.__GLIBCXX__
-#if defined(__GLIBCXX__) &&                                                                        \
+#if !(defined(__GLIBCXX__) &&                                                                      \
   (__GLIBCXX__ < 20150422 || __GLIBCXX__ == 20160726 || __GLIBCXX__ == 20150626 ||                 \
-   __GLIBCXX__ == 20150623)
-#define VTKMDIY_USING_GLIBCXX_4
-#endif
-
-#if !defined(VTKMDIY_USING_GLIBCXX_4)
+   __GLIBCXX__ == 20150623))
     //exempt glibcxx-4 variants as they don't have is_trivially_copyable implemented
     static_assert(std::is_trivially_copyable<T>::value, "Default serialization works only for trivially copyable types");
-#else
-# undef VTKMDIY_USING_GLIBCXX_4
 #endif
 
     static void         save(BinaryBuffer& bb, const T& x)          { bb.save_binary((const char*)  &x, sizeof(T)); }
     static void         load(BinaryBuffer& bb, T& x)                { bb.load_binary((char*)        &x, sizeof(T)); }
-    static size_t       size(const T& x)                            { return sizeof(T); }
+    static size_t       size(const T&)                              { return sizeof(T); }
   };
 
   //! Saves `x` to `bb` by calling `diy::Serialization<T>::save(bb,x)`.
@@ -185,14 +186,16 @@ namespace diy
     static void         save(BinaryBuffer& bb, const MemoryBuffer& x)
     {
       diy::save(bb, x.position);
-      diy::save(bb, &x.buffer[0], x.position);
+      if (x.position > 0)
+          diy::save(bb, &x.buffer[0], x.position);
     }
 
     static void         load(BinaryBuffer& bb, MemoryBuffer& x)
     {
       diy::load(bb, x.position);
       x.buffer.resize(x.position);
-      diy::load(bb, &x.buffer[0], x.position);
+      if (x.position > 0)
+          diy::load(bb, &x.buffer[0], x.position);
     }
 
     static size_t       size(const MemoryBuffer& x)
@@ -219,7 +222,7 @@ namespace diy
     {
       size_t s;
       diy::load(bb, s);
-      v.resize(s);
+      v.resize(s, U());
       if (s > 0)
         diy::load(bb, &v[0], s);
     }
@@ -242,7 +245,7 @@ namespace diy
     {
       size_t s;
       diy::load(bb, s);
-      v.resize(s);
+      v.resize(s, U());
       if (s > 0)
         diy::load(bb, &v[0], s);
     }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/stats.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/stats.hpp
index e61402c2d..4bfc41adb 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/stats.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/stats.hpp
@@ -4,15 +4,83 @@
 #include <chrono>
 #include <string>
 #include <vector>
+#include <unordered_map>
 
-#include "log.hpp"      // need this for format
+#include "log.hpp"
+
+#if defined(VTKMDIY_USE_CALIPER)
+#include <caliper/cali.h>
+#include <caliper/common/Variant.h>
+#endif
 
 namespace diy
 {
 namespace stats
 {
 
-#if defined(DIY_PROFILE)
+inline
+std::ostream&
+operator<<(std::ostream& out, const std::chrono::high_resolution_clock::duration& d)
+{
+    auto time = std::chrono::duration_cast<std::chrono::microseconds>(d).count();
+    fmt::print(out, "{:02d}:{:02d}:{:02d}.{:06d}",
+                    time/1000000/60/60,
+                    time/1000000/60 % 60,
+                    time/1000000 % 60,
+                    time % 1000000);
+    return out;
+}
+
+struct DurationAccumulator  // sums, per name, the time elapsed between `acc << name` and `acc >> name`
+{
+    using   Clock    = std::chrono::high_resolution_clock;
+    using   Time     = Clock::time_point;
+    using   Duration = Clock::duration;
+    // `<<` records the start time for `name`; `>>` adds (now - recorded start) to that name's total.
+    void    operator<<(std::string name)        { last[name] = Clock::now(); }
+    void    operator>>(std::string name)        { duration[name] += Clock::now() - last[name]; }
+    // NOTE: `>>` without a prior `<<` default-constructs last[name], so the sum is measured from that default time point.
+    void    clear()                             { last.clear(); duration.clear(); }
+
+    std::unordered_map<std::string, Time>       last;       // most recent start time per name
+    std::unordered_map<std::string, Duration>   duration;   // accumulated elapsed time per name
+
+    void    output(std::ostream& out, std::string prefix = "") const
+    {
+        if (!prefix.empty())
+            prefix += " ";
+        // one line per name: "<prefix><duration> <name>"; order follows the unordered_map iteration
+        for (auto& x : duration)
+            out << prefix << x.second << ' ' << x.first << '\n';
+    }
+};
+
+template<class Profiler>
+struct  ScopedProfile   // scope guard: `prof << name` on construction, `prof >> name` on destruction
+{
+        ScopedProfile(Profiler& prof_, std::string name_):
+            prof(prof_), name(name_), active(true)  { prof << name; }
+        ~ScopedProfile()                            { if (active) prof >> name; }
+        // Moving hands the pending `>>` to the new object; the moved-from guard is deactivated.
+        ScopedProfile(ScopedProfile&& other):
+            prof(other.prof),
+            name(other.name),
+            active(other.active)                    { other.active = false; }
+        // Copying and all assignment are disabled so each `<<` is matched by at most one `>>`.
+    ScopedProfile&
+        operator=(ScopedProfile&& other) = delete;
+        ScopedProfile(const ScopedProfile&) = delete;
+    ScopedProfile&
+        operator=(const ScopedProfile&) = delete;
+
+    Profiler&   prof;       // profiler notified on entry/exit (held by reference)
+    std::string name;       // label passed to the profiler
+    bool        active;     // false once moved from; suppresses the exit notification
+};
+
+
+#if !defined(VTKMDIY_USE_CALIPER)
+#if defined(VTKMDIY_PROFILE)
 struct Profiler
 {
     using   Clock = std::chrono::high_resolution_clock;
@@ -32,28 +100,7 @@ struct Profiler
     };
 
     using   EventsVector = std::vector<Event>;
-
-    struct  Scoped
-    {
-            Scoped(Profiler& prof_, std::string name_):
-                prof(prof_), name(name_), active(true)  { prof << name; }
-            ~Scoped()                                   { if (active) prof >> name; }
-
-            Scoped(Scoped&& other):
-                prof(other.prof),
-                name(other.name),
-                active(other.active)                    { other.active = false; }
-
-        Scoped&
-            operator=(Scoped&& other) = delete;
-            Scoped(const Scoped&) = delete;
-        Scoped&
-            operator=(const Scoped&) = delete;
-
-        Profiler&   prof;
-        std::string name;
-        bool        active;
-    };
+    using   Scoped       = ScopedProfile<Profiler>;
 
             Profiler()                                  { reset_time(); }
 
@@ -62,10 +109,10 @@ struct Profiler
     void    operator<<(std::string name)                { enter(name); }
     void    operator>>(std::string name)                { exit(name); }
 
-    void    enter(std::string name)                     { events.push_back(Event(name, true)); }
-    void    exit(std::string name)                      { events.push_back(Event(name, false)); }
+    void    enter(std::string name)                     { events.push_back(Event(name, true));  total << name; }
+    void    exit(std::string name)                      { events.push_back(Event(name, false)); total >> name; }
 
-    void    output(std::ostream& out, std::string prefix = "")
+    void    output(std::ostream& out, std::string prefix = "") const
     {
         if (!prefix.empty())
             prefix += " ";
@@ -73,44 +120,103 @@ struct Profiler
         for (size_t i = 0; i < events.size(); ++i)
         {
             const Event& e = events[i];
-            auto time = std::chrono::duration_cast<std::chrono::microseconds>(e.stamp - start).count();
-
-            fmt::print(out, "{}{:02d}:{:02d}:{:02d}.{:06d} {}{}\n",
-                            prefix,
-                            time/1000000/60/60,
-                            time/1000000/60 % 60,
-                            time/1000000 % 60,
-                            time % 1000000,
-                            (e.begin ? '<' : '>'),
-                            e.name);
+            out << prefix << (e.stamp - start) << ' ' << (e.begin ? '<' : '>') <<  e.name << '\n';
         }
+
+        out << "# Total times:\n";
+        total.output(out, "# ");
     }
 
     Scoped  scoped(std::string name)                    { return Scoped(*this, name); }
 
-    void    clear()                                     { events.clear(); }
+    void    clear()                                     { events.clear(); total.clear(); }
+
+    const DurationAccumulator& totals() const           { return total; }
 
     private:
-        Time            start;
-        EventsVector    events;
+        Time                    start;
+        EventsVector            events;
+        DurationAccumulator     total;
 };
-#else
+#else   // VTKMDIY_PROFILE
 struct Profiler
 {
-    struct Scoped {};
+    using   Scoped = ScopedProfile<Profiler>;
 
-    void    reset_time()                                {}
+    void    reset_time()                        {}
 
-    void    operator<<(std::string)                     {}
-    void    operator>>(std::string)                     {}
+    void    operator<<(std::string name)        { enter(name); }
+    void    operator>>(std::string name)        { exit(name); }
 
-    void    enter(const std::string&)                   {}
-    void    exit(const std::string&)                    {}
+    void    enter(std::string)                  {}
+    void    exit(std::string)                   {}
 
-    void    output(std::ostream&, std::string = "")     {}
-    void    clear()                                     {}
+    void    output(std::ostream& out, std::string = "") const
+    {
+        out << "# Total times:\n";
+        total.output(out, "# ");
+    }
+    void    clear()                             { total.clear(); }
 
-    Scoped  scoped(std::string)                         { return Scoped(); }
+    Scoped  scoped(std::string name)            { return Scoped(*this, name); }
+
+    const DurationAccumulator&
+            totals() const                      { return total; }
+
+    private:
+        DurationAccumulator total;
+};
+#endif  // VTKMDIY_PROFILE
+
+// Annotations don't do anything without Caliper
+struct Annotation
+{
+    struct Guard
+    {
+                    Guard(Annotation&)              {}
+    };
+
+                    Annotation(const char*)         {}
+
+    template<class T>
+    Annotation&     set(T)                          { return *this; }
+};
+
+struct Variant
+{
+    template<class T>
+                    Variant(T)                      {}
+
+};
+
+#else   // VTKMDIY_USE_CALIPER
+
+using Annotation = cali::Annotation;
+using Variant    = cali::Variant;
+
+struct Profiler     // Caliper-backed profiler: regions are forwarded to CALI_MARK_BEGIN/CALI_MARK_END
+{
+    using   Scoped = ScopedProfile<Profiler>;
+
+    void    reset_time()                        {}
+
+    void    operator<<(std::string name)        { enter(name); }
+    void    operator>>(std::string name)        { exit(name); }
+
+    void    enter(std::string name)             { CALI_MARK_BEGIN(name.c_str()); }
+    void    exit(std::string name)              { CALI_MARK_END(name.c_str()); }
+    // Nothing is recorded locally, so output() and clear() are no-ops; parameters stay unnamed to avoid unused warnings.
+    void    output(std::ostream&, std::string = "") const {}
+    void    clear()                             {}
+
+    Scoped  scoped(std::string name)            { return Scoped(*this, name); }
+
+    // unused: kept so the interface matches the non-Caliper Profiler; never written to in this struct
+    const DurationAccumulator&
+            totals() const                      { return total; }
+
+    private:
+        DurationAccumulator total;
+};
 #endif
 }
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/storage.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/storage.hpp
index c68536cfd..cb541f546 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/storage.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/storage.hpp
@@ -26,14 +26,14 @@ namespace diy
       virtual inline void save_binary(const char* x, size_t count) override   { fwrite(x, 1, count, file); head += count; }
       virtual inline void append_binary(const char* x, size_t count) override
       {
-          size_t temp_pos = ftell(file);
+          auto temp_pos = ftell(file);
           fseek(file, static_cast<long>(tail), SEEK_END);
           fwrite(x, 1, count, file);
           tail += count;
           fseek(file, temp_pos, SEEK_SET);
       }
-      virtual inline void load_binary(char* x, size_t count) override         { auto n = fread(x, 1, count, file); (void) n;}
-      virtual inline void load_binary_back(char* x, size_t count) override    { fseek(file, static_cast<long>(tail), SEEK_END); auto n = fread(x, 1, count, file); tail += count; fseek(file, static_cast<long>(head), SEEK_SET); (void) n;}
+      virtual inline void load_binary(char* x, size_t count) override         { auto n = fread(x, 1, count, file); VTKMDIY_UNUSED(n);}
+      virtual inline void load_binary_back(char* x, size_t count) override    { fseek(file, static_cast<long>(tail), SEEK_END); auto n = fread(x, 1, count, file); tail += count; fseek(file, static_cast<long>(head), SEEK_SET); VTKMDIY_UNUSED(n);}
 
       size_t              size() const                                { return head; }
 
@@ -135,7 +135,8 @@ namespace diy
         _read(fh, &bb.buffer[0], static_cast<unsigned int>(fr.size));
 #else
         int fh = open(fr.name.c_str(), O_RDONLY | O_SYNC, 0600);
-        auto n = read(fh, &bb.buffer[0], fr.size); (void) n;
+        auto n = read(fh, &bb.buffer[0], fr.size);
+        VTKMDIY_UNUSED(n);
 #endif
         io::utils::close(fh);
         remove_file(fr);
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/chobo/small_vector.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/chobo/small_vector.hpp
new file mode 100644
index 000000000..784534bbe
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/chobo/small_vector.hpp
@@ -0,0 +1,1716 @@
+// chobo-small-vector v1.02
+//
+// std::vector-like class with a static buffer for initial capacity
+//
+// MIT License:
+// Copyright(c) 2016-2018 Chobolabs Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining
+// a copy of this software and associated documentation files(the
+// "Software"), to deal in the Software without restriction, including
+// without limitation the rights to use, copy, modify, merge, publish,
+// distribute, sublicense, and / or sell copies of the Software, and to
+// permit persons to whom the Software is furnished to do so, subject to
+// the following conditions :
+//
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+//
+//
+//                  VERSION HISTORY
+//
+//  1.02 (2018-04-24) Class inherits from its allocator to make use of the
+//                    empty base class optimization.
+//                    emplace_back returns a reference to the inserted element
+//                    as per the c++17 standard.
+//  1.01 (2017-04-02) Fixed compilation error on (count, value) constructor and
+//                    assign, and insert methods when count or value is 0
+//  1.00 (2016-11-08) First public release
+//
+//
+//                  DOCUMENTATION
+//
+// Simply include this file wherever you need.
+// It defines the class chobo::small_vector, which is a drop-in replacement of
+// std::vector, but with an initial capacity as a template argument.
+// It gives you the benefits of using std::vector, at the cost of having a statically
+// allocated buffer for the initial capacity, which gives you cache-local data
+// when the vector is small (smaller than the initial capacity).
+//
+// When the size exceeds the capacity, the vector allocates memory via the provided
+// allocator, falling back to classic std::vector behavior.
+//
+// The second size_t template argument, RevertToStaticSize, is used when a
+// small_vector which has already switched to dynamically allocated size reduces
+// its size to a number smaller than that. In this case the vector's buffer
+// switches back to the statically allocated one.
+//
+// A default value for the initial static capacity is provided so a replacement
+// in an existing code is possible with minimal changes to it.
+//
+// Example:
+//
+// chobo::small_vector<int, 4, 5> myvec; // a small_vector of size 0, initial capacity 4, and revert size 4 (smaller than 5)
+// myvec.resize(2); // vector is {0,0} in static buffer
+// myvec[1] = 11; // vector is {0,11} in static buffer
+// myvec.push_back(7); // vector is {0,11,7}  in static buffer
+// myvec.insert(myvec.begin() + 1, 3); // vector is {0,3,11,7} in static buffer
+// myvec.push_back(5); // vector is {0,3,11,7,5} in dynamically allocated memory buffer
+// myvec.erase(myvec.begin());  // vector is {3,11,7,5} back in static buffer
+// myvec.resize(5); // vector is {3,11,7,5,0} back in dynamically allocated memory
+//
+//
+// Reference:
+//
+// chobo::small_vector is fully compatible with std::vector with
+// the following exceptions:
+// * when reducing the size with erase or resize the new size may fall below
+//   RevertToStaticSize (if it is not 0). In such a case the vector will
+//   revert to using its static buffer, invalidating all iterators (contrary
+//   to the standard)
+// * a method is added `revert_to_static()` which reverts to the static buffer
+//   if possible, but doesn't free the dynamically allocated one
+//
+// Other notes:
+//
+// * the default value for RevertToStaticSize is zero. This means that once a dynamic
+//   buffer is allocated the data will never be put into the static one, even if the
+//   size allows it. Even if clear() is called. The only way to do so is to call
+//   shrink_to_fit() or revert_to_static()
+// * shrink_to_fit will free and reallocate if size != capacity and the data
+//   doesn't fit into the static buffer. It also will revert to the static buffer
+//   whenever possible regardless of the RevertToStaticSize value
+//
+//
+//                  Configuration
+//
+// The library has two configuration options. They can be set as #define-s
+// before including the header file, but it is recommended to change the code
+// of the library itself with the values you want, especially if you include
+// the library in many compilation units (as opposed to, say, a precompiled
+// header or a central header).
+//
+//                  Config out of range error handling
+//
+// An out of range error is a runtime error which is triggered when a method is
+// called with an iterator that doesn't belong to the vector's current range.
+// For example: vec.erase(vec.end() + 1);
+//
+// This is set by defining CHOBO_SMALL_VECTOR_ERROR_HANDLING to one of the
+// following values:
+// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_NONE - no error handling. Crashes WILL
+//      ensue if the error is triggered.
+// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW - std::out_of_range is thrown.
+// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT - assertions are triggered.
+// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW - combines assert and
+//      throw to catch errors more easily in debug mode
+//
+// To set this setting by editing the file change the line:
+// ```
+// #   define CHOBO_SMALL_VECTOR_ERROR_HANDLING CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW
+// ```
+// to the default setting of your choice
+//
+//                  Config bounds checks:
+//
+// By default bounds checks are made in debug mode (via an assert) when accessing
+// elements (with `at` or `[]`). Iterators are not checked (yet...)
+//
+// To disable them, you can define CHOBO_SMALL_VECTOR_NO_DEBUG_BOUNDS_CHECK
+// before including the header.
+//
+//
+//                  TESTS
+//
+// The tests are included in the header file and use doctest (https://github.com/onqtam/doctest).
+// To run them, define CHOBO_SMALL_VECTOR_TEST_WITH_DOCTEST before including
+// the header in a file which has doctest.h already included.
+//
+#pragma once
+
+#include <type_traits>
+#include <cstddef>
+#include <memory>
+
+#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_NONE  0
+#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW 1
+#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT 2
+#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW 3
+
+#if !defined(CHOBO_SMALL_VECTOR_ERROR_HANDLING)
+#   define CHOBO_SMALL_VECTOR_ERROR_HANDLING CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW
+#endif
+// _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) reports an out-of-range iterator when `cond` is true.
+// Every mode must take exactly one argument so the same call sites compile under any configuration.
+#if CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_NONE
+#   define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond)
+#elif CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW
+#   include <stdexcept>
+#   define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) if (cond) throw std::out_of_range("chobo::small_vector out of range")
+#elif CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT
+#   include <cassert>
+#   define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) assert(!(cond) && "chobo::small_vector out of range")
+#elif CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW
+#   include <stdexcept>
+#   include <cassert>
+#   define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) \
+    do { if (cond) { assert(false && "chobo::small_vector out of range"); throw std::out_of_range("chobo::small_vector out of range"); } } while(false)
+#else
+#error "Unknown CHOBO_SMALL_VECTOR_ERROR_HANDLING"
+#endif
+
+
+#if defined(CHOBO_SMALL_VECTOR_NO_DEBUG_BOUNDS_CHECK)
+#   define _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i)
+#else
+#   include <cassert>
+#   define _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i) assert((i) < this->size())
+#endif
+
+namespace chobo
+{
+
+template<typename T, size_t StaticCapacity = 16, size_t RevertToStaticSize = 0, class Alloc = std::allocator<T>>
+struct small_vector: Alloc
+{
+    static_assert(RevertToStaticSize <= StaticCapacity + 1, "chobo::small_vector: the revert-to-static size shouldn't exceed the static capacity by more than one");
+
+public:
+    using allocator_type = Alloc;
+    using value_type = typename Alloc::value_type;
+    using size_type = typename Alloc::size_type;
+    using difference_type = typename Alloc::difference_type;
+    using reference = typename Alloc::reference;
+    using const_reference = typename Alloc::const_reference;
+    using pointer = typename Alloc::pointer;
+    using const_pointer = typename Alloc::const_pointer;
+    using iterator = pointer;
+    using const_iterator = const_pointer;
+    using reverse_iterator = std::reverse_iterator<iterator>;
+    using const_reverse_iterator = std::reverse_iterator<const_iterator>;
+
+    static constexpr size_t static_capacity = StaticCapacity;
+    static constexpr intptr_t revert_to_static_size = RevertToStaticSize;
+
+    small_vector()
+        : small_vector(Alloc())
+    {}
+
+    small_vector(const Alloc& alloc)
+        : Alloc(alloc)
+        , m_capacity(StaticCapacity)
+        , m_dynamic_capacity(0)
+        , m_dynamic_data(nullptr)
+    {
+        m_begin = m_end = static_begin_ptr();
+    }
+
+    explicit small_vector(size_t count, const Alloc& alloc = Alloc())
+        : small_vector(alloc)
+    {
+        resize(count);
+    }
+
+    explicit small_vector(size_t count, const T& value, const Alloc& alloc = Alloc())
+        : small_vector(alloc)
+    {
+        assign_impl(count, value);
+    }
+
+    template <class InputIterator, typename = decltype(*std::declval<InputIterator>())>
+    small_vector(InputIterator first, InputIterator last, const Alloc& alloc = Alloc())
+        : small_vector(alloc)
+    {
+        assign_impl(first, last);
+    }
+
+    small_vector(std::initializer_list<T> l, const Alloc& alloc = Alloc())
+        : small_vector(alloc)
+    {
+        assign_impl(l);
+    }
+
+    small_vector(const small_vector& v)
+        : small_vector(v, std::allocator_traits<Alloc>::select_on_container_copy_construction(v.get_allocator()))
+    {}
+
+    small_vector(const small_vector& v, const Alloc& alloc) // copy-construct from v using the given allocator
+        : Alloc(alloc)
+        , m_dynamic_capacity(0)
+        , m_dynamic_data(nullptr)
+    {
+        if (v.size() > StaticCapacity) // does not fit the static buffer: allocate exactly v.size() slots
+        {
+            m_dynamic_capacity = v.size();
+            m_begin = m_end = m_dynamic_data = get_alloc().allocate(m_dynamic_capacity);
+            m_capacity = v.size();
+        }
+        else // fits: use the static buffer, no allocation
+        {
+            m_begin = m_end = static_begin_ptr();
+            m_capacity = StaticCapacity;
+        }
+        // copy-construct the elements one by one; m_end advances past each constructed element
+        for (auto p = v.m_begin; p != v.m_end; ++p)
+        {
+            get_alloc().construct(m_end, *p);
+            ++m_end;
+        }
+    }
+
+    small_vector(small_vector&& v) // move constructor: takes over v's contents and leaves v empty
+        : Alloc(std::move(v.get_alloc()))
+        , m_capacity(v.m_capacity)
+        , m_dynamic_capacity(v.m_dynamic_capacity)
+        , m_dynamic_data(v.m_dynamic_data)
+    {
+        if (v.m_begin == v.static_begin_ptr()) // v's elements live in its static buffer, which cannot be stolen
+        {
+            m_begin = m_end = static_begin_ptr();
+            for (auto p = v.m_begin; p != v.m_end; ++p)
+            {
+                get_alloc().construct(m_end, std::move(*p));
+                ++m_end;
+            }
+            // the moved-from elements are still v's to dispose of
+            v.clear();
+        }
+        else // v's elements live in its heap buffer: adopt the pointers directly
+        {
+            m_begin = v.m_begin;
+            m_end = v.m_end;
+        }
+        // v gives up any heap buffer (its pointer was copied into *this above) and is reset to an empty static state
+        v.m_dynamic_capacity = 0;
+        v.m_dynamic_data = nullptr;
+        v.m_begin = v.m_end = v.static_begin_ptr();
+        v.m_capacity = StaticCapacity;
+    }
+
+    ~small_vector() // runs clear(), then releases the heap buffer if one is held
+    {
+        clear();
+        // any heap buffer is released separately from clear(), using the capacity recorded for it
+        if (m_dynamic_data)
+        {
+            get_alloc().deallocate(m_dynamic_data, m_dynamic_capacity);
+        }
+    }
+
+    small_vector& operator=(const small_vector& v) // copy assignment: replaces the contents with copies of v's elements
+    {
+        if (this == &v)
+        {
+            // prevent self usurp
+            return *this;
+        }
+        // drop the current contents before choosing where the copies will live
+        clear();
+        // choose_data() is defined elsewhere; presumably returns the static or a large-enough heap buffer - TODO confirm
+        m_begin = m_end = choose_data(v.size());
+        // copy-construct each element at the end of the new range
+        for (auto p = v.m_begin; p != v.m_end; ++p)
+        {
+            get_alloc().construct(m_end, *p);
+            ++m_end;
+        }
+        // update_capacity() is defined elsewhere; presumably syncs m_capacity with the chosen buffer - TODO confirm
+        update_capacity();
+
+        return *this;
+    }
+
+    // Move assignment. Frees the heap buffer this vector owns before adopting v's
+    // state; previously that buffer was overwritten and leaked. v ends up empty.
+    small_vector& operator=(small_vector&& v)
+    {
+        if (this == &v) return *this; // self-move would otherwise discard our own elements
+        clear();
+        // clear() keeps m_dynamic_data (the destructor frees it), so release it with our allocator now
+        if (m_dynamic_data) get_alloc().deallocate(m_dynamic_data, m_dynamic_capacity);
+        get_alloc() = std::move(v.get_alloc());
+        m_capacity = v.m_capacity;
+        m_dynamic_capacity = v.m_dynamic_capacity;
+        m_dynamic_data = v.m_dynamic_data;
+        if (v.m_begin == v.static_begin_ptr())
+        {
+            m_begin = m_end = static_begin_ptr(); // static storage cannot be stolen: move element-wise
+            for (auto p = v.m_begin; p != v.m_end; ++p)
+            {
+                get_alloc().construct(m_end, std::move(*p));
+                ++m_end;
+            }
+            v.clear();
+        }
+        else
+        {
+            m_begin = v.m_begin; // heap storage: just take over v's pointers
+            m_end = v.m_end;
+        }
+        v.m_dynamic_capacity = 0; // v no longer owns a heap buffer
+        v.m_dynamic_data = nullptr;
+        v.m_begin = v.m_end = v.static_begin_ptr();
+        v.m_capacity = StaticCapacity;
+        return *this;
+    }
+
+    void assign(size_type count, const T& value)
+    {
+        clear();
+        assign_impl(count, value);
+    }
+
+    template <class InputIterator, typename = decltype(*std::declval<InputIterator>())>
+    void assign(InputIterator first, InputIterator last)
+    {
+        clear();
+        assign_impl(first, last);
+    }
+
+    void assign(std::initializer_list<T> ilist)
+    {
+        clear();
+        assign_impl(ilist);
+    }
+
+    allocator_type get_allocator() const
+    {
+        return get_alloc();
+    }
+
+    const_reference at(size_type i) const
+    {
+        _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i);
+        return *(m_begin + i);
+    }
+
+    reference at(size_type i)
+    {
+        _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i);
+        return *(m_begin + i);
+    }
+
+    const_reference operator[](size_type i) const
+    {
+        return at(i);
+    }
+
+    reference operator[](size_type i)
+    {
+        return at(i);
+    }
+
+    const_reference front() const
+    {
+        return at(0);
+    }
+
+    reference front()
+    {
+        return at(0);
+    }
+
+    const_reference back() const
+    {
+        return *(m_end - 1);
+    }
+
+    reference back()
+    {
+        return *(m_end - 1);
+    }
+
+    const_pointer data() const noexcept
+    {
+        return m_begin;
+    }
+
+    pointer data() noexcept
+    {
+        return m_begin;
+    }
+
+    // iterators
+    // Iterator to the first element.
+    iterator begin() noexcept
+    {
+        iterator first = m_begin;
+        return first;
+    }
+
+    // Const iterator to the first element.
+    const_iterator begin() const noexcept
+    {
+        const_iterator first = m_begin;
+        return first;
+    }
+
+    // Const iterator to the first element, also available on non-const vectors.
+    const_iterator cbegin() const noexcept
+    {
+        const_iterator first = m_begin;
+        return first;
+    }
+
+    // Iterator one past the last element.
+    iterator end() noexcept
+    {
+        iterator past_last = m_end;
+        return past_last;
+    }
+
+    // Const iterator one past the last element.
+    const_iterator end() const noexcept
+    {
+        const_iterator past_last = m_end;
+        return past_last;
+    }
+
+    // Const iterator one past the last element, also available on non-const vectors.
+    const_iterator cend() const noexcept
+    {
+        const_iterator past_last = m_end;
+        return past_last;
+    }
+
+    // Reverse iterator that starts at the last element.
+    reverse_iterator rbegin() noexcept
+    {
+        iterator past_last = end();
+        return reverse_iterator(past_last);
+    }
+
+    // Const reverse iterator that starts at the last element.
+    const_reverse_iterator rbegin() const noexcept
+    {
+        const_iterator past_last = end();
+        return const_reverse_iterator(past_last);
+    }
+
+    // Const reverse iterator that starts at the last element, also available on non-const vectors.
+    const_reverse_iterator crbegin() const noexcept
+    {
+        const_iterator past_last = end();
+        return const_reverse_iterator(past_last);
+    }
+
+    // Reverse iterator marking the end of reverse iteration (wraps begin()).
+    reverse_iterator rend() noexcept
+    {
+        iterator first = begin();
+        return reverse_iterator(first);
+    }
+
+    // Const reverse iterator marking the end of reverse iteration (wraps begin()).
+    const_reverse_iterator rend() const noexcept
+    {
+        const_iterator first = begin();
+        return const_reverse_iterator(first);
+    }
+
+    // Const reverse end iterator, also available on non-const vectors.
+    const_reverse_iterator crend() const noexcept
+    {
+        const_iterator first = begin();
+        return const_reverse_iterator(first);
+    }
+
+    // capacity
+    // True when the vector holds no elements.
+    bool empty() const noexcept
+    {
+        const bool has_elements = (m_begin != m_end);
+        return !has_elements;
+    }
+
+    // Number of elements currently stored, i.e. the distance from m_begin to m_end.
+    size_t size() const noexcept
+    {
+        const auto count = m_end - m_begin;
+        return count;
+    }
+
+    // Largest element count the allocator reports it can provide.
+    size_t max_size() const noexcept
+    {
+        const allocator_type& alloc = get_alloc();
+        return alloc.max_size();
+    }
+
+    // Makes sure a buffer for at least `new_cap` elements is available.
+    // Does nothing when the current capacity already suffices. Otherwise a dynamic
+    // buffer is obtained through choose_data(); the elements are moved into it only
+    // when the current size is at least RevertToStaticSize.
+    void reserve(size_type new_cap)
+    {
+        if (new_cap <= m_capacity) return;
+
+        // may allocate and updates m_dynamic_data / m_dynamic_capacity as a side effect
+        auto new_buf = choose_data(new_cap);
+
+        assert(new_buf != m_begin); // should've been handled by new_cap <= m_capacity
+        assert(new_buf != static_begin_ptr()); // we should never reserve into static memory
+
+        const auto s = size();
+        if(s < RevertToStaticSize)
+        {
+            // we've allocated enough memory for the dynamic buffer but don't move there until we have to
+            // (m_begin, m_end and m_capacity keep describing the current buffer)
+            return;
+        }
+
+        // now we need to transfer the existing elements into the new buffer
+        for (size_type i = 0; i < s; ++i)
+        {
+            get_alloc().construct(new_buf + i, std::move(*(m_begin + i)));
+        }
+
+        // free old elements
+        for (size_type i = 0; i < s; ++i)
+        {
+            get_alloc().destroy(m_begin + i);
+        }
+
+        if (m_begin != static_begin_ptr())
+        {
+            // we've moved from dyn to dyn memory, so deallocate the old one
+            // (m_capacity still holds the capacity of the old buffer at this point)
+            get_alloc().deallocate(m_begin, m_capacity);
+        }
+
+        // switch over to the new dynamic buffer
+        m_begin = new_buf;
+        m_end = new_buf + s;
+        m_capacity = m_dynamic_capacity;
+    }
+
+    // Number of elements the buffer currently in use can hold.
+    size_t capacity() const noexcept
+    {
+        const size_t cap = m_capacity;
+        return cap;
+    }
+
+    // Releases surplus dynamic memory.
+    // Does nothing when the vector is already full or lives in the static buffer.
+    // Otherwise the elements are moved either into the static buffer (when they fit)
+    // or into a freshly allocated dynamic buffer of exactly size() elements, and the
+    // old dynamic buffer is deallocated.
+    void shrink_to_fit()
+    {
+        const auto s = size();
+
+        if (s == m_capacity) return;
+        if (m_begin == static_begin_ptr()) return;
+
+        // remember the buffer we are leaving; the members are re-pointed below
+        const auto old_data = m_dynamic_data;
+        const auto old_capacity = m_dynamic_capacity;
+        const auto old_end = m_end;
+
+        if (s <= StaticCapacity)
+        {
+            // revert to static capacity; no dynamic buffer remains afterwards
+            m_begin = m_end = static_begin_ptr();
+            m_capacity = StaticCapacity;
+            m_dynamic_data = nullptr;
+            m_dynamic_capacity = 0;
+        }
+        else
+        {
+            // alloc new smaller buffer and register it as the dynamic buffer, so that
+            // choose_data() and later deallocations keep tracking it
+            const auto new_data = get_alloc().allocate(s);
+            m_begin = m_end = new_data;
+            m_capacity = s;
+            m_dynamic_data = new_data;
+            m_dynamic_capacity = s;
+        }
+
+        // move the elements over, destroying each source right after it was moved from
+        for (auto p = old_data; p != old_end; ++p)
+        {
+            get_alloc().construct(m_end, std::move(*p));
+            ++m_end;
+            get_alloc().destroy(p);
+        }
+
+        get_alloc().deallocate(old_data, old_capacity);
+    }
+
+    // Moves the elements from the dynamic buffer back into the static buffer when
+    // they fit. The dynamic buffer itself is not deallocated here.
+    void revert_to_static()
+    {
+        const auto count = size();
+        const bool already_static = (m_begin == static_begin_ptr());
+        if (already_static || count > StaticCapacity)
+        {
+            // either nothing to do, or the elements do not fit
+            return;
+        }
+
+        const auto src_end = m_end;
+        m_capacity = StaticCapacity;
+        m_end = static_begin_ptr();
+        m_begin = m_end;
+
+        auto src = m_dynamic_data;
+        while (src != src_end)
+        {
+            get_alloc().construct(m_end, std::move(*src));
+            ++m_end;
+            get_alloc().destroy(src);
+            ++src;
+        }
+    }
+
+    // modifiers
+    // Destroys all elements. When RevertToStaticSize is non-zero the vector also
+    // switches back to the static buffer; otherwise it stays in its current buffer.
+    void clear() noexcept
+    {
+        auto cur = m_begin;
+        while (cur != m_end)
+        {
+            get_alloc().destroy(cur);
+            ++cur;
+        }
+
+        if (RevertToStaticSize == 0)
+        {
+            m_end = m_begin;
+            return;
+        }
+
+        m_end = static_begin_ptr();
+        m_begin = m_end;
+        m_capacity = StaticCapacity;
+    }
+
+    // Inserts a copy of `val` before `position`; returns an iterator to the new element.
+    iterator insert(const_iterator position, const value_type& val)
+    {
+        // grow_at() opens an uninitialized one-element hole and may relocate the storage
+        const auto hole = grow_at(position, 1);
+        get_alloc().construct(hole, val);
+        return hole;
+    }
+
+    // Inserts `val` before `position` by moving from it; returns an iterator to the new element.
+    iterator insert(const_iterator position, value_type&& val)
+    {
+        const auto hole = grow_at(position, 1);
+        get_alloc().construct(hole, std::move(val));
+        return hole;
+    }
+
+    // Inserts `count` copies of `val` before `position`; returns an iterator to the
+    // first inserted element.
+    iterator insert(const_iterator position, size_type count, const value_type& val)
+    {
+        const auto hole = grow_at(position, count);
+        const auto hole_end = hole + count;
+        for (auto slot = hole; slot != hole_end; ++slot)
+        {
+            get_alloc().construct(slot, val);
+        }
+        return hole;
+    }
+
+    // Inserts copies of the elements in [first, last) before `position` and returns
+    // an iterator to the first inserted element.
+    // The range length is computed as `last - first`, so the iterators must support
+    // subtraction. The defaulted template argument only compiles for dereferenceable
+    // types, which keeps integer arguments on the (count, value) overload.
+    template <typename InputIterator, typename = decltype(*std::declval<InputIterator>())>
+    iterator insert(const_iterator position, InputIterator first, InputIterator last)
+    {
+        auto pos = grow_at(position, last - first);
+        auto np = pos;
+        for (auto p = first; p != last; ++p, ++np)
+        {
+            get_alloc().construct(np, *p);
+        }
+        return pos;
+    }
+
+    // Inserts copies of the elements of `ilist` before `position`; returns an
+    // iterator to the first inserted element.
+    iterator insert(const_iterator position, std::initializer_list<T> ilist)
+    {
+        const auto hole = grow_at(position, ilist.size());
+        auto slot = hole;
+        for (auto& elem : ilist)
+        {
+            get_alloc().construct(slot, elem);
+            ++slot;
+        }
+        return hole;
+    }
+
+    // Constructs a new element in place before `position` from `args`; returns an
+    // iterator to it.
+    template<typename... Args>
+    iterator emplace(const_iterator position, Args&&... args)
+    {
+        const auto hole = grow_at(position, 1);
+        get_alloc().construct(hole, std::forward<Args>(args)...);
+        return hole;
+    }
+
+    // Removes the element at `position`; returns whatever shrink_at() reports.
+    iterator erase(const_iterator position)
+    {
+        const auto next = shrink_at(position, 1);
+        return next;
+    }
+
+    // Removes the elements in [first, last); returns whatever shrink_at() reports.
+    // A reversed range is passed to _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF first.
+    iterator erase(const_iterator first, const_iterator last)
+    {
+        _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(first > last);
+        const auto next = shrink_at(first, last - first);
+        return next;
+    }
+
+    // Appends a copy of `val`.
+    void push_back(const_reference val)
+    {
+        // the hole is opened at the end, so no existing element has to be shifted
+        const auto slot = grow_at(m_end, 1);
+        get_alloc().construct(slot, val);
+    }
+
+    // Appends `val` by moving from it.
+    void push_back(T&& val)
+    {
+        const auto slot = grow_at(m_end, 1);
+        get_alloc().construct(slot, std::move(val));
+    }
+
+    // Constructs a new last element in place from `args` and returns a reference to it.
+    template<typename... Args>
+    reference emplace_back(Args&&... args)
+    {
+        const auto slot = grow_at(m_end, 1);
+        get_alloc().construct(slot, std::forward<Args>(args)...);
+        return *slot;
+    }
+
+    // Removes the last element.
+    void pop_back()
+    {
+        const auto last = m_end - 1;
+        shrink_at(last, 1);
+    }
+
+    // Changes the size to `n`. Surplus elements are destroyed; missing elements are
+    // copy-constructed from `v`. choose_data() decides which buffer holds the result,
+    // so the elements may be moved between the static and the dynamic buffer.
+    void resize(size_type n, const value_type& v)
+    {
+        auto new_buf = choose_data(n);
+
+        if (new_buf == m_begin)
+        {
+            // no special transfers needed
+
+            auto new_end = m_begin + n;
+
+            // shrinking: destroy from the back
+            while (m_end > new_end)
+            {
+                get_alloc().destroy(--m_end);
+            }
+
+            // growing: append copies of v
+            while (new_end > m_end)
+            {
+                get_alloc().construct(m_end++, v);
+            }
+        }
+        else
+        {
+            // we need to transfer the elements into the new buffer
+
+            const auto s = size();
+            const auto num_transfer = n < s ? n : s;
+
+            for (size_type i = 0; i < num_transfer; ++i)
+            {
+                get_alloc().construct(new_buf + i, std::move(*(m_begin + i)));
+            }
+
+            // free obsoletes
+            for (size_type i = 0; i < s; ++i)
+            {
+                get_alloc().destroy(m_begin + i);
+            }
+
+            // construct new elements
+            for (size_type i = num_transfer; i < n; ++i)
+            {
+                get_alloc().construct(new_buf + i, v);
+            }
+
+            if (m_begin != static_begin_ptr() && new_buf != static_begin_ptr())
+            {
+                // we've moved from dyn to dyn memory, so deallocate the old one.
+                // When reverting from dyn to static the dynamic buffer is still
+                // referenced by m_dynamic_data and must stay allocated.
+                get_alloc().deallocate(m_begin, m_capacity);
+            }
+
+            if (new_buf == static_begin_ptr())
+            {
+                m_capacity = StaticCapacity;
+            }
+            else
+            {
+                m_capacity = m_dynamic_capacity;
+            }
+
+            m_begin = new_buf;
+            m_end = new_buf + n;
+        }
+    }
+
+    // Changes the size to `n`. Surplus elements are destroyed; missing elements are
+    // constructed without arguments. choose_data() decides which buffer holds the
+    // result, so the elements may be moved between the static and the dynamic buffer.
+    void resize(size_type n)
+    {
+        auto new_buf = choose_data(n);
+
+        if (new_buf == m_begin)
+        {
+            // no special transfers needed
+
+            auto new_end = m_begin + n;
+
+            // shrinking: destroy from the back
+            while (m_end > new_end)
+            {
+                get_alloc().destroy(--m_end);
+            }
+
+            // growing: append default-constructed elements
+            while (new_end > m_end)
+            {
+                get_alloc().construct(m_end++);
+            }
+        }
+        else
+        {
+            // we need to transfer the elements into the new buffer
+
+            const auto s = size();
+            const auto num_transfer = n < s ? n : s;
+
+            for (size_type i = 0; i < num_transfer; ++i)
+            {
+                get_alloc().construct(new_buf + i, std::move(*(m_begin + i)));
+            }
+
+            // free obsoletes: every element of the old buffer, i.e. the old size
+            for (size_type i = 0; i < s; ++i)
+            {
+                get_alloc().destroy(m_begin + i);
+            }
+
+            // construct new elements up to the requested size
+            for (size_type i = num_transfer; i < n; ++i)
+            {
+                get_alloc().construct(new_buf + i);
+            }
+
+            if (m_begin != static_begin_ptr() && new_buf != static_begin_ptr())
+            {
+                // we've moved from dyn to dyn memory, so deallocate the old one.
+                // When reverting from dyn to static the dynamic buffer is still
+                // referenced by m_dynamic_data and must stay allocated.
+                get_alloc().deallocate(m_begin, m_capacity);
+            }
+
+            if (new_buf == static_begin_ptr())
+            {
+                m_capacity = StaticCapacity;
+            }
+            else
+            {
+                m_capacity = m_dynamic_capacity;
+            }
+
+            m_begin = new_buf;
+            m_end = new_buf + n;
+        }
+    }
+
+private:
+    // Start of the inline (static) storage, viewed as a pointer to T.
+    T* static_begin_ptr()
+    {
+        auto* raw = &m_static_data[0];
+        return reinterpret_cast<pointer>(raw);
+    }
+
+    // increase the size by splicing the elements in such a way that
+    // a hole of uninitialized elements is left at position, with size num
+    // returns the (potentially new) address of the hole
+    //
+    // cp must lie in [m_begin, m_end]; anything else is passed to
+    // _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF before any state is modified.
+    // The caller is responsible for constructing the num elements of the hole.
+    T* grow_at(const T* cp, size_t num)
+    {
+        auto position = const_cast<T*>(cp);
+
+        _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(position < m_begin || position > m_end);
+
+        const auto s = size();
+        // may allocate and updates m_dynamic_data / m_dynamic_capacity as a side effect
+        auto new_buf = choose_data(s + num);
+
+        if (new_buf == m_begin)
+        {
+            // no special transfers needed
+
+            m_end = m_begin + s + num;
+
+            // shift the tail [position, old end) up by num, walking backwards so that
+            // no element is overwritten before it has been moved
+            // NOTE(review): when position == m_begin the final --p forms a pointer
+            // before the start of the buffer — confirm this is acceptable
+            for (auto p = m_end - num - 1; p >= position; --p)
+            {
+                get_alloc().construct(p + num, std::move(*p));
+                get_alloc().destroy(p);
+            }
+
+            return position;
+        }
+        else
+        {
+            // we need to transfer the elements into the new buffer
+
+            // translate position into the new buffer
+            position = new_buf + (position - m_begin);
+
+            auto p = m_begin;
+            auto np = new_buf;
+
+            // elements in front of the hole keep their index
+            for (; np != position; ++p, ++np)
+            {
+                get_alloc().construct(np, std::move(*p));
+            }
+
+            // skip the hole, then move the remaining elements behind it
+            np += num;
+            for (; p != m_end; ++p, ++np)
+            {
+                get_alloc().construct(np, std::move(*p));
+            }
+
+            // destroy old
+            for (p = m_begin; p != m_end; ++p)
+            {
+                get_alloc().destroy(p);
+            }
+
+            if (m_begin != static_begin_ptr())
+            {
+                // we've moved from dyn to dyn memory, so deallocate the old one
+                get_alloc().deallocate(m_begin, m_capacity);
+            }
+
+            // presumably new_buf is always the dynamic buffer here, since the size grows — TODO confirm
+            m_capacity = m_dynamic_capacity;
+
+            m_begin = new_buf;
+            m_end = new_buf + s + num;
+
+            return position;
+        }
+    }
+
+    // Removes the num elements starting at cp and closes the gap.
+    // Returns a pointer to the element that followed the removed range (end() when
+    // the removed range reached the end), which is what erase() is expected to
+    // return for a std::vector-like container.
+    // An invalid range is passed to _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF before any
+    // state is modified. The elements may be moved back into the static buffer when
+    // choose_data() decides to revert.
+    T* shrink_at(const T* cp, size_t num)
+    {
+        auto position = const_cast<T*>(cp);
+
+        _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(position < m_begin || position > m_end || position + num > m_end);
+
+        const auto s = size();
+        if (s - num == 0)
+        {
+            // everything is removed
+            clear();
+            return m_end;
+        }
+
+        auto new_buf = choose_data(s - num);
+
+        if (new_buf == m_begin)
+        {
+            // no special transfers needed
+
+            // pull the tail down over the removed range
+            for (auto p = position, np = position + num; np != m_end; ++p, ++np)
+            {
+                get_alloc().destroy(p);
+                get_alloc().construct(p, std::move(*np));
+            }
+
+            // the last num slots now only hold moved-from leftovers
+            for (auto p = m_end - num; p != m_end; ++p)
+            {
+                get_alloc().destroy(p);
+            }
+
+            m_end -= num;
+        }
+        else
+        {
+            // we need to transfer the elements into the new buffer
+
+            assert(new_buf == static_begin_ptr()); // since we're shrinking that's the only way to have a new buffer
+
+            m_capacity = StaticCapacity;
+
+            // elements in front of the removed range
+            auto p = m_begin, np = new_buf;
+            for (; p != position; ++p, ++np)
+            {
+                get_alloc().construct(np, std::move(*p));
+                get_alloc().destroy(p);
+            }
+
+            // the removed range itself
+            for (; p != position + num; ++p)
+            {
+                get_alloc().destroy(p);
+            }
+
+            // elements behind the removed range
+            for (; np != new_buf + s - num; ++p, ++np)
+            {
+                get_alloc().construct(np, std::move(*p));
+                get_alloc().destroy(p);
+            }
+
+            // translate position into the new buffer before m_begin is replaced
+            position = new_buf + (position - m_begin);
+            m_begin = new_buf;
+            m_end = np;
+        }
+
+        // after the gap is closed, the element that followed the removed range
+        // lives at position itself
+        return position;
+    }
+
+    // Fills the (empty) vector with `count` copies of `value`.
+    // The buffer is picked by choose_data(); the capacity is refreshed afterwards.
+    void assign_impl(size_type count, const T& value)
+    {
+        assert(m_begin);
+        assert(m_begin == m_end);
+
+        const auto storage = choose_data(count);
+        m_begin = storage;
+        m_end = storage;
+        for (size_type remaining = count; remaining > 0; --remaining, ++m_end)
+        {
+            get_alloc().construct(m_end, value);
+        }
+
+        update_capacity();
+    }
+
+    // Fills the (empty) vector with copies of the elements in [first, last).
+    // The element count is taken from `last - first`.
+    template <class InputIterator>
+    void assign_impl(InputIterator first, InputIterator last)
+    {
+        assert(m_begin);
+        assert(m_begin == m_end);
+
+        const auto storage = choose_data(last - first);
+        m_begin = storage;
+        m_end = storage;
+        for (auto src = first; src != last; ++src, ++m_end)
+        {
+            get_alloc().construct(m_end, *src);
+        }
+
+        update_capacity();
+    }
+
+    // Fills the (empty) vector with copies of the elements of `ilist`.
+    void assign_impl(std::initializer_list<T> ilist)
+    {
+        assert(m_begin);
+        assert(m_begin == m_end);
+
+        const auto storage = choose_data(ilist.size());
+        m_begin = storage;
+        m_end = storage;
+        for (auto& item : ilist)
+        {
+            get_alloc().construct(m_end, item);
+            ++m_end;
+        }
+
+        update_capacity();
+    }
+
+    // Sets m_capacity to the capacity of whichever buffer m_begin points into.
+    void update_capacity()
+    {
+        const bool in_static = (m_begin == static_begin_ptr());
+        m_capacity = in_static ? StaticCapacity : m_dynamic_capacity;
+    }
+
+    // Picks the buffer that should hold desired_capacity elements and returns its start.
+    // The function only selects (and, if necessary, allocates) a buffer: it never moves
+    // elements and never touches m_begin, m_end or m_capacity. It does update
+    // m_dynamic_data and m_dynamic_capacity when it allocates, so after a call from
+    // the dynamic buffer the old buffer is only reachable through m_begin.
+    T* choose_data(size_t desired_capacity)
+    {
+        if (m_begin == m_dynamic_data)
+        {
+            // we're at the dyn buffer, so see if it needs resize or revert to static
+
+            if (desired_capacity > m_dynamic_capacity)
+            {
+                while (m_dynamic_capacity < desired_capacity)
+                {
+                    // grow by roughly 1.5
+                    m_dynamic_capacity *= 3;
+                    ++m_dynamic_capacity;
+                    m_dynamic_capacity /= 2;
+                }
+
+                // the old buffer is not released here; the caller still has to move
+                // the elements out of it and deallocate it
+                m_dynamic_data = get_alloc().allocate(m_dynamic_capacity);
+                return m_dynamic_data;
+            }
+            else if (desired_capacity < RevertToStaticSize)
+            {
+                // we're reverting to the static buffer
+                return static_begin_ptr();
+            }
+            else
+            {
+                // the capacity is sufficient and we don't revert to static, so just do nothing
+                return m_dynamic_data;
+            }
+        }
+        else
+        {
+            assert(m_begin == static_begin_ptr()); // corrupt begin ptr?
+
+            if (desired_capacity > StaticCapacity)
+            {
+                // we must move to dyn memory
+
+                // see if we have enough
+                if (desired_capacity > m_dynamic_capacity)
+                {
+                    // we need to allocate more
+                    // we don't have anything to destroy, so we can also deallocate the buffer
+                    if (m_dynamic_data)
+                    {
+                        get_alloc().deallocate(m_dynamic_data, m_dynamic_capacity);
+                    }
+
+                    // exact fit: no 1.5x growth when coming from the static buffer
+                    m_dynamic_capacity = desired_capacity;
+                    m_dynamic_data = get_alloc().allocate(m_dynamic_capacity);
+                }
+
+                return m_dynamic_data;
+            }
+            else
+            {
+                // we have enough capacity as it is
+                return static_begin_ptr();
+            }
+        }
+    }
+
+    // Views this object as its allocator (mutable access).
+    allocator_type& get_alloc()
+    {
+        return static_cast<allocator_type&>(*this);
+    }
+
+    // Views this object as its allocator (read-only access).
+    const allocator_type& get_alloc() const
+    {
+        return static_cast<const allocator_type&>(*this);
+    }
+
+    // [m_begin, m_end) is the range of live elements; it lies either in
+    // m_static_data or in the buffer m_dynamic_data points to
+    pointer m_begin;
+    pointer m_end;
+
+    // capacity of the buffer m_begin currently points into
+    size_t m_capacity;
+    // raw, suitably aligned inline storage for StaticCapacity elements
+    typename std::aligned_storage<sizeof(T), std::alignment_of<T>::value>::type m_static_data[StaticCapacity];
+
+    // heap buffer and its capacity; it can exist while the elements still live in
+    // m_static_data (see reserve() and revert_to_static())
+    size_t m_dynamic_capacity;
+    pointer m_dynamic_data;
+};
+
+// Two small_vectors are equal when they have the same size and no pair of elements
+// at the same index compares unequal via operator!=.
+template<typename T, size_t StaticCapacity, size_t RevertToStaticSize, class Alloc>
+bool operator==(const small_vector<T, StaticCapacity, RevertToStaticSize, Alloc>& a,
+    const small_vector<T, StaticCapacity, RevertToStaticSize, Alloc>& b)
+{
+    const size_t count = a.size();
+    if (count != b.size()) return false;
+
+    size_t idx = 0;
+    while (idx < count)
+    {
+        if (a[idx] != b[idx])
+        {
+            return false;
+        }
+        ++idx;
+    }
+
+    return true;
+}
+
+// Two small_vectors differ when their sizes differ or when any pair of elements at
+// the same index compares unequal via operator!=.
+template<typename T, size_t StaticCapacity, size_t RevertToStaticSize, class Alloc>
+bool operator!=(const small_vector<T, StaticCapacity, RevertToStaticSize, Alloc>& a,
+    const small_vector<T, StaticCapacity, RevertToStaticSize, Alloc>& b)
+{
+    const size_t count = a.size();
+    if (count != b.size()) return true;
+
+    size_t idx = 0;
+    while (idx < count)
+    {
+        if (a[idx] != b[idx])
+        {
+            return true;
+        }
+        ++idx;
+    }
+
+    return false;
+}
+
+}
+
+
+#if defined(CHOBO_SMALL_VECTOR_TEST_WITH_DOCTEST)
+
+#include <string>
+#include <utility>
+
+// Helpers for the doctest suite below: global counters plus an allocator that
+// updates them, so the tests can check that allocations/deallocations and
+// constructions/destructions balance out.
+namespace chobo_small_vector_test
+{
+
+// number of allocate() / deallocate() calls and the byte totals they covered
+size_t allocations = 0;
+size_t deallocations = 0;
+size_t allocated_bytes = 0;
+size_t deallocated_bytes = 0;
+// number of construct() / destroy() calls
+size_t constructions = 0;
+size_t destructions = 0;
+
+// std::allocator wrapper that bumps the counters above and then forwards every
+// call to the base class.
+// NOTE(review): std::allocator<void>::const_pointer and std::allocator's
+// construct()/destroy() were deprecated in C++17 and removed in C++20 — confirm the
+// language standard these tests are built with.
+template <typename T>
+class counting_allocator : public std::allocator<T>
+{
+public:
+    typedef std::allocator<T> super;
+
+    // counts the allocation and its size in bytes, then allocates through std::allocator
+    T* allocate(size_t n, std::allocator<void>::const_pointer hint = 0)
+    {
+        ++allocations;
+        allocated_bytes += n * sizeof(T);
+        return super::allocate(n, hint);
+    }
+
+    // counts the deallocation and its size in bytes, then releases through std::allocator
+    void deallocate(T* p, size_t n)
+    {
+        ++deallocations;
+        deallocated_bytes += n * sizeof(T);
+        return super::deallocate(p, n);
+    }
+
+    // counts the construction, then constructs through std::allocator
+    template< class U, class... Args >
+    void construct(U* p, Args&&... args)
+    {
+        ++constructions;
+        return super::construct(p, std::forward<Args>(args)...);
+    }
+
+    // counts the destruction, then destroys through std::allocator
+    template< class U >
+    void destroy(U* p)
+    {
+        ++destructions;
+        return super::destroy(p);
+    }
+};
+}
+
+// Exercises vectors whose contents always fit into the static buffer: the data
+// pointer must never change and the counting allocator must never be asked for memory.
+TEST_CASE("[small_vector] static")
+{
+    using namespace chobo;
+    using namespace chobo_small_vector_test;
+    using namespace std;
+
+    static_assert(sizeof(small_vector<void*, 10>) - sizeof(small_vector<void*, 3>) == sizeof(void*) * 7, "small_vector needs to have a static buffer");
+    {
+        // freshly constructed vector: empty, capacity equals the static capacity
+        small_vector<int, 10, 0, counting_allocator<int>> ivec;
+        CHECK(ivec.size() == 0);
+        CHECK(ivec.capacity() == 10);
+        CHECK(ivec.begin() == ivec.end());
+        CHECK(ivec.cbegin() == ivec.cend());
+        CHECK(ivec.empty());
+
+        // reserving within the static capacity changes nothing
+        auto d = ivec.data();
+        ivec.reserve(9);
+        CHECK(ivec.capacity() == 10);
+        CHECK(d == ivec.data());
+
+        ivec.resize(2, 8);
+        CHECK(ivec.size() == 2);
+        CHECK(ivec.front() == 8);
+        CHECK(ivec.back() == 8);
+        CHECK(d == ivec.data());
+
+        ivec.clear();
+        CHECK(ivec.size() == 0);
+        CHECK(ivec.capacity() == 10);
+        CHECK(ivec.begin() == ivec.end());
+        CHECK(ivec.cbegin() == ivec.cend());
+        CHECK(ivec.empty());
+        CHECK(d == ivec.data());
+
+        // forward iteration
+        ivec.push_back(5);
+        CHECK(ivec.size() == 1);
+        CHECK(ivec[0] == 5);
+        auto it = ivec.begin();
+        CHECK(it == ivec.data());
+        CHECK(it == ivec.cbegin());
+        CHECK(*it == 5);
+        ++it;
+        CHECK(it == ivec.end());
+        CHECK(it == ivec.cend());
+
+        // reverse iteration and emplace_back's returned reference
+        auto& back = ivec.emplace_back(3);
+        CHECK(ivec.size() == 2);
+        auto rit = ivec.rbegin();
+        CHECK(*rit == 3);
+        ++rit;
+        *rit = 12;
+        ++rit;
+        CHECK(rit == ivec.rend());
+        CHECK(rit == ivec.crend());
+        CHECK(ivec.front() == 12);
+        CHECK(ivec.back() == 3);
+        CHECK(back == 3);
+        CHECK(&back == &ivec.back());
+
+        // insertion at the front, in the middle and at the end
+        ivec.insert(ivec.begin(), 53);
+        ivec.insert(ivec.begin() + 2, 90);
+        ivec.insert(ivec.begin() + 4, 17);
+        ivec.insert(ivec.end(), 6);
+        ivec.insert(ivec.begin(), { 1, 2 });
+
+        int ints[] = { 1, 2, 53, 12, 90, 3, 17, 6 };
+        CHECK(ivec.size() == 8);
+        CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0);
+
+        // shrink_to_fit and revert_to_static are no-ops for a static vector
+        ivec.shrink_to_fit();
+        CHECK(ivec.size() == 8);
+        CHECK(ivec.capacity() == 10);
+        CHECK(d == ivec.data());
+
+        ivec.revert_to_static();
+        CHECK(ivec.size() == 8);
+        CHECK(ivec.capacity() == 10);
+        CHECK(d == ivec.data());
+
+        ivec.pop_back();
+        CHECK(ivec.size() == 7);
+        CHECK(memcmp(ivec.data(), ints, sizeof(ints) - sizeof(int)) == 0);
+
+        // resize without a value yields a zero int
+        ivec.resize(8);
+        CHECK(ivec.size() == 8);
+        ints[7] = 0;
+        CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0);
+
+        // const access
+        const small_vector<int, 5, 0, counting_allocator<int>> ivec2 = { 1, 2, 3, 4 };
+        CHECK(ivec2.size() == 4);
+        CHECK(*ivec2.begin() == 1);
+        CHECK(ivec2[1] == 2);
+        CHECK(ivec2.at(2) == 3);
+        CHECK(*ivec2.rbegin() == 4);
+
+        // erasing a single element and a range
+        ivec.erase(ivec.begin());
+        CHECK(ivec.size() == 7);
+        CHECK(ivec.front() == 2);
+        CHECK(memcmp(ivec.data(), ints + 1, ivec.size() * sizeof(int)) == 0);
+
+        ivec.erase(ivec.begin() + 2, ivec.begin() + 4);
+        CHECK(ivec.size() == 5);
+        CHECK(ivec[3] == 17);
+
+        // non-trivial element type: copy, move and comparison
+        small_vector<string, 11, 0, counting_allocator<string>> svec;
+        svec.assign({ "as", "df" });
+        CHECK(svec.size() == 2);
+        string s1 = "the quick brown fox jumped over the lazy dog 1234567890";
+        auto& rs = svec.emplace_back(s1);
+        CHECK(svec.back() == s1);
+        CHECK(rs == s1);
+        CHECK(&rs == &svec.back());
+
+        auto svec1 = svec;
+        CHECK(svec1 == svec);
+
+        // moving the vector must move the strings: the character buffer stays the same
+        const void* cstr = svec.back().c_str();
+        auto svec2 = std::move(svec);
+        CHECK(svec2.size() == 3);
+        CHECK(svec2.back() == s1);
+
+        CHECK(svec.empty());
+        CHECK(svec2.back().c_str() == cstr);
+
+        svec = std::move(svec2);
+        CHECK(svec2.empty());
+        CHECK(svec.back().c_str() == cstr);
+
+        svec2 = svec;
+        CHECK(svec2.back() == s1);
+        CHECK(svec.back() == s1);
+        CHECK(svec == svec2);
+
+        svec.insert(svec.begin(), s1);
+        CHECK(svec.size() == 4);
+        CHECK(svec.back().c_str() == cstr);
+        CHECK(svec.front() == svec.back());
+
+        cstr = s1.c_str();
+        svec.emplace(svec.begin() + 2, std::move(s1));
+        CHECK(svec.size() == 5);
+        CHECK(svec.front() == svec[2]);
+        CHECK(svec[2].c_str() == cstr);
+
+        svec.clear();
+        CHECK(svec.empty());
+        svec2.clear();
+        CHECK(svec2.empty());
+        CHECK(svec == svec2);
+
+        svec.resize(svec.capacity());
+        CHECK(svec.size() == svec.capacity());
+
+        for (auto& s : svec)
+        {
+            CHECK(s.empty());
+        }
+
+        // construction and assignment from an iterator range
+        s1 = "asdf";
+        small_vector<char, 10, 10, counting_allocator<char>> cvec(s1.begin(), s1.end());
+        CHECK(cvec.size() == 4);
+        CHECK(cvec.front() == 'a');
+        CHECK(cvec.back() == 'f');
+
+        cvec.clear();
+        CHECK(cvec.size() == 0);
+        CHECK(cvec.empty());
+
+        s1 = "baz";
+        cvec.assign(s1.begin(), s1.end());
+        CHECK(cvec.size() == 3);
+        CHECK(cvec.front() == 'b');
+        CHECK(cvec.back() == 'z');
+
+        // 0 is implicitly castable to nullptr_t which can be an iterator in our case
+        small_vector<int, 4, 4> nullptr_test(2, 0);
+        CHECK(nullptr_test.size() == 2);
+        CHECK(nullptr_test.front() == 0);
+        CHECK(nullptr_test.back() == 0);
+
+        nullptr_test.assign(3, 0);
+        CHECK(nullptr_test.size() == 3);
+        CHECK(nullptr_test.front() == 0);
+        CHECK(nullptr_test.back() == 0);
+
+        nullptr_test.insert(nullptr_test.begin(), 1, 0);
+        CHECK(nullptr_test.size() == 4);
+        CHECK(nullptr_test.front() == 0);
+    }
+
+    // nothing may have touched the heap, and every constructed element must be destroyed
+    CHECK(allocations == 0);
+    CHECK(deallocations == 0);
+    CHECK(allocated_bytes == 0);
+    CHECK(deallocated_bytes == 0);
+    CHECK(constructions == destructions);
+
+    // reset the counters for the next test case
+    constructions = destructions = 0;
+}
+
+
+// Exercises vectors with a static capacity of 1, so nearly every operation runs on
+// the dynamic buffer; allocations and constructions must balance out at the end.
+TEST_CASE("[small_vector] dynamic")
+{
+    using namespace chobo;
+    using namespace chobo_small_vector_test;
+    using namespace std;
+    {
+        small_vector<int, 1, 0, counting_allocator<int>> ivec;
+        CHECK(ivec.size() == 0);
+        CHECK(ivec.capacity() == 1);
+        CHECK(ivec.begin() == ivec.end());
+        CHECK(ivec.cbegin() == ivec.cend());
+        CHECK(ivec.empty());
+
+        // reserving beyond the static capacity switches to the dynamic buffer
+        auto d = ivec.data();
+        ivec.reserve(2);
+        CHECK(ivec.capacity() == 2);
+        CHECK(d != ivec.data());
+        CHECK(allocations == 1);
+
+        ivec.resize(3, 8);
+        CHECK(ivec.capacity() == 3);
+        CHECK(ivec.size() == 3);
+        CHECK(ivec.front() == 8);
+        CHECK(ivec.back() == 8);
+        CHECK(d != ivec.data());
+        CHECK(allocations == 2);
+
+        // with RevertToStaticSize == 0, clear() keeps the dynamic buffer
+        ivec.clear();
+        CHECK(ivec.size() == 0);
+        CHECK(ivec.capacity() == 3);
+        CHECK(d != ivec.data());
+        CHECK(ivec.begin() == ivec.end());
+        CHECK(ivec.cbegin() == ivec.cend());
+        CHECK(ivec.empty());
+
+        // forward iteration
+        ivec.push_back(5);
+        CHECK(ivec.size() == 1);
+        CHECK(ivec[0] == 5);
+        auto it = ivec.begin();
+        CHECK(it == ivec.data());
+        CHECK(it == ivec.cbegin());
+        CHECK(*it == 5);
+        ++it;
+        CHECK(it == ivec.end());
+        CHECK(it == ivec.cend());
+
+        // reverse iteration and emplace_back's returned reference
+        auto& back = ivec.emplace_back(3);
+        CHECK(ivec.size() == 2);
+        auto rit = ivec.rbegin();
+        CHECK(*rit == 3);
+        ++rit;
+        *rit = 12;
+        ++rit;
+        CHECK(rit == ivec.rend());
+        CHECK(rit == ivec.crend());
+        CHECK(ivec.front() == 12);
+        CHECK(ivec.back() == 3);
+        CHECK(back == 3);
+        CHECK(&back == &ivec.back());
+
+        // insertion within the current capacity, then beyond it
+        ivec.insert(ivec.begin(), 53);
+        CHECK(ivec.capacity() == 3);
+
+        ivec.insert(ivec.begin() + 2, 90);
+        ivec.insert(ivec.begin() + 4, 17);
+        ivec.insert(ivec.end(), 6);
+        ivec.insert(ivec.begin(), { 1, 2 });
+
+        int ints[] = { 1, 2, 53, 12, 90, 3, 17, 6 };
+        CHECK(ivec.capacity() >= 8);
+        CHECK(ivec.size() == 8);
+        CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0);
+
+        ivec.pop_back();
+        CHECK(ivec.size() == 7);
+        CHECK(memcmp(ivec.data(), ints, sizeof(ints) - sizeof(int)) == 0);
+
+        // resize without a value yields a zero int
+        ivec.resize(8);
+        CHECK(ivec.size() == 8);
+        ints[7] = 0;
+        CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0);
+
+        // const access
+        const small_vector<int, 1, 0, counting_allocator<int>> ivec2 = { 1, 2, 3, 4 };
+        CHECK(ivec2.size() == 4);
+        CHECK(*ivec2.begin() == 1);
+        CHECK(ivec2[1] == 2);
+        CHECK(ivec2.at(2) == 3);
+        CHECK(*ivec2.rbegin() == 4);
+
+        // erasing a single element and a range
+        ivec.erase(ivec.begin());
+        CHECK(ivec.size() == 7);
+        CHECK(ivec.front() == 2);
+        CHECK(memcmp(ivec.data(), ints + 1, ivec.size() * sizeof(int)) == 0);
+
+        ivec.erase(ivec.begin() + 2, ivec.begin() + 4);
+        CHECK(ivec.size() == 5);
+        CHECK(ivec[3] == 17);
+
+        // non-trivial element type: copy, move and comparison
+        small_vector<string, 1, 0, counting_allocator<string>> svec;
+        svec.assign({ "as", "df" });
+        CHECK(svec.size() == 2);
+        string s1 = "the quick brown fox jumped over the lazy dog 1234567890";
+        auto& rs = svec.emplace_back(s1);
+        CHECK(svec.back() == s1);
+        CHECK(rs == s1);
+        CHECK(&rs == &svec.back());
+
+        auto svec1 = svec;
+        CHECK(svec1 == svec);
+
+        // moving the vector must keep the strings' character buffers
+        const void* cstr = svec.back().c_str();
+        auto svec2 = std::move(svec);
+        CHECK(svec2.size() == 3);
+        CHECK(svec2.back() == s1);
+
+        CHECK(svec.empty());
+        CHECK(svec2.back().c_str() == cstr);
+
+        svec = std::move(svec2);
+        CHECK(svec2.empty());
+        CHECK(svec.back().c_str() == cstr);
+
+        svec2 = svec;
+        CHECK(svec2.back() == s1);
+        CHECK(svec.back() == s1);
+        CHECK(svec == svec2);
+
+        svec.insert(svec.begin(), s1);
+        CHECK(svec.size() == 4);
+        CHECK(svec.back().c_str() == cstr);
+        CHECK(svec.front() == svec.back());
+
+        cstr = s1.c_str();
+        svec.emplace(svec.begin() + 2, std::move(s1));
+        CHECK(svec.size() == 5);
+        CHECK(svec.front() == svec[2]);
+        CHECK(svec[2].c_str() == cstr);
+
+        svec.clear();
+        CHECK(svec.empty());
+        svec2.clear();
+        CHECK(svec2.empty());
+        CHECK(svec == svec2);
+
+        svec.resize(svec.capacity());
+        CHECK(svec.size() == svec.capacity());
+
+        for (auto& s : svec)
+        {
+            CHECK(s.empty());
+        }
+
+        // construction and assignment from an iterator range
+        s1 = "asdf";
+        small_vector<char, 1, 0, counting_allocator<char>> cvec(s1.begin(), s1.end());
+        CHECK(cvec.size() == 4);
+        CHECK(cvec.front() == 'a');
+        CHECK(cvec.back() == 'f');
+
+        cvec.clear();
+        CHECK(cvec.size() == 0);
+        CHECK(cvec.empty());
+
+        s1 = "baz";
+        cvec.assign(s1.begin(), s1.end());
+        CHECK(cvec.size() == 3);
+        CHECK(cvec.front() == 'b');
+        CHECK(cvec.back() == 'z');
+    }
+
+    // everything allocated or constructed inside the scope must have been released
+    CHECK(allocations == deallocations);
+    CHECK(allocated_bytes == deallocated_bytes);
+    CHECK(constructions == destructions);
+
+    // reset the counters for the next test case
+    allocations = deallocations = allocated_bytes = deallocated_bytes = constructions = destructions = 0;
+}
+
+// Exercises the transitions between the static and the dynamic buffer for vectors
+// with a non-zero RevertToStaticSize.
+TEST_CASE("[small_vector] static-dynamic")
+{
+    using namespace chobo;
+    using namespace chobo_small_vector_test;
+    using namespace std;
+
+    {
+        // reserve() allocates the dynamic buffer but the (few) elements stay static
+        small_vector<int, 5, 3, counting_allocator<int>> ivec;
+        auto d = ivec.data();
+        ivec.reserve(20);
+        CHECK(ivec.data() == d);
+
+        ivec.push_back(1);
+        ivec.push_back(2);
+        ivec.push_back(3);
+
+        CHECK(ivec.data() == d);
+
+        // exceeding the static capacity moves the elements into the reserved buffer
+        ivec.insert(ivec.end(), 3u, 8);
+
+        CHECK(ivec.size() == 6);
+        CHECK(ivec.capacity() == 20);
+
+        auto dd = ivec.data();
+
+        // erasing everything reverts to the static buffer
+        ivec.erase(ivec.begin(), ivec.begin() + 6);
+        CHECK(ivec.data() == d);
+        CHECK(ivec.empty());
+
+        // the previously allocated dynamic buffer is reused
+        ivec.resize(19, 11);
+        CHECK(ivec.size() == 19);
+        CHECK(ivec.capacity() == 20);
+        CHECK(ivec.data() == dd);
+
+        // 4 elements is not below RevertToStaticSize (3), so the vector stays dynamic
+        ivec.resize(4);
+        CHECK(ivec.size() == 4);
+        CHECK(ivec.capacity() == 20);
+        CHECK(ivec.data() == dd);
+
+        ivec.revert_to_static();
+        CHECK(ivec.size() == 4);
+        CHECK(ivec.capacity() == 5);
+        CHECK(ivec.data() == d);
+
+        // revert_to_static() kept the dynamic buffer, so reserve() reuses it
+        ivec.reserve(10);
+        CHECK(ivec.size() == 4);
+        CHECK(ivec.capacity() == 20);
+        CHECK(ivec.data() == dd);
+
+        // shrink_to_fit() releases the dynamic buffer
+        ivec.shrink_to_fit();
+        CHECK(ivec.size() == 4);
+        CHECK(ivec.capacity() == 5);
+        CHECK(ivec.data() == d);
+
+        ivec.reserve(10);
+        CHECK(ivec.size() == 4);
+        CHECK(ivec.capacity() == 10);
+        CHECK(ivec.data() != d);
+
+        // inserting in the middle within the current capacity keeps the buffer
+        dd = ivec.data();
+        ivec.insert(ivec.begin() + 3, 5u, 88);
+        CHECK(ivec.size() == 9);
+        CHECK(ivec.capacity() == 10);
+        CHECK(ivec.data() == dd);
+        CHECK(ivec[2] == 11);
+        CHECK(ivec[7] == 88);
+        CHECK(ivec[8] == 11);
+
+        // range construction allocates exactly as much as needed
+        small_vector<int, 3, 4, counting_allocator<int>> ivec2(ivec.begin(), ivec.end());
+        CHECK(ivec2.size() == 9);
+        CHECK(ivec2.size() == 9);
+        CHECK(ivec2.capacity() == 9);
+        CHECK(ivec2[2] == 11);
+        CHECK(ivec2[7] == 88);
+        CHECK(ivec2[8] == 11);
+
+        // erasing below RevertToStaticSize reverts to the static buffer
+        ivec.erase(ivec.begin() + 1, ivec.end() - 2);
+        CHECK(ivec.size() == 3);
+        ivec.erase(ivec.end() - 1);
+        CHECK(ivec.size() == 2);
+        CHECK(ivec.capacity() == 5);
+        CHECK(ivec.data() == d);
+
+        ivec2.erase(ivec2.begin() + 1, ivec2.end() - 2);
+        CHECK(ivec2.size() == 3);
+        CHECK(ivec2.capacity() == 3);
+    }
+
+    // everything allocated or constructed inside the scope must have been released
+    CHECK(allocations == deallocations);
+    CHECK(allocated_bytes == deallocated_bytes);
+    CHECK(constructions == destructions);
+
+    // reset the counters for the next test case
+    allocations = deallocations = allocated_bytes = deallocated_bytes = constructions = destructions = 0;
+}
+
+#if !defined(__EMSCRIPTEN__) || !defined(NDEBUG) // emscripten allows exceptions with -O0
+// Checks that insert() and erase() with positions outside [begin(), end()], or with
+// a reversed range, throw std::out_of_range and leave the size untouched.
+TEST_CASE("[small_vector] out of range")
+{
+    using namespace chobo;
+    small_vector<int, 5> ivec;
+    ivec.resize(4);
+    CHECK(ivec.capacity() == 5);
+
+    // insert before begin() / after end()
+    CHECK_THROWS_AS(ivec.insert(ivec.begin() - 1, 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    CHECK_THROWS_AS(ivec.insert(ivec.end() + 1, 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    // single-element erase before begin() / after end()
+    CHECK_THROWS_AS(ivec.erase(ivec.begin() - 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    CHECK_THROWS_AS(ivec.erase(ivec.end() + 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    // range erase with one or both ends outside the vector
+    CHECK_THROWS_AS(ivec.erase(ivec.begin() - 1, ivec.begin() + 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    CHECK_THROWS_AS(ivec.erase(ivec.begin() + 2, ivec.end() + 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    CHECK_THROWS_AS(ivec.erase(ivec.end() + 1, ivec.end() + 3), std::out_of_range);
+    CHECK(ivec.size() == 4);
+    // reversed range
+    CHECK_THROWS_AS(ivec.erase(ivec.end() - 1, ivec.begin() + 1), std::out_of_range);
+    CHECK(ivec.size() == 4);
+
+}
+#endif
+
+
+#endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/chrono.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/chrono.h
new file mode 100644
index 000000000..c965cf781
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/chrono.h
@@ -0,0 +1,829 @@
+// Formatting library for C++ - chrono support
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_CHRONO_H_
+#define FMT_CHRONO_H_
+
+#include "format.h"
+#include "locale.h"
+
+#include <chrono>
+#include <ctime>
+#include <locale>
+#include <sstream>
+
+// enable safe chrono durations, unless explicitly disabled
+#ifndef FMT_SAFE_DURATION_CAST
+#  define FMT_SAFE_DURATION_CAST 1
+#endif
+
+#if FMT_SAFE_DURATION_CAST
+#  include "safe-duration-cast.h"
+#endif
+
+FMT_BEGIN_NAMESPACE
+
+// Prevents expansion of a preceding token as a function-style macro.
+// Usage: f FMT_NOMACRO()
+#define FMT_NOMACRO
+
+namespace internal {
+inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); }
+inline null<> localtime_s(...) { return null<>(); }
+inline null<> gmtime_r(...) { return null<>(); }
+inline null<> gmtime_s(...) { return null<>(); }
+}  // namespace internal
+
+// Thread-safe replacement for std::localtime
+inline std::tm localtime(std::time_t time) {
+  struct dispatcher {
+    std::time_t time_;
+    std::tm tm_;
+    // tm_ receives the result from whichever conversion routine ends up running.
+    dispatcher(std::time_t t) : time_(t) {}
+    // Entry point: the return type of localtime_r selects the handle() overload.
+    bool run() {
+      using namespace fmt::internal;
+      return handle(localtime_r(&time_, &tm_));
+    }
+    // localtime_r produced a std::tm*: success iff the pointer is non-null.
+    bool handle(std::tm* tm) { return tm != nullptr; }
+    // The variadic stub in fmt::internal (returns null<>) was chosen: try localtime_s.
+    bool handle(internal::null<>) {
+      using namespace fmt::internal;
+      return fallback(localtime_s(&tm_, &time_));
+    }
+    // localtime_s produced an int status: zero means success.
+    bool fallback(int res) { return res == 0; }
+    // Last resort: std::localtime. NOTE(review): presumably not thread-safe - confirm.
+#if !FMT_MSC_VER
+    bool fallback(internal::null<>) {
+      using namespace fmt::internal;
+      std::tm* tm = std::localtime(&time_);
+      if (tm) tm_ = *tm;
+      return tm != nullptr;
+    }
+#endif
+  };
+  dispatcher lt(time);
+  // Too big time values may be unsupported.
+  if (!lt.run()) FMT_THROW(format_error("time_t value out of range"));
+  return lt.tm_;
+}
+
+// Thread-safe replacement for std::gmtime
+inline std::tm gmtime(std::time_t time) {
+  struct dispatcher {
+    std::time_t time_;
+    std::tm tm_;
+    // tm_ receives the result from whichever conversion routine ends up running.
+    dispatcher(std::time_t t) : time_(t) {}
+    // Entry point: the return type of gmtime_r selects the handle() overload.
+    bool run() {
+      using namespace fmt::internal;
+      return handle(gmtime_r(&time_, &tm_));
+    }
+    // gmtime_r produced a std::tm*: success iff the pointer is non-null.
+    bool handle(std::tm* tm) { return tm != nullptr; }
+    // The variadic stub in fmt::internal (returns null<>) was chosen: try gmtime_s.
+    bool handle(internal::null<>) {
+      using namespace fmt::internal;
+      return fallback(gmtime_s(&tm_, &time_));
+    }
+    // gmtime_s produced an int status: zero means success.
+    bool fallback(int res) { return res == 0; }
+    // Last resort: std::gmtime. NOTE(review): presumably not thread-safe - confirm.
+#if !FMT_MSC_VER
+    bool fallback(internal::null<>) {
+      std::tm* tm = std::gmtime(&time_);
+      if (tm) tm_ = *tm;
+      return tm != nullptr;
+    }
+#endif
+  };
+  dispatcher gt(time);
+  // Too big time values may be unsupported.
+  if (!gt.run()) FMT_THROW(format_error("time_t value out of range"));
+  return gt.tm_;
+}
+
+namespace internal {
+inline std::size_t strftime(char* str, std::size_t count, const char* format,
+                            const std::tm* time) {
+  return std::strftime(str, count, format, time);
+}
+// Wide-character overload: forwards the same arguments to std::wcsftime.
+inline std::size_t strftime(wchar_t* str, std::size_t count,
+                            const wchar_t* format, const std::tm* time) {
+  return std::wcsftime(str, count, format, time);
+}
+}  // namespace internal
+
+template <typename Char> struct formatter<std::tm, Char> {
+  template <typename ParseContext>
+  auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    auto it = ctx.begin();
+    if (it != ctx.end() && *it == ':') ++it;  // skip an optional leading ':'
+    auto end = it;
+    while (end != ctx.end() && *end != '}') ++end;  // spec stops before '}'
+    tm_format.reserve(internal::to_unsigned(end - it + 1));
+    tm_format.append(it, end);
+    tm_format.push_back('\0');  // format() hands &tm_format[0] to strftime
+    return end;
+  }
+  // Calls internal::strftime in a loop, enlarging buf until output is produced.
+  template <typename FormatContext>
+  auto format(const std::tm& tm, FormatContext& ctx) -> decltype(ctx.out()) {
+    basic_memory_buffer<Char> buf;
+    std::size_t start = buf.size();
+    for (;;) {
+      std::size_t size = buf.capacity() - start;
+      std::size_t count =
+          internal::strftime(&buf[start], size, &tm_format[0], &tm);
+      if (count != 0) {
+        buf.resize(start + count);
+        break;
+      }
+      if (size >= tm_format.size() * 256) {
+        // If the buffer is 256 times larger than the format string, assume
+        // that `strftime` gives an empty result. There doesn't seem to be a
+        // better way to distinguish the two cases:
+        // https://github.com/fmtlib/fmt/issues/367
+        break;
+      }
+      const std::size_t MIN_GROWTH = 10;
+      buf.reserve(buf.capacity() + (size > MIN_GROWTH ? size : MIN_GROWTH));
+    }
+    return std::copy(buf.begin(), buf.end(), ctx.out());
+  }
+  // NUL-terminated copy of the format text captured by parse().
+  basic_memory_buffer<Char> tm_format;
+};
+
+namespace internal {
+template <typename Period> FMT_CONSTEXPR const char* get_units() {
+  return nullptr;
+}
+template <> FMT_CONSTEXPR const char* get_units<std::atto>() { return "as"; }
+template <> FMT_CONSTEXPR const char* get_units<std::femto>() { return "fs"; }
+template <> FMT_CONSTEXPR const char* get_units<std::pico>() { return "ps"; }
+template <> FMT_CONSTEXPR const char* get_units<std::nano>() { return "ns"; }
+template <> FMT_CONSTEXPR const char* get_units<std::micro>() { return "µs"; }
+template <> FMT_CONSTEXPR const char* get_units<std::milli>() { return "ms"; }
+template <> FMT_CONSTEXPR const char* get_units<std::centi>() { return "cs"; }
+template <> FMT_CONSTEXPR const char* get_units<std::deci>() { return "ds"; }
+template <> FMT_CONSTEXPR const char* get_units<std::ratio<1>>() { return "s"; }
+template <> FMT_CONSTEXPR const char* get_units<std::deca>() { return "das"; }
+template <> FMT_CONSTEXPR const char* get_units<std::hecto>() { return "hs"; }
+template <> FMT_CONSTEXPR const char* get_units<std::kilo>() { return "ks"; }
+template <> FMT_CONSTEXPR const char* get_units<std::mega>() { return "Ms"; }
+template <> FMT_CONSTEXPR const char* get_units<std::giga>() { return "Gs"; }
+template <> FMT_CONSTEXPR const char* get_units<std::tera>() { return "Ts"; }
+template <> FMT_CONSTEXPR const char* get_units<std::peta>() { return "Ps"; }
+template <> FMT_CONSTEXPR const char* get_units<std::exa>() { return "Es"; }
+template <> FMT_CONSTEXPR const char* get_units<std::ratio<60>>() {
+  return "m";
+}
+template <> FMT_CONSTEXPR const char* get_units<std::ratio<3600>>() {
+  return "h";
+}
+
+enum class numeric_system {
+  standard,
+  // Alternative numeric system, e.g. 十二 instead of 12 in ja_JP locale.
+  alternative
+};
+
+// Parses a put_time-like format string (up to '}' or end) and invokes handler actions.
+template <typename Char, typename Handler>
+FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin,
+                                              const Char* end,
+                                              Handler&& handler) {
+  auto ptr = begin;  // begin tracks the start of the pending literal-text run
+  while (ptr != end) {
+    auto c = *ptr;
+    if (c == '}') break;
+    if (c != '%') {
+      ++ptr;
+      continue;
+    }
+    if (begin != ptr) handler.on_text(begin, ptr);  // flush text before '%'
+    ++ptr;  // consume '%'
+    if (ptr == end) FMT_THROW(format_error("invalid format"));
+    c = *ptr++;
+    switch (c) {
+    case '%':
+      handler.on_text(ptr - 1, ptr);
+      break;
+    case 'n': {
+      const Char newline[] = {'\n', 0};  // Char-typed so wide handlers accept it
+      handler.on_text(newline, newline + 1);
+      break;
+    }
+    case 't': {
+      const Char tab[] = {'\t', 0};  // Char-typed so wide handlers accept it
+      handler.on_text(tab, tab + 1);
+      break;
+    }
+    // Day of the week:
+    case 'a':
+      handler.on_abbr_weekday();
+      break;
+    case 'A':
+      handler.on_full_weekday();
+      break;
+    case 'w':
+      handler.on_dec0_weekday(numeric_system::standard);
+      break;
+    case 'u':
+      handler.on_dec1_weekday(numeric_system::standard);
+      break;
+    // Month:
+    case 'b':
+      handler.on_abbr_month();
+      break;
+    case 'B':
+      handler.on_full_month();
+      break;
+    // Hour, minute, second:
+    case 'H':
+      handler.on_24_hour(numeric_system::standard);
+      break;
+    case 'I':
+      handler.on_12_hour(numeric_system::standard);
+      break;
+    case 'M':
+      handler.on_minute(numeric_system::standard);
+      break;
+    case 'S':
+      handler.on_second(numeric_system::standard);
+      break;
+    // Other:
+    case 'c':
+      handler.on_datetime(numeric_system::standard);
+      break;
+    case 'x':
+      handler.on_loc_date(numeric_system::standard);
+      break;
+    case 'X':
+      handler.on_loc_time(numeric_system::standard);
+      break;
+    case 'D':
+      handler.on_us_date();
+      break;
+    case 'F':
+      handler.on_iso_date();
+      break;
+    case 'r':
+      handler.on_12_hour_time();
+      break;
+    case 'R':
+      handler.on_24_hour_time();
+      break;
+    case 'T':
+      handler.on_iso_time();
+      break;
+    case 'p':
+      handler.on_am_pm();
+      break;
+    case 'Q':
+      handler.on_duration_value();
+      break;
+    case 'q':
+      handler.on_duration_unit();
+      break;
+    case 'z':
+      handler.on_utc_offset();
+      break;
+    case 'Z':
+      handler.on_tz_name();
+      break;
+    // Alternative representation:
+    case 'E': {
+      if (ptr == end) FMT_THROW(format_error("invalid format"));
+      c = *ptr++;
+      switch (c) {
+      case 'c':
+        handler.on_datetime(numeric_system::alternative);
+        break;
+      case 'x':
+        handler.on_loc_date(numeric_system::alternative);
+        break;
+      case 'X':
+        handler.on_loc_time(numeric_system::alternative);
+        break;
+      default:
+        FMT_THROW(format_error("invalid format"));
+      }
+      break;
+    }
+    case 'O':
+      if (ptr == end) FMT_THROW(format_error("invalid format"));
+      c = *ptr++;
+      switch (c) {
+      case 'w':
+        handler.on_dec0_weekday(numeric_system::alternative);
+        break;
+      case 'u':
+        handler.on_dec1_weekday(numeric_system::alternative);
+        break;
+      case 'H':
+        handler.on_24_hour(numeric_system::alternative);
+        break;
+      case 'I':
+        handler.on_12_hour(numeric_system::alternative);
+        break;
+      case 'M':
+        handler.on_minute(numeric_system::alternative);
+        break;
+      case 'S':
+        handler.on_second(numeric_system::alternative);
+        break;
+      default:
+        FMT_THROW(format_error("invalid format"));
+      }
+      break;
+    default:
+      FMT_THROW(format_error("invalid format"));
+    }
+    begin = ptr;  // the next literal-text run starts after this specifier
+  }
+  if (begin != ptr) handler.on_text(begin, ptr);  // flush trailing text
+  return ptr;  // points at '}' or end, wherever parsing stopped
+}
+
+struct chrono_format_checker {
+  FMT_NORETURN void report_no_date() { FMT_THROW(format_error("no date")); }
+  // Throwing handlers reject a specifier ("no date"); empty handlers accept it.
+  template <typename Char> void on_text(const Char*, const Char*) {}
+  FMT_NORETURN void on_abbr_weekday() { report_no_date(); }
+  FMT_NORETURN void on_full_weekday() { report_no_date(); }
+  FMT_NORETURN void on_dec0_weekday(numeric_system) { report_no_date(); }
+  FMT_NORETURN void on_dec1_weekday(numeric_system) { report_no_date(); }
+  FMT_NORETURN void on_abbr_month() { report_no_date(); }
+  FMT_NORETURN void on_full_month() { report_no_date(); }
+  void on_24_hour(numeric_system) {}
+  void on_12_hour(numeric_system) {}
+  void on_minute(numeric_system) {}
+  void on_second(numeric_system) {}
+  FMT_NORETURN void on_datetime(numeric_system) { report_no_date(); }
+  FMT_NORETURN void on_loc_date(numeric_system) { report_no_date(); }
+  FMT_NORETURN void on_loc_time(numeric_system) { report_no_date(); }
+  FMT_NORETURN void on_us_date() { report_no_date(); }
+  FMT_NORETURN void on_iso_date() { report_no_date(); }
+  void on_12_hour_time() {}
+  void on_24_hour_time() {}
+  void on_iso_time() {}
+  void on_am_pm() {}
+  void on_duration_value() {}
+  void on_duration_unit() {}
+  FMT_NORETURN void on_utc_offset() { report_no_date(); }
+  FMT_NORETURN void on_tz_name() { report_no_date(); }
+};
+
+template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+inline bool isnan(T) {
+  return false;
+}
+template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+inline bool isnan(T value) {
+  return std::isnan(value);
+}
+
+template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+inline bool isfinite(T) {
+  return true;
+}
+template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+inline bool isfinite(T value) {
+  return std::isfinite(value);
+}
+
+// Converts value to int and checks that it's in the range [0, upper].
+template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+inline int to_nonnegative_int(T value, int upper) {
+  FMT_ASSERT(value >= 0 && value <= upper, "invalid value");
+  (void)upper;
+  return static_cast<int>(value);
+}
+template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
+inline int to_nonnegative_int(T value, int upper) {
+  FMT_ASSERT(
+      std::isnan(value) || (value >= 0 && value <= static_cast<T>(upper)),
+      "invalid value");
+  (void)upper;
+  return static_cast<int>(value);
+}
+
+template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+inline T mod(T x, int y) {
+  return x % y;
+}
+template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+inline T mod(T x, int y) {
+  return std::fmod(x, static_cast<T>(y));
+}
+
+// If T is an integral type, maps T to its unsigned counterpart, otherwise
+// leaves it unchanged (unlike std::make_unsigned).
+template <typename T, bool INTEGRAL = std::is_integral<T>::value>
+struct make_unsigned_or_unchanged {
+  using type = T;
+};
+
+template <typename T> struct make_unsigned_or_unchanged<T, true> {
+  using type = typename std::make_unsigned<T>::type;
+};
+
+#if FMT_SAFE_DURATION_CAST
+// throwing version of safe_duration_cast
+template <typename To, typename FromRep, typename FromPeriod>
+To fmt_safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from) {
+  int ec;
+  To to = safe_duration_cast::safe_duration_cast<To>(from, ec);
+  if (ec) FMT_THROW(format_error("cannot format duration"));
+  return to;
+}
+#endif
+
+template <typename Rep, typename Period,
+          FMT_ENABLE_IF(std::is_integral<Rep>::value)>
+inline std::chrono::duration<Rep, std::milli> get_milliseconds(
+    std::chrono::duration<Rep, Period> d) {
+  // this may overflow and/or the result may not fit in the
+  // target type.
+#if FMT_SAFE_DURATION_CAST
+  using CommonSecondsType =
+      typename std::common_type<decltype(d), std::chrono::seconds>::type;
+  const auto d_as_common = fmt_safe_duration_cast<CommonSecondsType>(d);
+  const auto d_as_whole_seconds =
+      fmt_safe_duration_cast<std::chrono::seconds>(d_as_common);
+  // this conversion should be nonproblematic
+  const auto diff = d_as_common - d_as_whole_seconds;
+  const auto ms =
+      fmt_safe_duration_cast<std::chrono::duration<Rep, std::milli>>(diff);
+  return ms;
+#else
+  auto s = std::chrono::duration_cast<std::chrono::seconds>(d);
+  return std::chrono::duration_cast<std::chrono::milliseconds>(d - s);
+#endif
+}
+
+template <typename Rep, typename Period,
+          FMT_ENABLE_IF(std::is_floating_point<Rep>::value)>
+inline std::chrono::duration<Rep, std::milli> get_milliseconds(
+    std::chrono::duration<Rep, Period> d) {
+  using common_type = typename std::common_type<Rep, std::intmax_t>::type;
+  auto ms = mod(d.count() * static_cast<common_type>(Period::num) /
+                    static_cast<common_type>(Period::den) * 1000,
+                1000);
+  return std::chrono::duration<Rep, std::milli>(static_cast<Rep>(ms));
+}
+
+template <typename Rep, typename OutputIt>
+OutputIt format_chrono_duration_value(OutputIt out, Rep val, int precision) {
+  if (precision >= 0) return format_to(out, "{:.{}f}", val, precision);
+  return format_to(out, std::is_floating_point<Rep>::value ? "{:g}" : "{}",
+                   val);
+}
+
+template <typename Period, typename OutputIt>
+static OutputIt format_chrono_duration_unit(OutputIt out) {
+  if (const char* unit = get_units<Period>()) return format_to(out, "{}", unit);
+  if (Period::den == 1) return format_to(out, "[{}]s", Period::num);
+  return format_to(out, "[{}/{}]s", Period::num, Period::den);
+}
+
+template <typename FormatContext, typename OutputIt, typename Rep,
+          typename Period>
+struct chrono_formatter {
+  FormatContext& context;
+  OutputIt out;
+  int precision;  // negative means no precision was requested
+  // rep is unsigned to avoid overflow.
+  using rep =
+      conditional_t<std::is_integral<Rep>::value && sizeof(Rep) < sizeof(int),
+                    unsigned, typename make_unsigned_or_unchanged<Rep>::type>;
+  rep val;
+  using seconds = std::chrono::duration<rep>;
+  seconds s;
+  using milliseconds = std::chrono::duration<rep, std::milli>;
+  bool negative;
+  // val holds the magnitude of d.count(); write_sign() emits the sign.
+  using char_type = typename FormatContext::char_type;
+  // Splits off the sign and converts the magnitude to whole seconds in s.
+  explicit chrono_formatter(FormatContext& ctx, OutputIt o,
+                            std::chrono::duration<Rep, Period> d)
+      : context(ctx), out(o), precision(-1), val(d.count()), negative(false) {
+    if (d.count() < 0) {
+      val = 0 - val;
+      negative = true;
+    }
+
+    // this may overflow and/or the result may not fit in the
+    // target type.
+#if FMT_SAFE_DURATION_CAST
+    // might need checked conversion (rep!=Rep)
+    auto tmpval = std::chrono::duration<rep, Period>(val);
+    s = fmt_safe_duration_cast<seconds>(tmpval);
+#else
+    s = std::chrono::duration_cast<seconds>(
+        std::chrono::duration<rep, Period>(val));
+#endif
+  }
+
+  // returns true if nan or inf, writes to out.
+  bool handle_nan_inf() {
+    if (isfinite(val)) {
+      return false;
+    }
+    if (isnan(val)) {
+      write_nan();
+      return true;
+    }
+    // must be +-inf
+    if (val > 0) {
+      write_pinf();
+    } else {
+      write_ninf();
+    }
+    return true;
+  }
+  // Clock fields derived from the whole-second count s.
+  Rep hour() const { return static_cast<Rep>(mod((s.count() / 3600), 24)); }
+
+  Rep hour12() const {
+    Rep hour = static_cast<Rep>(mod((s.count() / 3600), 12));
+    return hour <= 0 ? 12 : hour;
+  }
+
+  Rep minute() const { return static_cast<Rep>(mod((s.count() / 60), 60)); }
+  Rep second() const { return static_cast<Rep>(mod(s.count(), 60)); }
+  // Packs hour/minute/second into a std::tm for the locale-aware paths.
+  std::tm time() const {
+    auto time = std::tm();
+    time.tm_hour = to_nonnegative_int(hour(), 24);
+    time.tm_min = to_nonnegative_int(minute(), 60);
+    time.tm_sec = to_nonnegative_int(second(), 60);
+    return time;
+  }
+  // Emits '-' at most once: the flag is cleared after the first write.
+  void write_sign() {
+    if (negative) {
+      *out++ = '-';
+      negative = false;
+    }
+  }
+  // Writes value as a decimal integer, zero-padded to at least width digits.
+  void write(Rep value, int width) {
+    write_sign();
+    if (isnan(value)) return write_nan();
+    uint32_or_64_t<int> n = to_unsigned(
+        to_nonnegative_int(value, (std::numeric_limits<int>::max)()));
+    int num_digits = internal::count_digits(n);
+    if (width > num_digits) out = std::fill_n(out, width - num_digits, '0');
+    out = format_decimal<char_type>(out, n, num_digits);
+  }
+  // The copy helpers below store the advanced iterator back into out.
+  void write_nan() { out = std::copy_n("nan", 3, out); }
+  void write_pinf() { out = std::copy_n("inf", 3, out); }
+  void write_ninf() { out = std::copy_n("-inf", 4, out); }
+  // Renders one specifier through the context locale's std::time_put facet.
+  void format_localized(const tm& time, const char* format) {
+    if (isnan(val)) return write_nan();
+    auto locale = context.locale().template get<std::locale>();
+    auto& facet = std::use_facet<std::time_put<char_type>>(locale);
+    std::basic_ostringstream<char_type> os;
+    os.imbue(locale);
+    facet.put(os, os, ' ', &time, format, format + std::strlen(format));
+    auto str = os.str();
+    out = std::copy(str.begin(), str.end(), out);
+  }
+
+  void on_text(const char_type* begin, const char_type* end) {
+    out = std::copy(begin, end, out);
+  }
+
+  // These are not implemented because durations don't have date information.
+  void on_abbr_weekday() {}
+  void on_full_weekday() {}
+  void on_dec0_weekday(numeric_system) {}
+  void on_dec1_weekday(numeric_system) {}
+  void on_abbr_month() {}
+  void on_full_month() {}
+  void on_datetime(numeric_system) {}
+  void on_loc_date(numeric_system) {}
+  void on_loc_time(numeric_system) {}
+  void on_us_date() {}
+  void on_iso_date() {}
+  void on_utc_offset() {}
+  void on_tz_name() {}
+
+  void on_24_hour(numeric_system ns) {
+    if (handle_nan_inf()) return;
+
+    if (ns == numeric_system::standard) return write(hour(), 2);
+    auto time = tm();
+    time.tm_hour = to_nonnegative_int(hour(), 24);
+    format_localized(time, "%OH");
+  }
+
+  void on_12_hour(numeric_system ns) {
+    if (handle_nan_inf()) return;
+
+    if (ns == numeric_system::standard) return write(hour12(), 2);
+    auto time = tm();
+    time.tm_hour = to_nonnegative_int(hour12(), 12);
+    format_localized(time, "%OI");
+  }
+
+  void on_minute(numeric_system ns) {
+    if (handle_nan_inf()) return;
+
+    if (ns == numeric_system::standard) return write(minute(), 2);
+    auto time = tm();
+    time.tm_min = to_nonnegative_int(minute(), 60);
+    format_localized(time, "%OM");
+  }
+
+  void on_second(numeric_system ns) {
+    if (handle_nan_inf()) return;
+
+    if (ns == numeric_system::standard) {
+      write(second(), 2);
+#if FMT_SAFE_DURATION_CAST
+      // convert rep->Rep
+      using duration_rep = std::chrono::duration<rep, Period>;
+      using duration_Rep = std::chrono::duration<Rep, Period>;
+      auto tmpval = fmt_safe_duration_cast<duration_Rep>(duration_rep{val});
+#else
+      auto tmpval = std::chrono::duration<Rep, Period>(val);
+#endif
+      auto ms = get_milliseconds(tmpval);
+      if (ms != std::chrono::milliseconds(0)) {
+        *out++ = '.';
+        write(ms.count(), 3);
+      }
+      return;
+    }
+    auto time = tm();
+    time.tm_sec = to_nonnegative_int(second(), 60);
+    format_localized(time, "%OS");
+  }
+
+  void on_12_hour_time() {
+    if (handle_nan_inf()) return;
+
+    format_localized(time(), "%r");
+  }
+
+  void on_24_hour_time() {
+    if (handle_nan_inf()) {
+      *out++ = ':';
+      handle_nan_inf();
+      return;
+    }
+
+    write(hour(), 2);
+    *out++ = ':';
+    write(minute(), 2);
+  }
+
+  void on_iso_time() {
+    on_24_hour_time();
+    *out++ = ':';
+    if (handle_nan_inf()) return;
+    write(second(), 2);
+  }
+
+  void on_am_pm() {
+    if (handle_nan_inf()) return;
+    format_localized(time(), "%p");
+  }
+
+  void on_duration_value() {
+    if (handle_nan_inf()) return;
+    write_sign();
+    out = format_chrono_duration_value(out, val, precision);
+  }
+
+  void on_duration_unit() { out = format_chrono_duration_unit<Period>(out); }
+};
+}  // namespace internal
+
+template <typename Rep, typename Period, typename Char>
+struct formatter<std::chrono::duration<Rep, Period>, Char> {
+ private:
+  basic_format_specs<Char> specs;
+  int precision;  // -1 until a precision is parsed (see the constructor)
+  using arg_ref_type = internal::arg_ref<Char>;
+  arg_ref_type width_ref;
+  arg_ref_type precision_ref;
+  mutable basic_string_view<Char> format_str;
+  using duration = std::chrono::duration<Rep, Period>;
+  // Callbacks for the align/width/precision parsers; results are stored in f.
+  struct spec_handler {
+    formatter& f;
+    basic_parse_context<Char>& context;
+    basic_string_view<Char> format_str;
+
+    template <typename Id> FMT_CONSTEXPR arg_ref_type make_arg_ref(Id arg_id) {
+      context.check_arg_id(arg_id);
+      return arg_ref_type(arg_id);
+    }
+    // NOTE(review): format_str is copied before parse() assigns it - confirm named ids.
+    FMT_CONSTEXPR arg_ref_type make_arg_ref(basic_string_view<Char> arg_id) {
+      context.check_arg_id(arg_id);
+      const auto str_val = internal::string_view_metadata(format_str, arg_id);
+      return arg_ref_type(str_val);
+    }
+
+    FMT_CONSTEXPR arg_ref_type make_arg_ref(internal::auto_id) {
+      return arg_ref_type(context.next_arg_id());
+    }
+
+    void on_error(const char* msg) { FMT_THROW(format_error(msg)); }
+    void on_fill(Char fill) { f.specs.fill[0] = fill; }
+    void on_align(align_t align) { f.specs.align = align; }
+    void on_width(unsigned width) { f.specs.width = width; }
+    void on_precision(unsigned precision) { f.precision = precision; }
+    void end_precision() {}
+
+    template <typename Id> void on_dynamic_width(Id arg_id) {
+      f.width_ref = make_arg_ref(arg_id);
+    }
+
+    template <typename Id> void on_dynamic_precision(Id arg_id) {
+      f.precision_ref = make_arg_ref(arg_id);
+    }
+  };
+
+  using iterator = typename basic_parse_context<Char>::iterator;
+  struct parse_range {
+    iterator begin;
+    iterator end;
+  };
+  // Parses align, width and precision, then checks the chrono spec; returns its range.
+  FMT_CONSTEXPR parse_range do_parse(basic_parse_context<Char>& ctx) {
+    auto begin = ctx.begin(), end = ctx.end();
+    if (begin == end || *begin == '}') return {begin, begin};
+    spec_handler handler{*this, ctx, format_str};
+    begin = internal::parse_align(begin, end, handler);
+    if (begin == end) return {begin, begin};
+    begin = internal::parse_width(begin, end, handler);
+    if (begin == end) return {begin, begin};
+    if (*begin == '.') {
+      if (std::is_floating_point<Rep>::value)
+        begin = internal::parse_precision(begin, end, handler);
+      else
+        handler.on_error("precision not allowed for this argument type");
+    }
+    end = parse_chrono_format(begin, end, internal::chrono_format_checker());
+    return {begin, end};
+  }
+
+ public:
+  formatter() : precision(-1) {}
+  // Stores the chrono-spec text in format_str so format() can parse it again.
+  FMT_CONSTEXPR auto parse(basic_parse_context<Char>& ctx)
+      -> decltype(ctx.begin()) {
+    auto range = do_parse(ctx);
+    format_str = basic_string_view<Char>(
+        &*range.begin, internal::to_unsigned(range.end - range.begin));
+    return range.end;
+  }
+  // Renders into a temporary buffer, then writes it to ctx.out() using specs.
+  template <typename FormatContext>
+  auto format(const duration& d, FormatContext& ctx) -> decltype(ctx.out()) {
+    auto begin = format_str.begin(), end = format_str.end();
+    // As a possible future optimization, we could avoid extra copying if width
+    // is not specified.
+    basic_memory_buffer<Char> buf;
+    auto out = std::back_inserter(buf);
+    using range = internal::output_range<decltype(ctx.out()), Char>;
+    internal::basic_writer<range> w(range(ctx.out()));
+    internal::handle_dynamic_spec<internal::width_checker>(
+        specs.width, width_ref, ctx, format_str.begin());
+    internal::handle_dynamic_spec<internal::precision_checker>(
+        precision, precision_ref, ctx, format_str.begin());
+    if (begin == end || *begin == '}') {
+      out = internal::format_chrono_duration_value(out, d.count(), precision);
+      internal::format_chrono_duration_unit<Period>(out);
+    } else {
+      internal::chrono_formatter<FormatContext, decltype(out), Rep, Period> f(
+          ctx, out, d);
+      f.precision = precision;
+      parse_chrono_format(begin, end, f);
+    }
+    w.write(buf.data(), buf.size(), specs);
+    return w.out();
+  }
+};
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_CHRONO_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/color.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/color.h
new file mode 100644
index 000000000..d9d315599
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/color.h
@@ -0,0 +1,585 @@
+// Formatting library for C++ - color support
+//
+// Copyright (c) 2018 - present, Victor Zverovich and fmt contributors
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_COLOR_H_
+#define FMT_COLOR_H_
+
+#include "format.h"
+
+FMT_BEGIN_NAMESPACE
+
+enum class color : uint32_t {  // named colors; each value is packed 0xRRGGBB
+  alice_blue = 0xF0F8FF,               // rgb(240,248,255)
+  antique_white = 0xFAEBD7,            // rgb(250,235,215)
+  aqua = 0x00FFFF,                     // rgb(0,255,255)
+  aquamarine = 0x7FFFD4,               // rgb(127,255,212)
+  azure = 0xF0FFFF,                    // rgb(240,255,255)
+  beige = 0xF5F5DC,                    // rgb(245,245,220)
+  bisque = 0xFFE4C4,                   // rgb(255,228,196)
+  black = 0x000000,                    // rgb(0,0,0)
+  blanched_almond = 0xFFEBCD,          // rgb(255,235,205)
+  blue = 0x0000FF,                     // rgb(0,0,255)
+  blue_violet = 0x8A2BE2,              // rgb(138,43,226)
+  brown = 0xA52A2A,                    // rgb(165,42,42)
+  burly_wood = 0xDEB887,               // rgb(222,184,135)
+  cadet_blue = 0x5F9EA0,               // rgb(95,158,160)
+  chartreuse = 0x7FFF00,               // rgb(127,255,0)
+  chocolate = 0xD2691E,                // rgb(210,105,30)
+  coral = 0xFF7F50,                    // rgb(255,127,80)
+  cornflower_blue = 0x6495ED,          // rgb(100,149,237)
+  cornsilk = 0xFFF8DC,                 // rgb(255,248,220)
+  crimson = 0xDC143C,                  // rgb(220,20,60)
+  cyan = 0x00FFFF,                     // rgb(0,255,255), same value as aqua
+  dark_blue = 0x00008B,                // rgb(0,0,139)
+  dark_cyan = 0x008B8B,                // rgb(0,139,139)
+  dark_golden_rod = 0xB8860B,          // rgb(184,134,11)
+  dark_gray = 0xA9A9A9,                // rgb(169,169,169)
+  dark_green = 0x006400,               // rgb(0,100,0)
+  dark_khaki = 0xBDB76B,               // rgb(189,183,107)
+  dark_magenta = 0x8B008B,             // rgb(139,0,139)
+  dark_olive_green = 0x556B2F,         // rgb(85,107,47)
+  dark_orange = 0xFF8C00,              // rgb(255,140,0)
+  dark_orchid = 0x9932CC,              // rgb(153,50,204)
+  dark_red = 0x8B0000,                 // rgb(139,0,0)
+  dark_salmon = 0xE9967A,              // rgb(233,150,122)
+  dark_sea_green = 0x8FBC8F,           // rgb(143,188,143)
+  dark_slate_blue = 0x483D8B,          // rgb(72,61,139)
+  dark_slate_gray = 0x2F4F4F,          // rgb(47,79,79)
+  dark_turquoise = 0x00CED1,           // rgb(0,206,209)
+  dark_violet = 0x9400D3,              // rgb(148,0,211)
+  deep_pink = 0xFF1493,                // rgb(255,20,147)
+  deep_sky_blue = 0x00BFFF,            // rgb(0,191,255)
+  dim_gray = 0x696969,                 // rgb(105,105,105)
+  dodger_blue = 0x1E90FF,              // rgb(30,144,255)
+  fire_brick = 0xB22222,               // rgb(178,34,34)
+  floral_white = 0xFFFAF0,             // rgb(255,250,240)
+  forest_green = 0x228B22,             // rgb(34,139,34)
+  fuchsia = 0xFF00FF,                  // rgb(255,0,255)
+  gainsboro = 0xDCDCDC,                // rgb(220,220,220)
+  ghost_white = 0xF8F8FF,              // rgb(248,248,255)
+  gold = 0xFFD700,                     // rgb(255,215,0)
+  golden_rod = 0xDAA520,               // rgb(218,165,32)
+  gray = 0x808080,                     // rgb(128,128,128)
+  green = 0x008000,                    // rgb(0,128,0)
+  green_yellow = 0xADFF2F,             // rgb(173,255,47)
+  honey_dew = 0xF0FFF0,                // rgb(240,255,240)
+  hot_pink = 0xFF69B4,                 // rgb(255,105,180)
+  indian_red = 0xCD5C5C,               // rgb(205,92,92)
+  indigo = 0x4B0082,                   // rgb(75,0,130)
+  ivory = 0xFFFFF0,                    // rgb(255,255,240)
+  khaki = 0xF0E68C,                    // rgb(240,230,140)
+  lavender = 0xE6E6FA,                 // rgb(230,230,250)
+  lavender_blush = 0xFFF0F5,           // rgb(255,240,245)
+  lawn_green = 0x7CFC00,               // rgb(124,252,0)
+  lemon_chiffon = 0xFFFACD,            // rgb(255,250,205)
+  light_blue = 0xADD8E6,               // rgb(173,216,230)
+  light_coral = 0xF08080,              // rgb(240,128,128)
+  light_cyan = 0xE0FFFF,               // rgb(224,255,255)
+  light_golden_rod_yellow = 0xFAFAD2,  // rgb(250,250,210)
+  light_gray = 0xD3D3D3,               // rgb(211,211,211)
+  light_green = 0x90EE90,              // rgb(144,238,144)
+  light_pink = 0xFFB6C1,               // rgb(255,182,193)
+  light_salmon = 0xFFA07A,             // rgb(255,160,122)
+  light_sea_green = 0x20B2AA,          // rgb(32,178,170)
+  light_sky_blue = 0x87CEFA,           // rgb(135,206,250)
+  light_slate_gray = 0x778899,         // rgb(119,136,153)
+  light_steel_blue = 0xB0C4DE,         // rgb(176,196,222)
+  light_yellow = 0xFFFFE0,             // rgb(255,255,224)
+  lime = 0x00FF00,                     // rgb(0,255,0)
+  lime_green = 0x32CD32,               // rgb(50,205,50)
+  linen = 0xFAF0E6,                    // rgb(250,240,230)
+  magenta = 0xFF00FF,                  // rgb(255,0,255), same value as fuchsia
+  maroon = 0x800000,                   // rgb(128,0,0)
+  medium_aquamarine = 0x66CDAA,        // rgb(102,205,170)
+  medium_blue = 0x0000CD,              // rgb(0,0,205)
+  medium_orchid = 0xBA55D3,            // rgb(186,85,211)
+  medium_purple = 0x9370DB,            // rgb(147,112,219)
+  medium_sea_green = 0x3CB371,         // rgb(60,179,113)
+  medium_slate_blue = 0x7B68EE,        // rgb(123,104,238)
+  medium_spring_green = 0x00FA9A,      // rgb(0,250,154)
+  medium_turquoise = 0x48D1CC,         // rgb(72,209,204)
+  medium_violet_red = 0xC71585,        // rgb(199,21,133)
+  midnight_blue = 0x191970,            // rgb(25,25,112)
+  mint_cream = 0xF5FFFA,               // rgb(245,255,250)
+  misty_rose = 0xFFE4E1,               // rgb(255,228,225)
+  moccasin = 0xFFE4B5,                 // rgb(255,228,181)
+  navajo_white = 0xFFDEAD,             // rgb(255,222,173)
+  navy = 0x000080,                     // rgb(0,0,128)
+  old_lace = 0xFDF5E6,                 // rgb(253,245,230)
+  olive = 0x808000,                    // rgb(128,128,0)
+  olive_drab = 0x6B8E23,               // rgb(107,142,35)
+  orange = 0xFFA500,                   // rgb(255,165,0)
+  orange_red = 0xFF4500,               // rgb(255,69,0)
+  orchid = 0xDA70D6,                   // rgb(218,112,214)
+  pale_golden_rod = 0xEEE8AA,          // rgb(238,232,170)
+  pale_green = 0x98FB98,               // rgb(152,251,152)
+  pale_turquoise = 0xAFEEEE,           // rgb(175,238,238)
+  pale_violet_red = 0xDB7093,          // rgb(219,112,147)
+  papaya_whip = 0xFFEFD5,              // rgb(255,239,213)
+  peach_puff = 0xFFDAB9,               // rgb(255,218,185)
+  peru = 0xCD853F,                     // rgb(205,133,63)
+  pink = 0xFFC0CB,                     // rgb(255,192,203)
+  plum = 0xDDA0DD,                     // rgb(221,160,221)
+  powder_blue = 0xB0E0E6,              // rgb(176,224,230)
+  purple = 0x800080,                   // rgb(128,0,128)
+  rebecca_purple = 0x663399,           // rgb(102,51,153)
+  red = 0xFF0000,                      // rgb(255,0,0)
+  rosy_brown = 0xBC8F8F,               // rgb(188,143,143)
+  royal_blue = 0x4169E1,               // rgb(65,105,225)
+  saddle_brown = 0x8B4513,             // rgb(139,69,19)
+  salmon = 0xFA8072,                   // rgb(250,128,114)
+  sandy_brown = 0xF4A460,              // rgb(244,164,96)
+  sea_green = 0x2E8B57,                // rgb(46,139,87)
+  sea_shell = 0xFFF5EE,                // rgb(255,245,238)
+  sienna = 0xA0522D,                   // rgb(160,82,45)
+  silver = 0xC0C0C0,                   // rgb(192,192,192)
+  sky_blue = 0x87CEEB,                 // rgb(135,206,235)
+  slate_blue = 0x6A5ACD,               // rgb(106,90,205)
+  slate_gray = 0x708090,               // rgb(112,128,144)
+  snow = 0xFFFAFA,                     // rgb(255,250,250)
+  spring_green = 0x00FF7F,             // rgb(0,255,127)
+  steel_blue = 0x4682B4,               // rgb(70,130,180)
+  tan = 0xD2B48C,                      // rgb(210,180,140)
+  teal = 0x008080,                     // rgb(0,128,128)
+  thistle = 0xD8BFD8,                  // rgb(216,191,216)
+  tomato = 0xFF6347,                   // rgb(255,99,71)
+  turquoise = 0x40E0D0,                // rgb(64,224,208)
+  violet = 0xEE82EE,                   // rgb(238,130,238)
+  wheat = 0xF5DEB3,                    // rgb(245,222,179)
+  white = 0xFFFFFF,                    // rgb(255,255,255)
+  white_smoke = 0xF5F5F5,              // rgb(245,245,245)
+  yellow = 0xFFFF00,                   // rgb(255,255,0)
+  yellow_green = 0x9ACD32              // rgb(154,205,50)
+};                                     // enum class color
+
+enum class terminal_color : uint8_t {  // code <n> of a foreground "ESC[<n>m"
+  black = 30,
+  red = 31,
+  green = 32,
+  yellow = 33,
+  blue = 34,
+  magenta = 35,
+  cyan = 36,
+  white = 37,
+  bright_black = 90,
+  bright_red = 91,
+  bright_green = 92,
+  bright_yellow = 93,
+  bright_blue = 94,
+  bright_magenta = 95,
+  bright_cyan = 96,
+  bright_white = 97
+};
+
+enum class emphasis : uint8_t {  // bit flags; combine them with operator|
+  bold = 0x01,
+  italic = 0x02,
+  underline = 0x04,
+  strikethrough = 0x08
+};
+
+// rgb holds one color as separate 8-bit red, green and blue channels.
+// Using the name "rgb" makes some editors show the color in a tooltip.
+struct rgb {
+  uint8_t r;
+  uint8_t g;
+  uint8_t b;
+
+  FMT_CONSTEXPR rgb() : r(0), g(0), b(0) {}
+  FMT_CONSTEXPR rgb(uint8_t r_, uint8_t g_, uint8_t b_) : r(r_), g(g_), b(b_) {}
+  // Unpacks 0xRRGGBB: red is bits 16-23, green bits 8-15, blue bits 0-7.
+  FMT_CONSTEXPR rgb(uint32_t hex)
+      : r((hex >> 16) & 0xFF), g((hex >> 8) & 0xFF), b(hex & 0xFF) {}
+  // Same unpacking for a named color, via its underlying integer value.
+  FMT_CONSTEXPR rgb(color hex) : rgb(static_cast<uint32_t>(hex)) {}
+};
+
+namespace internal {
+
+// color_type holds either a packed rgb color or a terminal color code.
+struct color_type {
+  FMT_CONSTEXPR color_type() FMT_NOEXCEPT : is_rgb(), value{} {}
+  FMT_CONSTEXPR color_type(color rgb_color) FMT_NOEXCEPT : is_rgb(true),
+                                                           value{} {
+    value.rgb_color = static_cast<uint32_t>(rgb_color);
+  }
+  FMT_CONSTEXPR color_type(rgb rgb_color) FMT_NOEXCEPT : is_rgb(true), value{} {
+    value.rgb_color = (static_cast<uint32_t>(rgb_color.r) << 16) |
+                      (static_cast<uint32_t>(rgb_color.g) << 8) | rgb_color.b;
+  }
+  FMT_CONSTEXPR color_type(terminal_color term_color) FMT_NOEXCEPT : is_rgb(),
+                                                                     value{} {
+    value.term_color = static_cast<uint8_t>(term_color);
+  }
+  bool is_rgb;  // true: value.rgb_color was set; false: value.term_color
+  union color_union {
+    uint8_t term_color;  // numeric value of a terminal_color enumerator
+    uint32_t rgb_color;  // packed as 0xRRGGBB
+  } value;
+};
+}  // namespace internal
+
+// Experimental text formatting support: an optional foreground color, an
+// optional background color and a set of emphasis flags.
+class text_style {
+ public:
+  // Builds a style without colors that carries the emphasis `em`.
+  FMT_CONSTEXPR text_style(emphasis em = emphasis()) FMT_NOEXCEPT
+      : set_foreground_color(),
+        set_background_color(),
+        ems(em) {}
+
+  // Merges rhs in: a color unset here is copied, two rgb colors are OR-ed (a
+  // terminal color on either side is an error) and emphasis flags are OR-ed.
+  FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) {
+    if (set_foreground_color && rhs.set_foreground_color) {
+      if (!(foreground_color.is_rgb && rhs.foreground_color.is_rgb))
+        FMT_THROW(format_error("can't OR a terminal color"));
+      foreground_color.value.rgb_color |= rhs.foreground_color.value.rgb_color;
+    } else if (!set_foreground_color) {
+      set_foreground_color = rhs.set_foreground_color;
+      foreground_color = rhs.foreground_color;
+    }
+
+    if (set_background_color && rhs.set_background_color) {
+      if (!(background_color.is_rgb && rhs.background_color.is_rgb))
+        FMT_THROW(format_error("can't OR a terminal color"));
+      background_color.value.rgb_color |= rhs.background_color.value.rgb_color;
+    } else if (!set_background_color) {
+      set_background_color = rhs.set_background_color;
+      background_color = rhs.background_color;
+    }
+
+    const int bits = static_cast<uint8_t>(ems) | static_cast<uint8_t>(rhs.ems);
+    ems = static_cast<emphasis>(bits);
+    return *this;
+  }
+
+  friend FMT_CONSTEXPR text_style operator|(text_style lhs,
+                                            const text_style& rhs) {
+    return lhs |= rhs;
+  }
+
+  // Same merging rules as operator|=, but rgb colors and flags are AND-ed.
+  FMT_CONSTEXPR text_style& operator&=(const text_style& rhs) {
+    if (set_foreground_color && rhs.set_foreground_color) {
+      if (!(foreground_color.is_rgb && rhs.foreground_color.is_rgb))
+        FMT_THROW(format_error("can't AND a terminal color"));
+      foreground_color.value.rgb_color &= rhs.foreground_color.value.rgb_color;
+    } else if (!set_foreground_color) {
+      set_foreground_color = rhs.set_foreground_color;
+      foreground_color = rhs.foreground_color;
+    }
+
+    if (set_background_color && rhs.set_background_color) {
+      if (!(background_color.is_rgb && rhs.background_color.is_rgb))
+        FMT_THROW(format_error("can't AND a terminal color"));
+      background_color.value.rgb_color &= rhs.background_color.value.rgb_color;
+    } else if (!set_background_color) {
+      set_background_color = rhs.set_background_color;
+      background_color = rhs.background_color;
+    }
+
+    const int bits = static_cast<uint8_t>(ems) & static_cast<uint8_t>(rhs.ems);
+    ems = static_cast<emphasis>(bits);
+    return *this;
+  }
+
+  friend FMT_CONSTEXPR text_style operator&(text_style lhs,
+                                            const text_style& rhs) {
+    return lhs &= rhs;
+  }
+
+  FMT_CONSTEXPR bool has_foreground() const FMT_NOEXCEPT {
+    return set_foreground_color;
+  }
+  FMT_CONSTEXPR bool has_background() const FMT_NOEXCEPT {
+    return set_background_color;
+  }
+  FMT_CONSTEXPR bool has_emphasis() const FMT_NOEXCEPT {
+    return ems != emphasis();  // true when any flag bit is set
+  }
+  FMT_CONSTEXPR internal::color_type get_foreground() const FMT_NOEXCEPT {
+    assert(has_foreground() && "no foreground specified for this style");
+    return foreground_color;
+  }
+  FMT_CONSTEXPR internal::color_type get_background() const FMT_NOEXCEPT {
+    assert(has_background() && "no background specified for this style");
+    return background_color;
+  }
+  FMT_CONSTEXPR emphasis get_emphasis() const FMT_NOEXCEPT {
+    assert(has_emphasis() && "no emphasis specified for this style");
+    return ems;
+  }
+
+ private:
+  // Used by fg() and bg(): sets exactly one of the two colors, no emphasis.
+  FMT_CONSTEXPR text_style(bool is_foreground,
+                           internal::color_type text_color) FMT_NOEXCEPT
+      : set_foreground_color(is_foreground),
+        set_background_color(!is_foreground),
+        ems() {
+    (is_foreground ? foreground_color : background_color) = text_color;
+  }
+
+  friend FMT_CONSTEXPR_DECL text_style fg(internal::color_type foreground)
+      FMT_NOEXCEPT;
+  friend FMT_CONSTEXPR_DECL text_style bg(internal::color_type background)
+      FMT_NOEXCEPT;
+
+  internal::color_type foreground_color;
+  internal::color_type background_color;
+  bool set_foreground_color;  // whether foreground_color has been set
+  bool set_background_color;  // whether background_color has been set
+  emphasis ems;               // OR-ed emphasis flags, 0 when none
+};
+
+FMT_CONSTEXPR text_style fg(internal::color_type foreground) FMT_NOEXCEPT {
+  return text_style(true, foreground);  // style with only a foreground color
+}
+
+FMT_CONSTEXPR text_style bg(internal::color_type background) FMT_NOEXCEPT {
+  return text_style(false, background);  // style with only a background color
+}
+
+FMT_CONSTEXPR text_style operator|(emphasis lhs, emphasis rhs) FMT_NOEXCEPT {
+  return text_style(lhs) | text_style(rhs);  // style carrying both flags
+}
+
+namespace internal {
+
+template <typename Char> struct ansi_color_escape {  // one escape string
+  FMT_CONSTEXPR ansi_color_escape(internal::color_type text_color,
+                                  const char* esc) FMT_NOEXCEPT {
+    // A terminal color is written as "ESC[<code>m" and uses `esc` only to tell
+    // foreground from background; an rgb color (below) copies `esc` as prefix.
+    if (!text_color.is_rgb) {
+      bool is_background = esc == internal::data::background_color;
+      uint32_t value = text_color.value.term_color;
+      // Background ASCII codes are the same as the foreground ones but with
+      // 10 more.
+      if (is_background) value += 10u;
+
+      std::size_t index = 0;
+      buffer[index++] = static_cast<Char>('\x1b');
+      buffer[index++] = static_cast<Char>('[');
+      // Codes from 100 up get a leading '1' (so the code must be below 200).
+      if (value >= 100u) {
+        buffer[index++] = static_cast<Char>('1');
+        value %= 100u;
+      }
+      buffer[index++] = static_cast<Char>('0' + value / 10u);
+      buffer[index++] = static_cast<Char>('0' + value % 10u);
+
+      buffer[index++] = static_cast<Char>('m');
+      buffer[index++] = static_cast<Char>('\0');
+      return;
+    }
+    // rgb color: the 7-character prefix, then "RRR;GGG;BBBm" and a terminator.
+    for (int i = 0; i < 7; i++) {
+      buffer[i] = static_cast<Char>(esc[i]);
+    }
+    rgb color(text_color.value.rgb_color);
+    to_esc(color.r, buffer + 7, ';');
+    to_esc(color.g, buffer + 11, ';');
+    to_esc(color.b, buffer + 15, 'm');
+    buffer[19] = static_cast<Char>(0);
+  }
+  FMT_CONSTEXPR ansi_color_escape(emphasis em) FMT_NOEXCEPT {
+    uint8_t em_codes[4] = {};  // escape code per flag; 0 = flag not set
+    uint8_t em_bits = static_cast<uint8_t>(em);
+    if (em_bits & static_cast<uint8_t>(emphasis::bold)) em_codes[0] = 1;
+    if (em_bits & static_cast<uint8_t>(emphasis::italic)) em_codes[1] = 3;
+    if (em_bits & static_cast<uint8_t>(emphasis::underline)) em_codes[2] = 4;
+    if (em_bits & static_cast<uint8_t>(emphasis::strikethrough))
+      em_codes[3] = 9;
+    // Emit one "ESC[<code>m" sequence for every flag that is set.
+    std::size_t index = 0;
+    for (int i = 0; i < 4; ++i) {
+      if (!em_codes[i]) continue;
+      buffer[index++] = static_cast<Char>('\x1b');
+      buffer[index++] = static_cast<Char>('[');
+      buffer[index++] = static_cast<Char>('0' + em_codes[i]);
+      buffer[index++] = static_cast<Char>('m');
+    }
+    buffer[index++] = static_cast<Char>(0);
+  }
+  FMT_CONSTEXPR operator const Char*() const FMT_NOEXCEPT { return buffer; }
+  // end() measures with char_traits (not strlen) so wchar_t compiles as well.
+  FMT_CONSTEXPR const Char* begin() const FMT_NOEXCEPT { return buffer; }
+  FMT_CONSTEXPR const Char* end() const FMT_NOEXCEPT {
+    return buffer + std::char_traits<Char>::length(buffer);
+  }
+
+ private:
+  Char buffer[7u + 3u * 4u + 1u];  // 7 prefix + 3 * "ddd<delim>" + terminator
+  // Writes c as three zero-padded decimal digits followed by delimiter.
+  static FMT_CONSTEXPR void to_esc(uint8_t c, Char* out,
+                                   char delimiter) FMT_NOEXCEPT {
+    out[0] = static_cast<Char>('0' + c / 100);
+    out[1] = static_cast<Char>('0' + c / 10 % 10);
+    out[2] = static_cast<Char>('0' + c % 10);
+    out[3] = static_cast<Char>(delimiter);
+  }
+};
+
+template <typename Char>  // escape selecting `foreground` as the text color
+FMT_CONSTEXPR ansi_color_escape<Char> make_foreground_color(
+    internal::color_type foreground) FMT_NOEXCEPT {
+  return {foreground, internal::data::foreground_color};
+}
+
+template <typename Char>  // escape selecting `background` as the fill color
+FMT_CONSTEXPR ansi_color_escape<Char> make_background_color(
+    internal::color_type background) FMT_NOEXCEPT {
+  return {background, internal::data::background_color};
+}
+
+template <typename Char>  // escape string for the emphasis flags in `em`
+FMT_CONSTEXPR ansi_color_escape<Char> make_emphasis(emphasis em) FMT_NOEXCEPT {
+  return {em};
+}
+
+template <typename Char>  // generic case forwards to std::fputs
+inline void fputs(const Char* chars, FILE* stream) FMT_NOEXCEPT {
+  static_cast<void>(std::fputs(chars, stream));  // result is ignored
+}
+
+template <>  // wide-character case forwards to std::fputws
+inline void fputs<wchar_t>(const wchar_t* chars, FILE* stream) FMT_NOEXCEPT {
+  static_cast<void>(std::fputws(chars, stream));  // result is ignored
+}
+
+template <typename Char> inline void reset_color(FILE* stream) FMT_NOEXCEPT {
+  internal::fputs<char>(internal::data::reset_color, stream);  // narrow reset
+}
+
+template <> inline void reset_color<wchar_t>(FILE* stream) FMT_NOEXCEPT {
+  internal::fputs<wchar_t>(internal::data::wreset_color, stream);  // wide reset
+}
+
+template <typename Char>  // appends the reset escape without its trailing null
+inline void reset_color(basic_memory_buffer<Char>& buffer) FMT_NOEXCEPT {
+  const char* const first = data::reset_color;
+  const std::size_t length = sizeof(data::reset_color) - 1;
+  buffer.append(first, first + length);
+}
+
+// Formats `args` according to `format_str` into a string wrapped in the escape
+// sequences of `ts`: emphasis, foreground and background escapes come first
+// (each only if set) and, if any was written, a color reset follows the text.
+template <typename Char>
+std::basic_string<Char> vformat(const text_style& ts,
+                                basic_string_view<Char> format_str,
+                                basic_format_args<buffer_context<Char> > args) {
+  basic_memory_buffer<Char> buffer;
+  const bool has_style =
+      ts.has_emphasis() || ts.has_foreground() || ts.has_background();
+  if (ts.has_emphasis()) {
+    const auto escape = make_emphasis<Char>(ts.get_emphasis());
+    buffer.append(escape.begin(), escape.end());
+  }
+  if (ts.has_foreground()) {
+    const auto escape = make_foreground_color<Char>(ts.get_foreground());
+    buffer.append(escape.begin(), escape.end());
+  }
+  if (ts.has_background()) {
+    const auto escape = make_background_color<Char>(ts.get_background());
+    buffer.append(escape.begin(), escape.end());
+  }
+  internal::vformat_to(buffer, format_str, args);
+  // A style that wrote no escape at all needs no reset either.
+  if (has_style) {
+    reset_color<Char>(buffer);
+  }
+  return fmt::to_string(buffer);
+}
+}  // namespace internal
+
+// Prints to `f` like the unstyled vprint, but first writes the emphasis,
+// foreground and background escapes that `ts` defines (each only if set).
+// If any escape was written, a color reset is written after the text.
+// Escapes and text go to the stream in separate calls.
+template <typename S, typename Char = char_t<S> >
+void vprint(std::FILE* f, const text_style& ts, const S& format,
+            basic_format_args<buffer_context<Char> > args) {
+  const bool has_style =
+      ts.has_emphasis() || ts.has_foreground() || ts.has_background();
+  if (ts.has_emphasis()) {
+    internal::fputs<Char>(internal::make_emphasis<Char>(ts.get_emphasis()), f);
+  }
+  if (ts.has_foreground()) {
+    internal::fputs<Char>(
+        internal::make_foreground_color<Char>(ts.get_foreground()), f);
+  }
+  if (ts.has_background()) {
+    internal::fputs<Char>(
+        internal::make_background_color<Char>(ts.get_background()), f);
+  }
+  vprint(f, format, args);
+  if (has_style) internal::reset_color<Char>(f);
+}
+
+/**
+  Writes the formatted arguments to the file stream `f`, surrounded by the
+  ANSI escape sequences that the text style `ts` calls for.
+  Example:
+    fmt::print(stderr, fmt::emphasis::bold | fg(fmt::color::red),
+               "Elapsed time: {0:.2f} seconds", 1.23);
+ */
+template <typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_string<S>::value)>
+void print(std::FILE* f, const text_style& ts, const S& format_str,
+           const Args&... args) {
+  internal::check_format_string<Args...>(format_str);
+  using context_type = buffer_context<char_t<S> >;
+  format_arg_store<context_type, Args...> store{args...};
+  vprint(f, ts, format_str, basic_format_args<context_type>(store));
+}
+
+/**
+  Writes the formatted arguments to stdout, surrounded by the ANSI escape
+  sequences that the text style `ts` calls for.
+  Example:
+    fmt::print(fmt::emphasis::bold | fg(fmt::color::red),
+               "Elapsed time: {0:.2f} seconds", 1.23);
+ */
+template <typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_string<S>::value)>
+void print(const text_style& ts, const S& format_str, const Args&... args) {
+  print(stdout, ts, format_str, args...);
+}
+
+template <typename S, typename Char = char_t<S> >  // styled vformat front end
+inline std::basic_string<Char> vformat(
+    const text_style& ts, const S& format_str,
+    basic_format_args<buffer_context<Char> > args) {
+  return internal::vformat<Char>(ts, to_string_view(format_str), args);
+}
+
+/**
+  \rst
+  Formats arguments into a string wrapped in the ANSI escapes of ``ts``.
+
+  **Example**::
+
+    #include <fmt/color.h>
+    std::string message = fmt::format(fmt::emphasis::bold | fg(fmt::color::red),
+                                      "The answer is {}", 42);
+  \endrst
+*/
+template <typename S, typename... Args, typename Char = char_t<S> >
+inline std::basic_string<Char> format(const text_style& ts, const S& format_str,
+                                      const Args&... args) {
+  const auto store = internal::make_args_checked(format_str, args...);
+  return internal::vformat(ts, to_string_view(format_str),
+                           basic_format_args<buffer_context<Char> >(store));
+}
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_COLOR_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/compile.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/compile.h
new file mode 100644
index 000000000..82625bbc6
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/compile.h
@@ -0,0 +1,466 @@
+// Formatting library for C++ - experimental format string compilation
+//
+// Copyright (c) 2012 - present, Victor Zverovich and fmt contributors
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_COMPILE_H_
+#define FMT_COMPILE_H_
+
+#include <vector>
+#include "format.h"
+
+FMT_BEGIN_NAMESPACE
+namespace internal {
+
+template <typename Char> struct format_part {  // one piece of a format string
+ public:
+  struct named_argument_id {  // wrapper telling a name apart from literal text
+    FMT_CONSTEXPR named_argument_id(internal::string_view_metadata id)
+        : id(id) {}
+    internal::string_view_metadata id;
+  };
+
+  struct argument_id {  // argument reference: numeric index or name
+    FMT_CONSTEXPR argument_id() : argument_id(0u) {}
+
+    FMT_CONSTEXPR argument_id(unsigned id)
+        : which(which_arg_id::index), val(id) {}
+
+    FMT_CONSTEXPR argument_id(internal::string_view_metadata id)
+        : which(which_arg_id::named_index), val(id) {}
+
+    enum class which_arg_id { index, named_index };
+
+    which_arg_id which;  // tells which member of val was initialized
+
+    union value {
+      FMT_CONSTEXPR value() : index(0u) {}
+      FMT_CONSTEXPR value(unsigned id) : index(id) {}
+      FMT_CONSTEXPR value(internal::string_view_metadata id)
+          : named_index(id) {}
+
+      unsigned index;
+      internal::string_view_metadata named_index;
+    } val;
+  };
+
+  struct specification {  // an argument reference together with its specs
+    FMT_CONSTEXPR specification() : arg_id(0u) {}
+    FMT_CONSTEXPR specification(unsigned id) : arg_id(id) {}
+
+    FMT_CONSTEXPR specification(internal::string_view_metadata id)
+        : arg_id(id) {}
+
+    argument_id arg_id;
+    internal::dynamic_format_specs<Char> parsed_specs;
+  };
+
+  FMT_CONSTEXPR format_part()
+      : which(kind::argument_id), end_of_argument_id(0u), val(0u) {}
+
+  FMT_CONSTEXPR format_part(internal::string_view_metadata text)
+      : which(kind::text), end_of_argument_id(0u), val(text) {}
+
+  FMT_CONSTEXPR format_part(unsigned id)
+      : which(kind::argument_id), end_of_argument_id(0u), val(id) {}
+
+  FMT_CONSTEXPR format_part(named_argument_id arg_id)
+      : which(kind::named_argument_id), end_of_argument_id(0u), val(arg_id) {}
+
+  FMT_CONSTEXPR format_part(specification spec)
+      : which(kind::specification), end_of_argument_id(0u), val(spec) {}
+
+  enum class kind { argument_id, named_argument_id, text, specification };
+
+  kind which;  // tells which member of val was initialized
+  std::size_t end_of_argument_id;  // an offset into the format string
+  union value {
+    FMT_CONSTEXPR value() : arg_id(0u) {}
+    FMT_CONSTEXPR value(unsigned id) : arg_id(id) {}
+    FMT_CONSTEXPR value(named_argument_id named_id)
+        : named_arg_id(named_id.id) {}
+    FMT_CONSTEXPR value(internal::string_view_metadata t) : text(t) {}
+    FMT_CONSTEXPR value(specification s) : spec(s) {}
+    unsigned arg_id;
+    internal::string_view_metadata named_arg_id;
+    internal::string_view_metadata text;
+    specification spec;
+  } val;
+};
+
+// Parse handler that turns the pieces of a format string (literal text, plain
+// argument references, arguments with specs) into parts stored in `parts`.
+template <typename Char, typename PartsContainer>
+class format_preparation_handler : public internal::error_handler {
+  using part = format_part<Char>;
+ public:
+  using iterator = typename basic_string_view<Char>::iterator;
+
+  FMT_CONSTEXPR format_preparation_handler(basic_string_view<Char> format,
+                                           PartsContainer& parts)
+      : parts_(parts), format_(format), parse_context_(format) {}
+  // Records non-empty literal text as (offset, size) into the format string.
+  FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) {
+    if (begin == end) return;
+    const auto offset = begin - format_.data();
+    const auto size = end - begin;
+    parts_.push_back(part(string_view_metadata(offset, size)));
+  }
+  // Automatic indexing: records the index handed out by the parse context.
+  FMT_CONSTEXPR void on_arg_id() {
+    parts_.push_back(part(parse_context_.next_arg_id()));
+  }
+  // Explicit index: checked by the parse context, then recorded.
+  FMT_CONSTEXPR void on_arg_id(unsigned id) {
+    parse_context_.check_arg_id(id);
+    parts_.push_back(part(id));
+  }
+  // Named argument: records where the name sits in the format string.
+  FMT_CONSTEXPR void on_arg_id(basic_string_view<Char> id) {
+    const auto view = string_view_metadata(format_, id);
+    const auto arg_id = typename part::named_argument_id(view);
+    parts_.push_back(part(arg_id));
+  }
+  // Stores the offset of ptr in the format string on the most recent part.
+  FMT_CONSTEXPR void on_replacement_field(const Char* ptr) {
+    parts_.back().end_of_argument_id = ptr - format_.begin();
+  }
+  // Parses format specs and converts the last part into a specification part.
+  FMT_CONSTEXPR const Char* on_format_specs(const Char* begin,
+                                            const Char* end) {
+    const auto specs_offset = to_unsigned(begin - format_.begin());
+
+    using parse_context = basic_parse_context<Char>;
+    internal::dynamic_format_specs<Char> parsed_specs;
+    dynamic_specs_handler<parse_context> handler(parsed_specs, parse_context_);
+    begin = parse_format_specs(begin, end, handler);
+    // The parser may stop at `end`, which must not be dereferenced.
+    if (begin == end || *begin != '}') on_error("missing '}' in format string");
+
+    auto& last_part = parts_.back();
+    auto specs = last_part.which == part::kind::argument_id
+                     ? typename part::specification(last_part.val.arg_id)
+                     : typename part::specification(last_part.val.named_arg_id);
+    specs.parsed_specs = parsed_specs;
+    last_part = part(specs);
+    last_part.end_of_argument_id = specs_offset;
+    return begin;
+  }
+
+ private:
+  PartsContainer& parts_;
+  basic_string_view<Char> format_;
+  basic_parse_context<Char> parse_context_;
+};
+
+template <typename Format, typename PreparedPartsProvider, typename... Args>
+class prepared_format {
+ public:
+  using char_type = char_t<Format>;
+  using format_part_t = format_part<char_type>;
+
+  constexpr prepared_format(Format f)
+      : format_(std::move(f)), parts_provider_(to_string_view(format_)) {}
+
+  prepared_format() = delete;
+
+  using context = buffer_context<char_type>;
+
+  template <typename Range, typename Context>
+  auto vformat_to(Range out, basic_format_args<Context> args) const ->
+      typename Context::iterator {
+    const auto format_view = internal::to_string_view(format_);
+    basic_parse_context<char_type> parse_ctx(format_view);
+    Context ctx(out.begin(), args);
+
+    const auto& parts = parts_provider_.parts();
+    for (auto part_it = parts.begin(); part_it != parts.end(); ++part_it) {
+      const auto& part = *part_it;
+      const auto& value = part.val;
+
+      switch (part.which) {
+      case format_part_t::kind::text: {
+        const auto text = value.text.to_view(format_view.data());
+        auto output = ctx.out();
+        auto&& it = internal::reserve(output, text.size());
+        it = std::copy_n(text.begin(), text.size(), it);
+        ctx.advance_to(output);
+      } break;
+
+      case format_part_t::kind::argument_id: {
+        advance_parse_context_to_specification(parse_ctx, part);
+        format_arg<Range>(parse_ctx, ctx, value.arg_id);
+      } break;
+
+      case format_part_t::kind::named_argument_id: {
+        advance_parse_context_to_specification(parse_ctx, part);
+        const auto named_arg_id =
+            value.named_arg_id.to_view(format_view.data());
+        format_arg<Range>(parse_ctx, ctx, named_arg_id);
+      } break;
+      case format_part_t::kind::specification: {
+        const auto& arg_id_value = value.spec.arg_id.val;
+        const auto arg = value.spec.arg_id.which ==
+                                 format_part_t::argument_id::which_arg_id::index
+                             ? ctx.arg(arg_id_value.index)
+                             : ctx.arg(arg_id_value.named_index.to_view(
+                                   to_string_view(format_).data()));
+
+        auto specs = value.spec.parsed_specs;
+
+        handle_dynamic_spec<internal::width_checker>(
+            specs.width, specs.width_ref, ctx, format_view.begin());
+        handle_dynamic_spec<internal::precision_checker>(
+            specs.precision, specs.precision_ref, ctx, format_view.begin());
+
+        check_prepared_specs(specs, arg.type());
+        advance_parse_context_to_specification(parse_ctx, part);
+        ctx.advance_to(
+            visit_format_arg(arg_formatter<Range>(ctx, nullptr, &specs), arg));
+      } break;
+      }
+    }
+
+    return ctx.out();
+  }
+
+ private:
+  // Repositions parse_ctx at offset part.end_of_argument_id of the format.
+  void advance_parse_context_to_specification(
+      basic_parse_context<char_type>& parse_ctx,
+      const format_part_t& part) const {
+    const auto spec = to_string_view(format_).data() + part.end_of_argument_id;
+    advance_to(parse_ctx, spec);
+  }
+
+  // Checks arg_id with parse_ctx, formats that argument, then advances ctx.
+  template <typename Range, typename Context, typename Id>
+  void format_arg(basic_parse_context<char_type>& parse_ctx, Context& ctx,
+                  Id arg_id) const {
+    parse_ctx.check_arg_id(arg_id);
+    ctx.advance_to(
+        visit_format_arg(arg_formatter<Range>(ctx), ctx.arg(arg_id)));
+  }
+
+  template <typename Char>
+  void check_prepared_specs(const basic_format_specs<Char>& specs,
+                            internal::type arg_type) const {
+    internal::error_handler h;  // Handler handed to the checker below.
+    numeric_specs_checker<internal::error_handler> checker(h, arg_type);
+    if (specs.align == align::numeric) checker.require_numeric_argument();
+    if (specs.sign != sign::none) checker.check_sign();
+    if (specs.alt) checker.require_numeric_argument();
+    if (specs.precision >= 0) checker.check_precision();  // NOTE(review): negative presumably means "no precision given" - confirm.
+  }
+
+ private:
+  Format format_;
+  PreparedPartsProvider parts_provider_;
+};
+
+// Parse handler that only counts how many parts a format string splits into.
+template <typename Char> struct part_counter {
+  unsigned num_parts = 0;
+
+  // Empty text runs are not counted.
+  FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) {
+    num_parts += (begin != end) ? 1u : 0u;
+  }
+
+  // Every argument id, in whichever form, contributes one part.
+  FMT_CONSTEXPR void on_arg_id() { ++num_parts; }
+  FMT_CONSTEXPR void on_arg_id(unsigned) { ++num_parts; }
+  FMT_CONSTEXPR void on_arg_id(basic_string_view<Char>) { ++num_parts; }
+
+  FMT_CONSTEXPR void on_replacement_field(const Char*) {}
+  FMT_CONSTEXPR void on_error(const char*) {}
+
+  // Returns the first '}' not paired with an earlier '{', or `end` if none.
+  FMT_CONSTEXPR const Char* on_format_specs(const Char* begin,
+                                            const Char* end) {
+    unsigned depth = 0;
+    while (begin != end) {
+      if (*begin == '}' && depth == 0u) break;
+      if (*begin == '{') ++depth;
+      if (*begin == '}') --depth;
+      ++begin;
+    }
+    return begin;
+  }
+};
+
+template <typename Format> class compiletime_prepared_parts_type_provider {
+ private:
+  using char_type = char_t<Format>;
+  // Counts the parts of Format by running the parser with a part_counter.
+  static FMT_CONSTEXPR unsigned count_parts() {
+    FMT_CONSTEXPR_DECL const auto text = to_string_view(Format{});
+    part_counter<char_type> counter;
+    internal::parse_format_string</*IS_CONSTEXPR=*/true>(text, counter);
+    return counter.num_parts;
+  }
+
+// Workaround for old compilers: without constexpr support the part count is
+// fixed at 0, since compile-time parts preparation is not performed anyway.
+#if FMT_USE_CONSTEXPR
+  static FMT_CONSTEXPR_DECL const unsigned number_of_format_parts =
+      compiletime_prepared_parts_type_provider::count_parts();
+#else
+  static const unsigned number_of_format_parts = 0u;
+#endif
+
+ public:
+  template <unsigned N> struct format_parts_array {
+    using value_type = format_part<char_type>;
+
+    FMT_CONSTEXPR format_parts_array() : arr{} {}
+
+    FMT_CONSTEXPR value_type& operator[](unsigned ind) { return arr[ind]; }
+
+    FMT_CONSTEXPR const value_type* begin() const { return arr; }
+    FMT_CONSTEXPR const value_type* end() const { return begin() + N; }
+
+   private:
+    value_type arr[N];
+  };
+
+  struct empty {
+    // Needed because the parts collector reads Parts::value_type.
+    using value_type = format_part<char_type>;
+  };
+  // Array sized to the counted parts, or `empty` when the count is zero.
+  using type = conditional_t<number_of_format_parts != 0,
+                             format_parts_array<number_of_format_parts>, empty>;
+};
+
+// push_back/back adaptor that writes parts into an indexable Parts object.
+template <typename Parts> class compiletime_prepared_parts_collector {
+  using format_part = typename Parts::value_type;
+
+ public:
+  FMT_CONSTEXPR explicit compiletime_prepared_parts_collector(Parts& parts)
+      : parts_(parts), counter_(0u) {}
+  FMT_CONSTEXPR void push_back(format_part part) {
+    parts_[counter_] = part;
+    ++counter_;
+  }
+  FMT_CONSTEXPR format_part& back() { return parts_[counter_ - 1]; }
+ private:
+  Parts& parts_;      // Destination supplied by the caller.
+  unsigned counter_;  // Number of parts stored so far.
+};
+
+template <typename PartsContainer, typename Char>
+FMT_CONSTEXPR PartsContainer prepare_parts(basic_string_view<Char> format) {
+  using handler = format_preparation_handler<Char, PartsContainer>;
+  PartsContainer collected;  // Given to the handler; parsed non-constexpr.
+  internal::parse_format_string<false>(format, handler(format, collected));
+  return collected;
+}
+
+// Parses `format` with IS_CONSTEXPR=true, storing parts via a collector.
+template <typename PartsContainer, typename Char>
+FMT_CONSTEXPR PartsContainer
+prepare_compiletime_parts(basic_string_view<Char> format) {
+  using sink = compiletime_prepared_parts_collector<PartsContainer>;
+  using handler = format_preparation_handler<Char, sink>;
+  PartsContainer storage;
+  sink out(storage);
+  internal::parse_format_string<true>(format, handler(format, out));
+  return storage;
+}
+
+// Holds parts prepared once, at construction, from a run-time format string.
+template <typename PartsContainer> class runtime_parts_provider {
+  PartsContainer parts_;
+
+ public:
+  runtime_parts_provider() = delete;
+
+  template <typename Char>
+  runtime_parts_provider(basic_string_view<Char> format)
+      : parts_(prepare_parts<PartsContainer>(format)) {}
+  const PartsContainer& parts() const { return parts_; }
+};
+
+template <typename Format, typename PartsContainer>
+struct compiletime_parts_provider {
+  compiletime_parts_provider() = delete;
+  template <typename Char>
+  FMT_CONSTEXPR compiletime_parts_provider(basic_string_view<Char>) {}
+  // The constructor argument is ignored: parts come from Format itself.
+  const PartsContainer& parts() const {
+    static FMT_CONSTEXPR_DECL const PartsContainer prepared_parts =
+        prepare_compiletime_parts<PartsContainer>(
+            internal::to_string_view(Format{}));
+    // A function-local static, so every call returns the same object.
+    return prepared_parts;
+  }
+};
+}  // namespace internal
+
+#if FMT_USE_CONSTEXPR
+template <typename... Args, typename S,
+          FMT_ENABLE_IF(is_compile_string<S>::value)>
+FMT_CONSTEXPR auto compile(S format_str) -> internal::prepared_format<
+    S,
+    internal::compiletime_parts_provider<
+        S,
+        typename internal::compiletime_prepared_parts_type_provider<S>::type>,
+    Args...> {
+  return format_str;  // Converted to the prepared_format return type.
+}
+#endif
+
+template <typename... Args, typename Char, size_t N>
+auto compile(const Char (&format_str)[N]) -> internal::prepared_format<
+    std::basic_string<Char>,
+    internal::runtime_parts_provider<std::vector<internal::format_part<Char>>>,
+    Args...> {
+  return std::basic_string<Char>(format_str, N - 1);  // N - 1 drops the last array element (presumably the terminating null).
+}
+
+template <typename CompiledFormat, typename... Args,
+          typename Char = typename CompiledFormat::char_type>
+std::basic_string<Char> format(const CompiledFormat& cf, const Args&... args) {
+  using context = buffer_context<Char>;
+  using range = internal::buffer_range<Char>;
+  basic_memory_buffer<Char> out;  // Receives the formatted characters.
+  cf.template vformat_to<range, context>(
+      range(out), {make_format_args<context>(args...)});
+  return to_string(out);
+}
+
+template <typename OutputIt, typename CompiledFormat, typename... Args>
+OutputIt format_to(OutputIt out, const CompiledFormat& cf,
+                   const Args&... args) {
+  using char_type = typename CompiledFormat::char_type;
+  using range = internal::output_range<OutputIt, char_type>;  // Wraps `out`.
+  using context = format_context_t<OutputIt, char_type>;
+  return cf.template vformat_to<range, context>(  // Returns cf's end iterator.
+      range(out), {make_format_args<context>(args...)});
+}
+
+// Formats into `out` through a truncating_iterator constructed with n.
+template <typename OutputIt, typename CompiledFormat, typename... Args,
+          FMT_ENABLE_IF(internal::is_output_iterator<OutputIt>::value)>
+format_to_n_result<OutputIt> format_to_n(
+    OutputIt out, size_t n, const CompiledFormat& cf, const Args&... args) {
+  using limited = internal::truncating_iterator<OutputIt>;
+  auto last = format_to(limited(out, n), cf, args...);
+  return {last.base(), last.count()};
+}
+
+// Returns the count reported by a counting_iterator after formatting args.
+template <typename CompiledFormat, typename... Args>
+std::size_t formatted_size(const CompiledFormat& cf, const Args&... args) {
+  using Ch = typename CompiledFormat::char_type;
+  using counter = internal::counting_iterator<Ch>;
+  return fmt::format_to(counter(), cf, args...).count();
+}
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_COMPILE_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/core.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/core.h
new file mode 100644
index 000000000..29a1281bb
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/core.h
@@ -0,0 +1,1420 @@
+// Formatting library for C++ - the core API
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_CORE_H_
+#define FMT_CORE_H_
+
+#include <cassert>
+#include <cstdio>  // std::FILE
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <type_traits>
+
+// The fmt library version in the form major * 10000 + minor * 100 + patch.
+#define FMT_VERSION 60000
+
+#ifdef __has_feature
+#  define FMT_HAS_FEATURE(x) __has_feature(x)
+#else
+#  define FMT_HAS_FEATURE(x) 0
+#endif
+
+#if defined(__has_include) && !defined(__INTELLISENSE__) && \
+    !(defined(__INTEL_COMPILER) && __INTEL_COMPILER < 1600)
+#  define FMT_HAS_INCLUDE(x) __has_include(x)
+#else
+#  define FMT_HAS_INCLUDE(x) 0
+#endif
+
+#ifdef __has_cpp_attribute
+#  define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+#  define FMT_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+
+#if defined(__GNUC__) && !defined(__clang__)
+#  define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+#  define FMT_GCC_VERSION 0
+#endif
+
+#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__)
+#  define FMT_HAS_GXX_CXX11 FMT_GCC_VERSION
+#else
+#  define FMT_HAS_GXX_CXX11 0
+#endif
+
+#ifdef _MSC_VER
+#  define FMT_MSC_VER _MSC_VER
+#else
+#  define FMT_MSC_VER 0
+#endif
+
+// Check if relaxed C++14 constexpr is supported.
+// GCC doesn't allow throw in constexpr until version 6 (bug 67371).
+#ifndef FMT_USE_CONSTEXPR
+#  define FMT_USE_CONSTEXPR                                           \
+    (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VER >= 1910 || \
+     (FMT_GCC_VERSION >= 600 && __cplusplus >= 201402L))
+#endif
+#if FMT_USE_CONSTEXPR
+#  define FMT_CONSTEXPR constexpr
+#  define FMT_CONSTEXPR_DECL constexpr
+#else
+#  define FMT_CONSTEXPR inline
+#  define FMT_CONSTEXPR_DECL
+#endif
+
+#ifndef FMT_OVERRIDE
+#  if FMT_HAS_FEATURE(cxx_override) || \
+      (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900
+#    define FMT_OVERRIDE override
+#  else
+#    define FMT_OVERRIDE
+#  endif
+#endif
+
+// Check if exceptions are disabled.
+#ifndef FMT_EXCEPTIONS
+#  if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \
+      FMT_MSC_VER && !_HAS_EXCEPTIONS
+#    define FMT_EXCEPTIONS 0
+#  else
+#    define FMT_EXCEPTIONS 1
+#  endif
+#endif
+
+// Define FMT_USE_NOEXCEPT to make fmt use noexcept (C++11 feature).
+#ifndef FMT_USE_NOEXCEPT
+#  define FMT_USE_NOEXCEPT 0
+#endif
+
+#if FMT_USE_NOEXCEPT || FMT_HAS_FEATURE(cxx_noexcept) || \
+    (FMT_GCC_VERSION >= 408 && FMT_HAS_GXX_CXX11) || FMT_MSC_VER >= 1900
+#  define FMT_DETECTED_NOEXCEPT noexcept
+#  define FMT_HAS_CXX11_NOEXCEPT 1
+#else
+#  define FMT_DETECTED_NOEXCEPT throw()
+#  define FMT_HAS_CXX11_NOEXCEPT 0
+#endif
+
+#ifndef FMT_NOEXCEPT
+#  if FMT_EXCEPTIONS || FMT_HAS_CXX11_NOEXCEPT
+#    define FMT_NOEXCEPT FMT_DETECTED_NOEXCEPT
+#  else
+#    define FMT_NOEXCEPT
+#  endif
+#endif
+
+// [[noreturn]] is disabled on MSVC because of bogus unreachable code warnings.
+#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VER
+#  define FMT_NORETURN [[noreturn]]
+#else
+#  define FMT_NORETURN
+#endif
+
+#ifndef FMT_DEPRECATED
+#  if (FMT_HAS_CPP_ATTRIBUTE(deprecated) && __cplusplus >= 201402L) || \
+      FMT_MSC_VER >= 1900
+#    define FMT_DEPRECATED [[deprecated]]
+#  else
+#    if defined(__GNUC__) || defined(__clang__)
+#      define FMT_DEPRECATED __attribute__((deprecated))
+#    elif FMT_MSC_VER
+#      define FMT_DEPRECATED __declspec(deprecated)
+#    else
+#      define FMT_DEPRECATED /* deprecated */
+#    endif
+#  endif
+#endif
+// Work around broken [[deprecated]] support in the Intel compiler and NVCC.
+#if defined(__INTEL_COMPILER) || defined(__NVCC__)
+#  define FMT_DEPRECATED_ALIAS
+#else
+#  define FMT_DEPRECATED_ALIAS FMT_DEPRECATED
+#endif
+
+#ifndef FMT_BEGIN_NAMESPACE
+#  if FMT_HAS_FEATURE(cxx_inline_namespaces) || FMT_GCC_VERSION >= 404 || \
+      FMT_MSC_VER >= 1900
+#    define FMT_INLINE_NAMESPACE inline namespace
+#    define FMT_END_NAMESPACE \
+      }                       \
+      }
+#  else
+#    define FMT_INLINE_NAMESPACE namespace
+#    define FMT_END_NAMESPACE \
+      }                       \
+      using namespace v6;     \
+      }
+#  endif
+#  define FMT_BEGIN_NAMESPACE \
+    namespace fmt {           \
+    FMT_INLINE_NAMESPACE v6 {
+#endif
+
+#if !defined(FMT_HEADER_ONLY) && defined(_WIN32)
+#  ifdef FMT_EXPORT
+#    define FMT_API __declspec(dllexport)
+#  elif defined(FMT_SHARED)
+#    define FMT_API __declspec(dllimport)
+#    define FMT_EXTERN_TEMPLATE_API FMT_API
+#  endif
+#endif
+#ifndef FMT_API
+#  define FMT_API
+#endif
+#ifndef FMT_EXTERN_TEMPLATE_API
+#  define FMT_EXTERN_TEMPLATE_API
+#endif
+
+#ifndef FMT_HEADER_ONLY
+#  define FMT_EXTERN extern
+#else
+#  define FMT_EXTERN
+#endif
+
+#ifndef FMT_ASSERT
+#  define FMT_ASSERT(condition, message) assert((condition) && message)
+#endif
+
+// libc++ supports <string_view> even in pre-C++17 modes.
+#if (FMT_HAS_INCLUDE(<string_view>) &&                       \
+     (__cplusplus > 201402L || defined(_LIBCPP_VERSION))) || \
+    (defined(_MSVC_LANG) && _MSVC_LANG > 201402L && _MSC_VER >= 1910)
+#  include <string_view>
+#  define FMT_USE_STRING_VIEW
+#elif FMT_HAS_INCLUDE("experimental/string_view") && __cplusplus >= 201402L
+#  include <experimental/string_view>
+#  define FMT_USE_EXPERIMENTAL_STRING_VIEW
+#endif
+
+FMT_BEGIN_NAMESPACE
+
+// Implementations of enable_if_t and other types for pre-C++14 systems.
+template <bool B, class T = void>
+using enable_if_t = typename std::enable_if<B, T>::type;
+template <bool B, class T, class F>
+using conditional_t = typename std::conditional<B, T, F>::type;
+template <bool B> using bool_constant = std::integral_constant<bool, B>;
+template <typename T>
+using remove_reference_t = typename std::remove_reference<T>::type;
+template <typename T>
+using remove_const_t = typename std::remove_const<T>::type;
+
+struct monostate {};
+
+// An enable_if helper to be used in template parameters which results in much
+// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed
+// to workaround a bug in MSVC 2019 (see #1140 and #1186).
+#define FMT_ENABLE_IF(...) enable_if_t<(__VA_ARGS__), int> = 0
+
+namespace internal {
+
+// A workaround for gcc 4.8 to make void_t work in a SFINAE context.
+template <typename... Ts> struct void_t_impl { using type = void; };
+
+#if defined(FMT_USE_STRING_VIEW)
+template <typename Char> using std_string_view = std::basic_string_view<Char>;
+#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW)
+template <typename Char>
+using std_string_view = std::experimental::basic_string_view<Char>;
+#else
+template <typename T> struct std_string_view {};
+#endif
+
+// Casts a nonnegative integer to its unsigned type; FMT_ASSERTs value >= 0.
+template <typename Int>
+FMT_CONSTEXPR typename std::make_unsigned<Int>::type to_unsigned(Int value) {
+  FMT_ASSERT(value >= 0, "negative value");
+  return static_cast<typename std::make_unsigned<Int>::type>(value);
+}
+}  // namespace internal
+
+template <typename... Ts>
+using void_t = typename internal::void_t_impl<Ts...>::type;
+
+/**
+  An implementation of ``std::basic_string_view`` for pre-C++17. It provides a
+  subset of the API. ``fmt::basic_string_view`` is used for format strings even
+  if ``std::string_view`` is available to prevent issues when a library is
+  compiled with a different ``-std`` option than the client code (which is not
+  recommended).
+ */
+template <typename Char> class basic_string_view {
+ private:
+  const Char* data_;
+  size_t size_;
+
+ public:
+  using char_type = Char;
+  using iterator = const Char*;
+
+  FMT_CONSTEXPR basic_string_view() FMT_NOEXCEPT : data_(nullptr), size_(0) {}
+
+  /** Constructs a string reference object from a C string and a size. */
+  FMT_CONSTEXPR basic_string_view(const Char* s, size_t count) FMT_NOEXCEPT
+      : data_(s),
+        size_(count) {}
+
+  /**
+    \rst
+    Constructs a string reference object from a C string computing
+    the size with ``std::char_traits<Char>::length``.
+    \endrst
+   */
+  basic_string_view(const Char* s)
+      : data_(s), size_(std::char_traits<Char>::length(s)) {}
+
+  /** Constructs a view of a ``std::basic_string`` (any traits/allocator). */
+  template <typename Traits, typename Alloc>
+  FMT_CONSTEXPR basic_string_view(
+      const std::basic_string<Char, Traits, Alloc>& s) FMT_NOEXCEPT
+      : data_(s.data()), size_(s.size()) {}
+  /** Constructs a string reference from the standard string view type. */
+  template <
+      typename S,
+      FMT_ENABLE_IF(std::is_same<S, internal::std_string_view<Char>>::value)>
+  FMT_CONSTEXPR basic_string_view(S s) FMT_NOEXCEPT : data_(s.data()),
+                                                      size_(s.size()) {}
+
+  /** Returns a pointer to the string data. */
+  FMT_CONSTEXPR const Char* data() const { return data_; }
+
+  /** Returns the string size. */
+  FMT_CONSTEXPR size_t size() const { return size_; }
+
+  FMT_CONSTEXPR iterator begin() const { return data_; }
+  FMT_CONSTEXPR iterator end() const { return data_ + size_; }
+  /** Drops the first n characters; n is not checked against size(). */
+  FMT_CONSTEXPR void remove_prefix(size_t n) {
+    data_ += n;
+    size_ -= n;
+  }
+
+  // Lexicographically compare this string reference to other.
+  int compare(basic_string_view other) const {
+    size_t str_size = size_ < other.size_ ? size_ : other.size_;
+    int result = std::char_traits<Char>::compare(data_, other.data_, str_size);
+    if (result == 0)
+      result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1);
+    return result;
+  }
+
+  friend bool operator==(basic_string_view lhs, basic_string_view rhs) {
+    return lhs.compare(rhs) == 0;
+  }
+  friend bool operator!=(basic_string_view lhs, basic_string_view rhs) {
+    return lhs.compare(rhs) != 0;
+  }
+  friend bool operator<(basic_string_view lhs, basic_string_view rhs) {
+    return lhs.compare(rhs) < 0;
+  }
+  friend bool operator<=(basic_string_view lhs, basic_string_view rhs) {
+    return lhs.compare(rhs) <= 0;
+  }
+  friend bool operator>(basic_string_view lhs, basic_string_view rhs) {
+    return lhs.compare(rhs) > 0;
+  }
+  friend bool operator>=(basic_string_view lhs, basic_string_view rhs) {
+    return lhs.compare(rhs) >= 0;
+  }
+};
+
+using string_view = basic_string_view<char>;
+using wstring_view = basic_string_view<wchar_t>;
+
+#ifndef __cpp_char8_t
+// A UTF-8 code unit type.
+enum char8_t : unsigned char {};
+#endif
+
+/** Specifies if ``T`` is a character type. Can be specialized by users. */
+template <typename T> struct is_char : std::false_type {};
+template <> struct is_char<char> : std::true_type {};
+template <> struct is_char<wchar_t> : std::true_type {};
+template <> struct is_char<char8_t> : std::true_type {};
+template <> struct is_char<char16_t> : std::true_type {};
+template <> struct is_char<char32_t> : std::true_type {};
+
+/**
+  \rst
+  Returns a string view of `s`. In order to add custom string type support to
+  {fmt} provide an overload of `to_string_view` for it in the same namespace as
+  the type for the argument-dependent lookup to work.
+
+  **Example**::
+
+    namespace my_ns {
+    inline string_view to_string_view(const my_string& s) {
+      return {s.data(), s.length()};
+    }
+    }
+    std::string message = fmt::format(my_string("The answer is {}"), 42);
+  \endrst
+ */
+template <typename Char, FMT_ENABLE_IF(is_char<Char>::value)>
+inline basic_string_view<Char> to_string_view(const Char* s) {
+  return s;
+}
+
+template <typename Char, typename Traits, typename Allocator>
+inline basic_string_view<Char> to_string_view(
+    const std::basic_string<Char, Traits, Allocator>& s) {
+  return {s.data(), s.size()};
+}
+
+template <typename Char>
+inline basic_string_view<Char> to_string_view(basic_string_view<Char> s) {
+  return s;
+}
+
+template <typename Char,
+          FMT_ENABLE_IF(!std::is_empty<internal::std_string_view<Char>>::value)>
+inline basic_string_view<Char> to_string_view(
+    internal::std_string_view<Char> s) {
+  return s;
+}
+
+// A base class for compile-time strings. It is defined in the fmt namespace to
+// make formatting functions visible via ADL, e.g. format(fmt("{}"), 42).
+struct compile_string {};
+
+template <typename S>
+struct is_compile_string : std::is_base_of<compile_string, S> {};
+
+template <typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
+constexpr basic_string_view<typename S::char_type> to_string_view(const S& s) {
+  return s;
+}
+
+namespace internal {
+void to_string_view(...);
+using fmt::v6::to_string_view;
+
+// Specifies whether S is a string type convertible to fmt::basic_string_view.
+// It should be a constexpr function but MSVC 2017 fails to compile it in
+// enable_if and MSVC 2015 fails to compile it as an alias template.
+template <typename S>
+struct is_string : std::is_class<decltype(to_string_view(std::declval<S>()))> {
+};
+
+template <typename S, typename = void> struct char_t_impl {};
+template <typename S> struct char_t_impl<S, enable_if_t<is_string<S>::value>> {
+  using result = decltype(to_string_view(std::declval<S>()));
+  using type = typename result::char_type;
+};
+
+struct error_handler {
+  FMT_CONSTEXPR error_handler() {}
+  FMT_CONSTEXPR error_handler(const error_handler&) {}
+
+  // This function is intentionally not constexpr to give a compile-time error.
+  FMT_NORETURN FMT_API void on_error(const char* message);
+};
+}  // namespace internal
+
+/** String's character type. */
+template <typename S> using char_t = typename internal::char_t_impl<S>::type;
+
+// Parsing context consisting of a format string range being parsed and an
+// argument counter for automatic indexing.
+template <typename Char, typename ErrorHandler = internal::error_handler>
+class basic_parse_context : private ErrorHandler {
+  basic_string_view<Char> format_str_;  // Portion still to be parsed.
+  int next_arg_id_;  // Next automatic index; -1 once manual indexing is used.
+
+ public:
+  using char_type = Char;
+  using iterator = typename basic_string_view<Char>::iterator;
+
+  explicit FMT_CONSTEXPR basic_parse_context(basic_string_view<Char> format_str,
+                                             ErrorHandler eh = ErrorHandler())
+      : ErrorHandler(eh), format_str_(format_str), next_arg_id_(0) {}
+
+  // Start of the range of the format string that is still being parsed.
+  FMT_CONSTEXPR iterator begin() const FMT_NOEXCEPT {
+    return format_str_.begin();
+  }
+  // One past the end of the format string range being parsed.
+  FMT_CONSTEXPR iterator end() const FMT_NOEXCEPT { return format_str_.end(); }
+
+  // Moves the begin iterator forward to ``it`` by dropping the prefix.
+  FMT_CONSTEXPR void advance_to(iterator it) {
+    const auto consumed = internal::to_unsigned(it - begin());
+    format_str_.remove_prefix(consumed);
+  }
+
+  // Returns the next automatic index; errors (returning 0) after manual use.
+  FMT_CONSTEXPR int next_arg_id() {
+    if (next_arg_id_ < 0) {
+      on_error("cannot switch from manual to automatic argument indexing");
+      return 0;
+    }
+    return next_arg_id_++;
+  }
+
+  // Switches to manual indexing; reports an error and returns false if an
+  // automatic index has already been handed out.
+  FMT_CONSTEXPR bool check_arg_id(int) {
+    const bool manual_ok = next_arg_id_ <= 0;
+    if (manual_ok)
+      next_arg_id_ = -1;
+    else
+      on_error("cannot switch from automatic to manual argument indexing");
+    return manual_ok;
+  }
+  FMT_CONSTEXPR void check_arg_id(basic_string_view<Char>) {}
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    ErrorHandler::on_error(message);
+  }
+  FMT_CONSTEXPR ErrorHandler error_handler() const { return *this; }
+};
+
+using format_parse_context = basic_parse_context<char>;
+using wformat_parse_context = basic_parse_context<wchar_t>;
+
+using parse_context FMT_DEPRECATED_ALIAS = basic_parse_context<char>;
+using wparse_context FMT_DEPRECATED_ALIAS = basic_parse_context<wchar_t>;
+
+template <typename Context> class basic_format_arg;
+template <typename Context> class basic_format_args;
+
+// A formatter for objects of type T.
+template <typename T, typename Char = char, typename Enable = void>
+struct formatter {
+  // A deleted default constructor indicates a disabled formatter.
+  formatter() = delete;
+};
+
+template <typename T, typename Char, typename Enable = void>
+struct FMT_DEPRECATED convert_to_int
+    : bool_constant<!std::is_arithmetic<T>::value &&
+                    std::is_convertible<T, int>::value> {};
+
+namespace internal {
+
+// Specifies if T has an enabled formatter specialization. A type can be
+// formattable even if it doesn't have a formatter e.g. via a conversion.
+template <typename T, typename Context>
+using has_formatter =
+    std::is_constructible<typename Context::template formatter_type<T>>;
+
+/** A contiguous memory buffer with an optional growing ability. */
+template <typename T> class buffer {
+ private:
+  buffer(const buffer&) = delete;
+  void operator=(const buffer&) = delete;
+  // Start of the storage, number of elements in use, and current capacity.
+  T* ptr_;
+  std::size_t size_;
+  std::size_t capacity_;
+
+ protected:
+  // Leaves ptr_ uninitialized on purpose: it is not accessed, saving cycles.
+  buffer(std::size_t sz) FMT_NOEXCEPT : size_(sz), capacity_(sz) {}
+  // Adopts storage p with sz elements in use and room for cap elements.
+  buffer(T* p = nullptr, std::size_t sz = 0, std::size_t cap = 0) FMT_NOEXCEPT
+      : ptr_(p),
+        size_(sz),
+        capacity_(cap) {}
+
+  /** Sets the buffer data and capacity. */
+  void set(T* buf_data, std::size_t buf_capacity) FMT_NOEXCEPT {
+    ptr_ = buf_data;
+    capacity_ = buf_capacity;
+  }
+
+  /** Increases the buffer capacity to hold at least *capacity* elements. */
+  virtual void grow(std::size_t capacity) = 0;
+
+ public:
+  using value_type = T;
+  using const_reference = const T&;
+
+  virtual ~buffer() {}
+  // [begin(), end()) spans the size() elements currently in the buffer.
+  T* begin() FMT_NOEXCEPT { return ptr_; }
+  T* end() FMT_NOEXCEPT { return ptr_ + size_; }
+
+  /** Returns the size of this buffer. */
+  std::size_t size() const FMT_NOEXCEPT { return size_; }
+
+  /** Returns the capacity of this buffer. */
+  std::size_t capacity() const FMT_NOEXCEPT { return capacity_; }
+
+  /** Returns a pointer to the buffer data. */
+  T* data() FMT_NOEXCEPT { return ptr_; }
+
+  /** Returns a pointer to the buffer data. */
+  const T* data() const FMT_NOEXCEPT { return ptr_; }
+
+  /**
+    Resizes the buffer. If T is a POD type new elements may not be initialized.
+   */
+  void resize(std::size_t new_size) {
+    reserve(new_size);
+    size_ = new_size;
+  }
+
+  /** Clears this buffer. */
+  void clear() { size_ = 0; }
+
+  /** Reserves space to store at least *capacity* elements. */
+  void reserve(std::size_t new_capacity) {
+    if (new_capacity > capacity_) grow(new_capacity);
+  }
+  /** Appends one element, reserving room for it first. */
+  void push_back(const T& value) {
+    reserve(size_ + 1);
+    ptr_[size_++] = value;
+  }
+
+  /** Appends data to the end of the buffer. */
+  template <typename U> void append(const U* begin, const U* end);
+  /** Element access; the index is not checked against size(). */
+  T& operator[](std::size_t index) { return ptr_[index]; }
+  const T& operator[](std::size_t index) const { return ptr_[index]; }
+};
+
+// A buffer that stores its elements in an external container it refers to.
+template <typename Container>
+class container_buffer : public buffer<typename Container::value_type> {
+  using base = buffer<typename Container::value_type>;
+  Container& container_;
+
+ public:
+  explicit container_buffer(Container& c) : base(c.size()), container_(c) {}
+
+ protected:
+  // Resizes the container and re-points the buffer at its first element.
+  void grow(std::size_t capacity) FMT_OVERRIDE {
+    container_.resize(capacity);
+    base::set(&container_[0], capacity);
+  }
+};
+
+// Exposes the protected `container` pointer of a back_insert_iterator.
+template <typename Container>
+inline Container& get_container(std::back_insert_iterator<Container> it) {
+  using inserter = std::back_insert_iterator<Container>;
+  struct exposer : inserter {
+    explicit exposer(inserter src) : inserter(src) {}
+    Container& target() const { return *this->container; }
+  };
+  return exposer(it).target();
+}
+
+template <typename T, typename Char = char, typename Enable = void>
+struct fallback_formatter {
+  fallback_formatter() = delete;
+};
+
+// Specifies if T has an enabled fallback_formatter specialization.
+template <typename T, typename Context>
+using has_fallback_formatter =
+    std::is_constructible<fallback_formatter<T, typename Context::char_type>>;
+
+template <typename Char> struct named_arg_base;
+template <typename T, typename Char> struct named_arg;
+
+enum type {
+  none_type,
+  named_arg_type,
+  // Integer types should go first,
+  int_type,
+  uint_type,
+  long_long_type,
+  ulong_long_type,
+  bool_type,
+  char_type,
+  last_integer_type = char_type,
+  // followed by floating-point types.
+  double_type,
+  long_double_type,
+  last_numeric_type = long_double_type,
+  cstring_type,
+  string_type,
+  pointer_type,
+  custom_type
+};
+
+// Maps core type T to the corresponding type enum constant.
+template <typename T, typename Char>
+struct type_constant : std::integral_constant<type, custom_type> {};
+
+#define FMT_TYPE_CONSTANT(Type, constant) \
+  template <typename Char>                \
+  struct type_constant<Type, Char> : std::integral_constant<type, constant> {}
+
+FMT_TYPE_CONSTANT(const named_arg_base<Char>&, named_arg_type);
+FMT_TYPE_CONSTANT(int, int_type);
+FMT_TYPE_CONSTANT(unsigned, uint_type);
+FMT_TYPE_CONSTANT(long long, long_long_type);
+FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type);
+FMT_TYPE_CONSTANT(bool, bool_type);
+FMT_TYPE_CONSTANT(Char, char_type);
+FMT_TYPE_CONSTANT(double, double_type);
+FMT_TYPE_CONSTANT(long double, long_double_type);
+FMT_TYPE_CONSTANT(const Char*, cstring_type);
+FMT_TYPE_CONSTANT(basic_string_view<Char>, string_type);
+FMT_TYPE_CONSTANT(const void*, pointer_type);
+
+FMT_CONSTEXPR bool is_integral(type t) {
+  FMT_ASSERT(t != named_arg_type, "invalid argument type");
+  return t > none_type && t <= last_integer_type;
+}
+
+FMT_CONSTEXPR bool is_arithmetic(type t) {
+  FMT_ASSERT(t != named_arg_type, "invalid argument type");
+  return t > none_type && t <= last_numeric_type;
+}
+
+template <typename Char> struct string_value {
+  const Char* data;
+  std::size_t size;
+};
+
+template <typename Context> struct custom_value {
+  using parse_context = basic_parse_context<typename Context::char_type>;
+  const void* value;
+  void (*format)(const void* arg, parse_context& parse_ctx, Context& ctx);
+};
+
+// A formatting argument value.
+template <typename Context> class value {
+ public:
+  using char_type = typename Context::char_type;
+  // One slot per core type; this class does not record which one is active.
+  union {
+    int int_value;
+    unsigned uint_value;
+    long long long_long_value;
+    unsigned long long ulong_long_value;
+    bool bool_value;
+    char_type char_value;
+    double double_value;
+    long double long_double_value;
+    const void* pointer;
+    string_value<char_type> string;
+    custom_value<Context> custom;
+    const named_arg_base<char_type>* named_arg;
+  };
+
+  FMT_CONSTEXPR value(int val = 0) : int_value(val) {}
+  FMT_CONSTEXPR value(unsigned val) : uint_value(val) {}
+  value(long long val) : long_long_value(val) {}
+  value(unsigned long long val) : ulong_long_value(val) {}
+  value(double val) : double_value(val) {}
+  value(long double val) : long_double_value(val) {}
+  value(bool val) : bool_value(val) {}
+  value(char_type val) : char_value(val) {}
+  value(const char_type* val) { string.data = val; }  // string.size is not set.
+  value(basic_string_view<char_type> val) {
+    string.data = val.data();
+    string.size = val.size();
+  }
+  value(const void* val) : pointer(val) {}
+  // Any other type: stores its address plus a type-erased format function.
+  template <typename T> value(const T& val) {
+    custom.value = &val;
+    // Get the formatter type through the context to allow different contexts
+    // have different extension points, e.g. `formatter<T>` for `format` and
+    // `printf_formatter<T>` for `printf`.
+    custom.format = format_custom_arg<
+        T, conditional_t<has_formatter<T, Context>::value,
+                         typename Context::template formatter_type<T>,
+                         fallback_formatter<T, char_type>>>;
+  }
+  // Stores only a pointer to the named argument; val must outlive this value.
+  value(const named_arg_base<char_type>& val) { named_arg = &val; }
+
+ private:
+  // Formats an argument of a custom type, such as a user-defined class.
+  template <typename T, typename Formatter>
+  static void format_custom_arg(const void* arg,
+                                basic_parse_context<char_type>& parse_ctx,
+                                Context& ctx) {
+    Formatter f;
+    parse_ctx.advance_to(f.parse(parse_ctx));
+    ctx.advance_to(f.format(*static_cast<const T*>(arg), ctx));
+  }
+};
+
+template <typename Context, typename T>
+FMT_CONSTEXPR basic_format_arg<Context> make_arg(const T& value);
+
+// To minimize the number of types we need to deal with, long is translated
+// either to int or to long long depending on its size.
+enum { long_short = sizeof(long) == sizeof(int) };
+using long_type = conditional_t<long_short, int, long long>;
+using ulong_type = conditional_t<long_short, unsigned, unsigned long long>;
+
+// Maps formatting arguments to core types.
+template <typename Context> struct arg_mapper {
+  using char_type = typename Context::char_type;
+
+  FMT_CONSTEXPR int map(signed char val) { return val; }
+  FMT_CONSTEXPR unsigned map(unsigned char val) { return val; }
+  FMT_CONSTEXPR int map(short val) { return val; }
+  FMT_CONSTEXPR unsigned map(unsigned short val) { return val; }
+  FMT_CONSTEXPR int map(int val) { return val; }
+  FMT_CONSTEXPR unsigned map(unsigned val) { return val; }
+  FMT_CONSTEXPR long_type map(long val) { return val; }
+  FMT_CONSTEXPR ulong_type map(unsigned long val) { return val; }
+  FMT_CONSTEXPR long long map(long long val) { return val; }
+  FMT_CONSTEXPR unsigned long long map(unsigned long long val) { return val; }
+  FMT_CONSTEXPR bool map(bool val) { return val; }
+
+  template <typename T, FMT_ENABLE_IF(is_char<T>::value)>
+  FMT_CONSTEXPR char_type map(T val) {
+    static_assert(
+        std::is_same<T, char>::value || std::is_same<T, char_type>::value,
+        "mixing character types is disallowed");
+    return val;
+  }
+
+  FMT_CONSTEXPR double map(float val) { return static_cast<double>(val); }
+  FMT_CONSTEXPR double map(double val) { return val; }
+  FMT_CONSTEXPR long double map(long double val) { return val; }
+
+  FMT_CONSTEXPR const char_type* map(char_type* val) { return val; }
+  FMT_CONSTEXPR const char_type* map(const char_type* val) { return val; }
+  template <typename T, FMT_ENABLE_IF(is_string<T>::value)>
+  FMT_CONSTEXPR basic_string_view<char_type> map(const T& val) {
+    static_assert(std::is_same<char_type, char_t<T>>::value,
+                  "mixing character types is disallowed");
+    return to_string_view(val);
+  }
+  template <typename T,
+            FMT_ENABLE_IF(
+                std::is_constructible<basic_string_view<char_type>, T>::value &&
+                !is_string<T>::value)>
+  FMT_CONSTEXPR basic_string_view<char_type> map(const T& val) {
+    return basic_string_view<char_type>(val);
+  }
+  FMT_CONSTEXPR const char* map(const signed char* val) {
+    static_assert(std::is_same<char_type, char>::value, "invalid string type");
+    return reinterpret_cast<const char*>(val);
+  }
+  FMT_CONSTEXPR const char* map(const unsigned char* val) {
+    static_assert(std::is_same<char_type, char>::value, "invalid string type");
+    return reinterpret_cast<const char*>(val);
+  }
+
+  FMT_CONSTEXPR const void* map(void* val) { return val; }
+  FMT_CONSTEXPR const void* map(const void* val) { return val; }
+  FMT_CONSTEXPR const void* map(std::nullptr_t val) { return val; }
+  template <typename T> FMT_CONSTEXPR int map(const T*) {
+    // Formatting of arbitrary pointers is disallowed. If you want to output
+    // a pointer cast it to "void *" or "const void *". In particular, this
+    // forbids formatting of "[const] volatile char *" which is printed as bool
+    // by iostreams.
+    static_assert(!sizeof(T), "formatting of non-void pointers is disallowed");
+    return 0;
+  }
+
+  template <typename T,
+            FMT_ENABLE_IF(std::is_enum<T>::value &&
+                          !has_formatter<T, Context>::value &&
+                          !has_fallback_formatter<T, Context>::value)>
+  FMT_CONSTEXPR int map(const T& val) {
+    return static_cast<int>(val);
+  }
+  template <typename T,
+            FMT_ENABLE_IF(!is_string<T>::value && !is_char<T>::value &&
+                          (has_formatter<T, Context>::value ||
+                           has_fallback_formatter<T, Context>::value))>
+  FMT_CONSTEXPR const T& map(const T& val) {
+    return val;
+  }
+
+  template <typename T>
+  FMT_CONSTEXPR const named_arg_base<char_type>& map(
+      const named_arg<T, char_type>& val) {
+    auto arg = make_arg<Context>(val.value);
+    std::memcpy(val.data, &arg, sizeof(arg));
+    return val;
+  }
+};
+
+// A type constant after applying arg_mapper<Context>.
+template <typename T, typename Context>
+using mapped_type_constant =
+    type_constant<decltype(arg_mapper<Context>().map(std::declval<T>())),
+                  typename Context::char_type>;
+
+// Maximum number of arguments with packed types.
+enum { max_packed_args = 15 };
+enum : unsigned long long { is_unpacked_bit = 1ull << 63 };
+
+template <typename Context> class arg_map;
+}  // namespace internal
+
+// A formatting argument. It is a trivially copyable/constructible type to
+// allow storage in basic_memory_buffer.
+template <typename Context> class basic_format_arg {
+ private:
+  internal::value<Context> value_;
+  internal::type type_;
+
+  template <typename ContextType, typename T>
+  friend FMT_CONSTEXPR basic_format_arg<ContextType> internal::make_arg(
+      const T& value);
+
+  template <typename Visitor, typename Ctx>
+  friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis,
+                                             const basic_format_arg<Ctx>& arg)
+      -> decltype(vis(0));
+
+  friend class basic_format_args<Context>;
+  friend class internal::arg_map<Context>;
+
+  using char_type = typename Context::char_type;
+
+ public:
+  class handle {
+   public:
+    explicit handle(internal::custom_value<Context> custom) : custom_(custom) {}
+
+    void format(basic_parse_context<char_type>& parse_ctx, Context& ctx) const {
+      custom_.format(custom_.value, parse_ctx, ctx);
+    }
+
+   private:
+    internal::custom_value<Context> custom_;
+  };
+
+  FMT_CONSTEXPR basic_format_arg() : type_(internal::none_type) {}
+
+  FMT_CONSTEXPR explicit operator bool() const FMT_NOEXCEPT {
+    return type_ != internal::none_type;
+  }
+
+  internal::type type() const { return type_; }
+
+  bool is_integral() const { return internal::is_integral(type_); }
+  bool is_arithmetic() const { return internal::is_arithmetic(type_); }
+};
+
+/**
+  \rst
+  Visits an argument dispatching to the appropriate visit method based on
+  the argument type. For example, if the argument type is ``double`` then
+  ``vis(value)`` will be called with the value of type ``double``.
+  \endrst
+ */
+template <typename Visitor, typename Context>
+FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis,
+                                    const basic_format_arg<Context>& arg)
+    -> decltype(vis(0)) {
+  using char_type = typename Context::char_type;
+  switch (arg.type_) {
+  case internal::none_type:
+    break;
+  case internal::named_arg_type:
+    FMT_ASSERT(false, "invalid argument type");
+    break;
+  case internal::int_type:
+    return vis(arg.value_.int_value);
+  case internal::uint_type:
+    return vis(arg.value_.uint_value);
+  case internal::long_long_type:
+    return vis(arg.value_.long_long_value);
+  case internal::ulong_long_type:
+    return vis(arg.value_.ulong_long_value);
+  case internal::bool_type:
+    return vis(arg.value_.bool_value);
+  case internal::char_type:
+    return vis(arg.value_.char_value);
+  case internal::double_type:
+    return vis(arg.value_.double_value);
+  case internal::long_double_type:
+    return vis(arg.value_.long_double_value);
+  case internal::cstring_type:
+    return vis(arg.value_.string.data);
+  case internal::string_type:
+    return vis(basic_string_view<char_type>(arg.value_.string.data,
+                                            arg.value_.string.size));
+  case internal::pointer_type:
+    return vis(arg.value_.pointer);
+  case internal::custom_type:
+    return vis(typename basic_format_arg<Context>::handle(arg.value_.custom));
+  }
+  return vis(monostate());
+}
+
+namespace internal {
+// A map from argument names to their values for named arguments.
+template <typename Context> class arg_map {
+ private:
+  arg_map(const arg_map&) = delete;
+  void operator=(const arg_map&) = delete;
+
+  using char_type = typename Context::char_type;
+
+  struct entry {
+    basic_string_view<char_type> name;
+    basic_format_arg<Context> arg;
+  };
+
+  entry* map_;
+  unsigned size_;
+
+  void push_back(value<Context> val) {
+    const named_arg_base<char_type>& source = *val.named_arg;
+    entry& slot = map_[size_];
+    slot = {source.name, source.template deserialize<Context>()};
+    ++size_;
+  }
+
+ public:
+  arg_map() : map_(nullptr), size_(0) {}
+  void init(const basic_format_args<Context>& args);
+  ~arg_map() { delete[] map_; }
+
+  basic_format_arg<Context> find(basic_string_view<char_type> name) const {
+    // Linear scan over the unsorted entries; the first name match wins.
+    for (unsigned i = 0; i != size_; ++i)
+      if (map_[i].name == name) return map_[i].arg;
+    return {};
+  }
+};
+
+// A type-erased reference to an std::locale to avoid heavy <locale> include.
+class locale_ref {
+ private:
+  const void* locale_;  // A type-erased pointer to std::locale.
+
+ public:
+  locale_ref() : locale_(nullptr) {}
+  template <typename Locale> explicit locale_ref(const Locale& loc);
+
+  template <typename Locale> Locale get() const;
+};
+
+template <typename> constexpr unsigned long long encode_types() { return 0; }
+
+template <typename Context, typename Arg, typename... Args>
+constexpr unsigned long long encode_types() {
+  return mapped_type_constant<Arg, Context>::value |
+         (encode_types<Context, Args...>() << 4);
+}
+
+template <typename Context, typename T>
+FMT_CONSTEXPR basic_format_arg<Context> make_arg(const T& value) {
+  basic_format_arg<Context> result;
+  result.value_ = arg_mapper<Context>().map(value);
+  result.type_ = mapped_type_constant<T, Context>::value;
+  return result;
+}
+
+template <bool IS_PACKED, typename Context, typename T,
+          FMT_ENABLE_IF(IS_PACKED)>
+inline value<Context> make_arg(const T& val) {
+  return arg_mapper<Context>().map(val);
+}
+
+template <bool IS_PACKED, typename Context, typename T,
+          FMT_ENABLE_IF(!IS_PACKED)>
+inline basic_format_arg<Context> make_arg(const T& value) {
+  return make_arg<Context>(value);
+}
+}  // namespace internal
+
+// Formatting context.
+template <typename OutputIt, typename Char> class basic_format_context {
+ public:
+  /** The character type for the output. */
+  using char_type = Char;
+
+ private:
+  OutputIt out_;
+  basic_format_args<basic_format_context> args_;
+  internal::arg_map<basic_format_context> map_;
+  internal::locale_ref loc_;
+
+  basic_format_context(const basic_format_context&) = delete;
+  void operator=(const basic_format_context&) = delete;
+
+ public:
+  using iterator = OutputIt;
+  using format_arg = basic_format_arg<basic_format_context>;
+  template <typename T> using formatter_type = formatter<T, char_type>;
+
+  /**
+   Constructs a ``basic_format_context`` object. References to the arguments are
+   stored in the object so make sure they have appropriate lifetimes.
+   */
+  basic_format_context(OutputIt out,
+                       basic_format_args<basic_format_context> ctx_args,
+                       internal::locale_ref loc = internal::locale_ref())
+      : out_(out), args_(ctx_args), loc_(loc) {}
+
+  format_arg arg(int id) const { return args_.get(id); }
+
+  // Checks if manual indexing is used and returns the argument with the
+  // specified name.
+  format_arg arg(basic_string_view<char_type> name);
+
+  internal::error_handler error_handler() { return {}; }
+  void on_error(const char* message) { error_handler().on_error(message); }
+
+  // Returns an iterator to the beginning of the output range.
+  iterator out() { return out_; }
+
+  // Advances the begin iterator to ``it``.
+  void advance_to(iterator it) { out_ = it; }
+
+  internal::locale_ref locale() { return loc_; }
+};
+
+template <typename Char>
+using buffer_context =
+    basic_format_context<std::back_insert_iterator<internal::buffer<Char>>,
+                         Char>;
+using format_context = buffer_context<char>;
+using wformat_context = buffer_context<wchar_t>;
+
+/**
+  \rst
+  An array of references to arguments. It can be implicitly converted into
+  `~fmt::basic_format_args` for passing into type-erased formatting functions
+  such as `~fmt::vformat`.
+  \endrst
+ */
+template <typename Context, typename... Args> class format_arg_store {
+ private:
+  static const size_t num_args = sizeof...(Args);
+  static const bool is_packed = num_args < internal::max_packed_args;
+
+  using value_type = conditional_t<is_packed, internal::value<Context>,
+                                   basic_format_arg<Context>>;
+
+  // Use one placeholder element when Args is empty so the size is never zero.
+  value_type data_[num_args + (num_args == 0 ? 1 : 0)];
+
+  friend class basic_format_args<Context>;
+
+ public:
+  static constexpr unsigned long long types =
+      is_packed ? internal::encode_types<Context, Args...>()
+                : internal::is_unpacked_bit | num_args;
+  FMT_DEPRECATED static constexpr unsigned long long TYPES = types;
+
+  format_arg_store(const Args&... args)
+      : data_{internal::make_arg<is_packed, Context>(args)...} {}
+};
+
+/**
+  \rst
+  Constructs an `~fmt::format_arg_store` object that contains references to
+  arguments and can be implicitly converted to `~fmt::format_args`. `Context`
+  can be omitted in which case it defaults to `~fmt::format_context`.
+  See `~fmt::arg` for lifetime considerations.
+  \endrst
+ */
+template <typename Context = format_context, typename... Args>
+inline format_arg_store<Context, Args...> make_format_args(
+    const Args&... args) {
+  return {args...};
+}
+
+/** Formatting arguments. */
+template <typename Context> class basic_format_args {
+ public:
+  using size_type = int;
+  using format_arg = basic_format_arg<Context>;
+
+ private:
+  // To reduce compiled code size per formatting function call, types of first
+  // max_packed_args arguments are passed in the types_ field.
+  unsigned long long types_;
+  union {
+    // If the number of arguments is less than max_packed_args, the argument
+    // values are stored in values_, otherwise they are stored in args_.
+    // This is done to reduce compiled code size as storing larger objects
+    // may require more code (at least on x86-64) even if the same amount of
+    // data is actually copied to stack. It saves ~10% on the bloat test.
+    const internal::value<Context>* values_;
+    const format_arg* args_;
+  };
+
+  bool is_packed() const { return (types_ & internal::is_unpacked_bit) == 0; }
+
+  internal::type type(int index) const {
+    int shift = index * 4;
+    return static_cast<internal::type>((types_ & (0xfull << shift)) >> shift);
+  }
+
+  friend class internal::arg_map<Context>;
+
+  void set_data(const internal::value<Context>* values) { values_ = values; }
+  void set_data(const format_arg* args) { args_ = args; }
+
+  format_arg do_get(int index) const {
+    format_arg arg;
+    // Negative indices yield an empty arg instead of an out-of-bounds access.
+    if (index < 0) return arg;
+    if (!is_packed()) {
+      if (index < max_size()) arg = args_[index];
+      return arg;
+    }
+    if (index > internal::max_packed_args) return arg;
+    arg.type_ = type(index);
+    if (arg.type_ == internal::none_type) return arg;
+    arg.value_ = values_[index];
+    return arg;
+  }
+
+ public:
+  basic_format_args() : types_(0) {}
+
+  /**
+   \rst
+   Constructs a `basic_format_args` object from `~fmt::format_arg_store`.
+   \endrst
+   */
+  template <typename... Args>
+  basic_format_args(const format_arg_store<Context, Args...>& store)
+      : types_(static_cast<unsigned long long>(store.types)) {
+    set_data(store.data_);
+  }
+
+  /**
+   \rst
+   Constructs a `basic_format_args` object from a dynamic set of arguments.
+   \endrst
+   */
+  basic_format_args(const format_arg* args, int count)
+      : types_(internal::is_unpacked_bit | internal::to_unsigned(count)) {
+    set_data(args);
+  }
+
+  /** Returns the argument at specified index. */
+  format_arg get(int index) const {
+    format_arg arg = do_get(index);
+    if (arg.type_ == internal::named_arg_type)
+      arg = arg.value_.named_arg->template deserialize<Context>();
+    return arg;
+  }
+
+  int max_size() const {
+    unsigned long long max_packed = internal::max_packed_args;
+    return static_cast<int>(is_packed() ? max_packed
+                                        : types_ & ~internal::is_unpacked_bit);
+  }
+};
+
+/** An alias to ``basic_format_args<format_context>``. */
+// It is a separate type rather than an alias to make symbols readable.
+struct format_args : basic_format_args<format_context> {
+  template <typename... Args>
+  format_args(Args&&... args)
+      : basic_format_args<format_context>(std::forward<Args>(args)...) {}
+};
+struct wformat_args : basic_format_args<wformat_context> {
+  template <typename... Args>
+  wformat_args(Args&&... args)
+      : basic_format_args<wformat_context>(std::forward<Args>(args)...) {}
+};
+
+template <typename Container> struct is_contiguous : std::false_type {};
+
+template <typename Char>
+struct is_contiguous<std::basic_string<Char>> : std::true_type {};
+
+template <typename Char>
+struct is_contiguous<internal::buffer<Char>> : std::true_type {};
+
+namespace internal {
+
+template <typename OutputIt>
+struct is_contiguous_back_insert_iterator : std::false_type {};
+template <typename Container>
+struct is_contiguous_back_insert_iterator<std::back_insert_iterator<Container>>
+    : is_contiguous<Container> {};
+
+template <typename Char> struct named_arg_base {
+  basic_string_view<Char> name;
+
+  // Serialized basic_format_arg<Context>; filled in by arg_mapper::map.
+  mutable char data[sizeof(basic_format_arg<buffer_context<Char>>)];
+
+  named_arg_base(basic_string_view<Char> nm) : name(nm) {}
+
+  template <typename Context> basic_format_arg<Context> deserialize() const {
+    basic_format_arg<Context> arg;
+    std::memcpy(&arg, data, sizeof(basic_format_arg<Context>));
+    return arg;
+  }
+};
+
+template <typename T, typename Char> struct named_arg : named_arg_base<Char> {
+  const T& value;
+
+  named_arg(basic_string_view<Char> name, const T& val)
+      : named_arg_base<Char>(name), value(val) {}
+};
+
+template <typename..., typename S, FMT_ENABLE_IF(!is_compile_string<S>::value)>
+inline void check_format_string(const S&) {
+#if defined(FMT_ENFORCE_COMPILE_STRING)
+  static_assert(is_compile_string<S>::value,
+                "FMT_ENFORCE_COMPILE_STRING requires all format strings to "
+                "utilize FMT_STRING() or fmt().");
+#endif
+}
+template <typename..., typename S, FMT_ENABLE_IF(is_compile_string<S>::value)>
+void check_format_string(S);
+
+struct view {};
+template <bool...> struct bool_pack;
+template <bool... Args>
+using all_true =
+    std::is_same<bool_pack<Args..., true>, bool_pack<true, Args...>>;
+
+template <typename... Args, typename S, typename Char = char_t<S>>
+inline format_arg_store<buffer_context<Char>, remove_reference_t<Args>...>
+make_args_checked(const S& format_str,
+                  const remove_reference_t<Args>&... args) {
+  static_assert(all_true<(!std::is_base_of<view, remove_reference_t<Args>>() ||
+                          !std::is_reference<Args>())...>::value,
+                "passing views as lvalues is disallowed");
+  check_format_string<remove_const_t<remove_reference_t<Args>>...>(format_str);
+  return {args...};
+}
+
+template <typename Char>
+std::basic_string<Char> vformat(basic_string_view<Char> format_str,
+                                basic_format_args<buffer_context<Char>> args);
+
+template <typename Char>
+typename buffer_context<Char>::iterator vformat_to(
+    buffer<Char>& buf, basic_string_view<Char> format_str,
+    basic_format_args<buffer_context<Char>> args);
+}  // namespace internal
+
+/**
+  \rst
+  Returns a named argument to be used in a formatting function.
+
+  The named argument holds a reference and does not extend the lifetime
+  of its arguments.
+  Consequently, a dangling reference can accidentally be created.
+  The user should take care to only pass this function temporaries when
+  the named argument is itself a temporary, as per the following example.
+
+  **Example**::
+
+    fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23));
+  \endrst
+ */
+template <typename S, typename T, typename Char = char_t<S>>
+inline internal::named_arg<T, Char> arg(const S& name, const T& arg) {
+  static_assert(internal::is_string<S>::value, "");
+  return {name, arg};
+}
+
+// Disable nested named arguments, e.g. ``arg("a", arg("b", 42))``.
+template <typename S, typename T, typename Char>
+void arg(S, internal::named_arg<T, Char>) = delete;
+
+/** Formats a string and writes the output to ``out``. */
+// GCC 8 and earlier cannot handle std::back_insert_iterator<Container> with
+// vformat_to<ArgFormatter>(...) overload, so SFINAE on iterator type instead.
+template <typename OutputIt, typename S, typename Char = char_t<S>,
+          FMT_ENABLE_IF(
+              internal::is_contiguous_back_insert_iterator<OutputIt>::value)>
+OutputIt vformat_to(OutputIt out, const S& format_str,
+                    basic_format_args<buffer_context<Char>> args) {
+  using container = remove_reference_t<decltype(internal::get_container(out))>;
+  internal::container_buffer<container> buf((internal::get_container(out)));
+  internal::vformat_to(buf, to_string_view(format_str), args);
+  return out;
+}
+
+template <typename Container, typename S, typename... Args,
+          FMT_ENABLE_IF(
+              is_contiguous<Container>::value&& internal::is_string<S>::value)>
+inline std::back_insert_iterator<Container> format_to(
+    std::back_insert_iterator<Container> out, const S& format_str,
+    Args&&... args) {
+  return vformat_to(
+      out, to_string_view(format_str),
+      {internal::make_args_checked<Args...>(format_str, args...)});
+}
+
+template <typename S, typename Char = char_t<S>>
+inline std::basic_string<Char> vformat(
+    const S& format_str, basic_format_args<buffer_context<Char>> args) {
+  return internal::vformat(to_string_view(format_str), args);
+}
+
+/**
+  \rst
+  Formats arguments and returns the result as a string.
+
+  **Example**::
+
+    #include <fmt/core.h>
+    std::string message = fmt::format("The answer is {}", 42);
+  \endrst
+*/
+// Pass char_t as a default template parameter instead of using
+// std::basic_string<char_t<S>> to reduce the symbol size.
+template <typename S, typename... Args, typename Char = char_t<S>>
+inline std::basic_string<Char> format(const S& format_str, Args&&... args) {
+  return internal::vformat(
+      to_string_view(format_str),
+      {internal::make_args_checked<Args...>(format_str, args...)});
+}
+
+FMT_API void vprint(std::FILE* f, string_view format_str, format_args args);
+FMT_API void vprint(std::FILE* f, wstring_view format_str, wformat_args args);
+
+/**
+  \rst
+  Prints formatted data to the file *f*. For wide format strings,
+  *f* should be in wide-oriented mode set via ``fwide(f, 1)`` or
+  ``_setmode(_fileno(f), _O_U8TEXT)`` on Windows.
+
+  **Example**::
+
+    fmt::print(stderr, "Don't {}!", "panic");
+  \endrst
+ */
+template <typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_string<S>::value)>
+inline void print(std::FILE* f, const S& format_str, Args&&... args) {
+  vprint(f, to_string_view(format_str),
+         internal::make_args_checked<Args...>(format_str, args...));
+}
+
+FMT_API void vprint(string_view format_str, format_args args);
+FMT_API void vprint(wstring_view format_str, wformat_args args);
+
+/**
+  \rst
+  Prints formatted data to ``stdout``.
+
+  **Example**::
+
+    fmt::print("Elapsed time: {0:.2f} seconds", 1.23);
+  \endrst
+ */
+template <typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_string<S>::value)>
+inline void print(const S& format_str, Args&&... args) {
+  vprint(to_string_view(format_str),
+         internal::make_args_checked<Args...>(format_str, args...));
+}
+FMT_END_NAMESPACE
+
+#endif  // FMT_CORE_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format-inl.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format-inl.h
new file mode 100644
index 000000000..147062fe5
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format-inl.h
@@ -0,0 +1,1000 @@
+// Formatting library for C++
+//
+// Copyright (c) 2012 - 2016, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_FORMAT_INL_H_
+#define FMT_FORMAT_INL_H_
+
+#include "format.h"
+
+#include <string.h>
+
+#include <cctype>
+#include <cerrno>
+#include <climits>
+#include <cmath>
+#include <cstdarg>
+#include <cstddef>  // for std::ptrdiff_t
+#include <cstring>  // for std::memmove
+#include <cwchar>
+#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
+#  include <locale>
+#endif
+
+#if FMT_USE_WINDOWS_H
+#  if !defined(FMT_HEADER_ONLY) && !defined(WIN32_LEAN_AND_MEAN)
+#    define WIN32_LEAN_AND_MEAN
+#  endif
+#  if defined(NOMINMAX) || defined(FMT_WIN_MINMAX)
+#    include <windows.h>
+#  else
+#    define NOMINMAX
+#    include <windows.h>
+#    undef NOMINMAX
+#  endif
+#endif
+
+#if FMT_EXCEPTIONS
+#  define FMT_TRY try
+#  define FMT_CATCH(x) catch (x)
+#else
+#  define FMT_TRY if (true)
+#  define FMT_CATCH(x) if (false)
+#endif
+
+#ifdef _MSC_VER
+#  pragma warning(push)
+#  pragma warning(disable : 4127)  // conditional expression is constant
+#  pragma warning(disable : 4702)  // unreachable code
+// Disable deprecation warning for strerror. The latter is not called but
+// MSVC fails to detect it.
+#  pragma warning(disable : 4996)
+#endif
+
+// Dummy implementations of strerror_r and strerror_s called if corresponding
+// system functions are not available.
+inline fmt::internal::null<> strerror_r(int, char*, ...) {
+  return fmt::internal::null<>();
+}
+inline fmt::internal::null<> strerror_s(char*, std::size_t, ...) {
+  return fmt::internal::null<>();
+}
+
+FMT_BEGIN_NAMESPACE
+namespace internal {
+
+#ifndef _MSC_VER
+#  define FMT_SNPRINTF snprintf
+#else  // _MSC_VER
+inline int fmt_snprintf(char* buffer, size_t size, const char* format, ...) {
+  va_list va;
+  va_start(va, format);
+  const int status = vsnprintf_s(buffer, size, _TRUNCATE, format, va);
+  va_end(va);
+  return status;
+}
+#  define FMT_SNPRINTF fmt_snprintf
+#endif  // _MSC_VER
+
+using format_func = void (*)(internal::buffer<char>&, int, string_view);
+
+// Portable thread-safe version of strerror.
+// Sets buffer to point to a string describing the error code.
+// This can be either a pointer to a string stored in buffer,
+// or a pointer to some static immutable string.
+// Returns one of the following values:
+//   0      - success
+//   ERANGE - buffer is not large enough to store the error message
+//   other  - failure
+// Buffer should be at least of size 1.
+FMT_FUNC int safe_strerror(int error_code, char*& buffer,
+                           std::size_t buffer_size) FMT_NOEXCEPT {
+  FMT_ASSERT(buffer != nullptr && buffer_size != 0, "invalid buffer");
+
+  class dispatcher {
+   private:
+    int error_code_;
+    char*& buffer_;
+    std::size_t buffer_size_;
+
+    // A no-op assignment operator to avoid bogus warnings.
+    void operator=(const dispatcher&) {}
+
+    // Handle the int result of the XSI-compliant version of strerror_r.
+    int handle(int result) {
+      // glibc versions before 2.13 return result in errno.
+      return result == -1 ? errno : result;
+    }
+
+    // Handle the char* result of the GNU-specific version of strerror_r.
+    int handle(char* message) {
+      // If the buffer is full then the message is probably truncated.
+      if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1)
+        return ERANGE;
+      buffer_ = message;
+      return 0;
+    }
+
+    // Handle the null<> returned by the dummy strerror_r: try strerror_s.
+    int handle(internal::null<>) {
+      return fallback(strerror_s(buffer_, buffer_size_, error_code_));
+    }
+
+    // Handle the int result of strerror_s, used when strerror_r is unavailable.
+    int fallback(int result) {
+      // If the buffer is full then the message is probably truncated.
+      return result == 0 && strlen(buffer_) == buffer_size_ - 1 ? ERANGE
+                                                                : result;
+    }
+
+#if !FMT_MSC_VER
+    // Fallback to strerror if strerror_r and strerror_s are not available.
+    int fallback(internal::null<>) {
+      errno = 0;
+      buffer_ = strerror(error_code_);
+      return errno;
+    }
+#endif
+
+   public:
+    dispatcher(int err_code, char*& buf, std::size_t buf_size)
+        : error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {}
+
+    int run() { return handle(strerror_r(error_code_, buffer_, buffer_size_)); }
+  };
+  return dispatcher(error_code, buffer, buffer_size).run();
+}
+
+FMT_FUNC void format_error_code(internal::buffer<char>& out, int error_code,
+                                string_view message) FMT_NOEXCEPT {
+  // Report error code making sure that the output fits into
+  // inline_buffer_size to avoid dynamic memory allocation and potential
+  // bad_alloc.
+  out.resize(0);
+  static const char SEP[] = ": ";
+  static const char ERROR_STR[] = "error ";
+  // Subtract 2 to account for terminating null characters in SEP and ERROR_STR.
+  std::size_t error_code_size = sizeof(SEP) + sizeof(ERROR_STR) - 2;
+  auto abs_value = static_cast<uint32_or_64_t<int>>(error_code);
+  if (internal::is_negative(error_code)) {
+    abs_value = 0 - abs_value;
+    ++error_code_size;
+  }
+  error_code_size += internal::to_unsigned(internal::count_digits(abs_value));
+  internal::writer w(out);
+  if (message.size() <= inline_buffer_size - error_code_size) {
+    w.write(message);
+    w.write(SEP);
+  }
+  w.write(ERROR_STR);
+  w.write(error_code);
+  assert(out.size() <= inline_buffer_size);
+}
+
+// Writes all `count` items to `stream`; a short write raises system_error.
+FMT_FUNC void fwrite_fully(const void* ptr, size_t size, size_t count,
+                           FILE* stream) {
+  const size_t items_written = std::fwrite(ptr, size, count, stream);
+  const bool complete = items_written >= count;
+  if (complete) return;
+  FMT_THROW(system_error(errno, "cannot write to file"));
+}
+
+FMT_FUNC void report_error(format_func func, int error_code,
+                           string_view message) FMT_NOEXCEPT {
+  memory_buffer text;
+  func(text, error_code, message);
+  // fwrite_fully may throw, so call fwrite directly and ignore its result.
+  (void)std::fwrite(text.data(), text.size(), 1, stderr);
+  std::fputc('\n', stderr);
+}
+}  // namespace internal
+
+#if !defined(FMT_STATIC_THOUSANDS_SEPARATOR)
+namespace internal {
+
+template <typename Locale>
+locale_ref::locale_ref(const Locale& loc) : locale_(&loc) {
+  static_assert(std::is_same<Locale, std::locale>::value, "");
+}
+
+template <typename Locale> Locale locale_ref::get() const {
+  static_assert(std::is_same<Locale, std::locale>::value, "");
+  return locale_ ? *static_cast<const std::locale*>(locale_) : std::locale();
+}
+
+template <typename Char> FMT_FUNC Char thousands_sep_impl(locale_ref loc) {
+  return std::use_facet<std::numpunct<Char>>(loc.get<std::locale>())
+      .thousands_sep();
+}
+template <typename Char> FMT_FUNC Char decimal_point_impl(locale_ref loc) {
+  const std::locale active = loc.get<std::locale>();  // locale referenced by loc
+  return std::use_facet<std::numpunct<Char>>(active).decimal_point();
+}
+}  // namespace internal
+#else
+template <typename Char>
+FMT_FUNC Char internal::thousands_sep_impl(locale_ref) {  // the locale argument is ignored
+  return FMT_STATIC_THOUSANDS_SEPARATOR;  // separator supplied by the build-time macro
+}
+template <typename Char>
+FMT_FUNC Char internal::decimal_point_impl(locale_ref) {  // the locale argument is ignored
+  return '.';  // fixed decimal point when FMT_STATIC_THOUSANDS_SEPARATOR is defined
+}
+#endif
+
+FMT_API FMT_FUNC format_error::~format_error() FMT_NOEXCEPT {}  // empty out-of-line destructor
+FMT_API FMT_FUNC system_error::~system_error() FMT_NOEXCEPT {}  // empty out-of-line destructor
+
+FMT_FUNC void system_error::init(int err_code, string_view format_str,
+                                 format_args args) {
+  error_code_ = err_code;
+  memory_buffer text;
+  format_system_error(text, err_code, vformat(format_str, args));
+  // Overwrite the std::runtime_error base subobject with one carrying the text.
+  static_cast<std::runtime_error&>(*this) = std::runtime_error(to_string(text));
+}
+
+namespace internal {
+
+template <> FMT_FUNC int count_digits<4>(internal::fallback_uintptr n) {
+  // Hex digits per byte. Bytes are assumed little endian (index 0 is lowest).
+  const auto per_byte = std::numeric_limits<unsigned char>::digits / 4;
+  int hi = static_cast<int>(sizeof(void*)) - 1;
+  for (; hi > 0 && n.value[hi] == 0; --hi) {}  // skip zero high-order bytes
+  return hi >= 0 ? hi * per_byte + count_digits<4, unsigned>(n.value[hi]) : 1;
+}
+
+template <typename T>
+int format_float(char* buf, std::size_t size, const char* format, int precision,
+                 T value) {
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  if (precision > 100000)
+    throw std::runtime_error(
+        "fuzz mode - avoid large allocation inside snprintf");
+#endif
+  // Calling through a pointer suppresses the nonliteral-format-string warning.
+  auto print = FMT_SNPRINTF;
+  if (precision < 0) return print(buf, size, format, value);
+  return print(buf, size, format, precision, value);
+}
+
+template <typename T>
+const char basic_data<T>::digits[] =  // "00" through "99", two characters each
+    "0001020304050607080910111213141516171819"
+    "2021222324252627282930313233343536373839"
+    "4041424344454647484950515253545556575859"
+    "6061626364656667686970717273747576777879"
+    "8081828384858687888990919293949596979899";
+
+template <typename T>
+const char basic_data<T>::hex_digits[] = "0123456789abcdef";  // lowercase hex digits
+
+#define FMT_POWERS_OF_10(factor)                                             \
+  factor * 10, factor * 100, factor * 1000, factor * 10000, factor * 100000, \
+      factor * 1000000, factor * 10000000, factor * 100000000,               \
+      factor * 1000000000  // nine values: factor * 10^1 .. factor * 10^9
+
+template <typename T>
+const uint64_t basic_data<T>::powers_of_10_64[] = {  // 10^0 .. 10^19
+    1, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ull),
+    10000000000000000000ull};
+
+template <typename T>
+const uint32_t basic_data<T>::zero_or_powers_of_10_32[] = {0,  // 0, then 10^1 .. 10^9
+                                                           FMT_POWERS_OF_10(1)};
+
+template <typename T>
+const uint64_t basic_data<T>::zero_or_powers_of_10_64[] = {  // 0, then 10^1 .. 10^19
+    0, FMT_POWERS_OF_10(1), FMT_POWERS_OF_10(1000000000ull),
+    10000000000000000000ull};
+
+// Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340
+// (87 entries, step 8). These are generated by support/compute-powers.py.
+template <typename T>
+const uint64_t basic_data<T>::pow10_significands[] = {
+    0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76,
+    0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df,
+    0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c,
+    0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5,
+    0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57,
+    0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7,
+    0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e,
+    0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996,
+    0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126,
+    0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053,
+    0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f,
+    0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b,
+    0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06,
+    0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb,
+    0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000,
+    0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984,
+    0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068,
+    0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8,
+    0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758,
+    0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85,
+    0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d,
+    0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25,
+    0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2,
+    0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a,
+    0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410,
+    0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129,
+    0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85,
+    0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841,
+    0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b,
+};
+
+// Binary exponents of pow(10, k), for k = -348, -340, ..., 340 (87 entries);
+// entry i belongs with entry i of pow10_significands.
+template <typename T>
+const int16_t basic_data<T>::pow10_exponents[] = {
+    -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954,
+    -927,  -901,  -874,  -847,  -821,  -794,  -768,  -741,  -715,  -688, -661,
+    -635,  -608,  -582,  -555,  -529,  -502,  -475,  -449,  -422,  -396, -369,
+    -343,  -316,  -289,  -263,  -236,  -210,  -183,  -157,  -130,  -103, -77,
+    -50,   -24,   3,     30,    56,    83,    109,   136,   162,   189,  216,
+    242,   269,   295,   322,   348,   375,   402,   428,   455,   481,  508,
+    534,   561,   588,   614,   641,   667,   694,   720,   747,   774,  800,
+    827,   853,   880,   907,   933,   960,   986,   1013,  1039,  1066};
+
+template <typename T>
+const char basic_data<T>::foreground_color[] = "\x1b[38;2;";  // escape prefix: ESC [ 38;2;
+template <typename T>
+const char basic_data<T>::background_color[] = "\x1b[48;2;";  // escape prefix: ESC [ 48;2;
+template <typename T> const char basic_data<T>::reset_color[] = "\x1b[0m";  // ESC [ 0 m
+template <typename T> const wchar_t basic_data<T>::wreset_color[] = L"\x1b[0m";  // wide form of reset_color
+
+template <typename T> struct bits {  // value is the size of T in bits
+  static FMT_CONSTEXPR_DECL const int value =
+      static_cast<int>(sizeof(T) * std::numeric_limits<unsigned char>::digits);  // bytes * bits per byte
+};
+
+// A handmade floating-point number f * pow(2, e).
+class fp {
+ private:
+  using significand_type = uint64_t;
+
+  // All sizes are in bits.
+  // Subtract 1 to account for an implicit most significant bit in the
+  // normalized form.
+  static FMT_CONSTEXPR_DECL const int double_significand_size =
+      std::numeric_limits<double>::digits - 1;
+  static FMT_CONSTEXPR_DECL const uint64_t implicit_bit =
+      1ull << double_significand_size;  // the bit just above the stored significand bits
+
+ public:
+  significand_type f;  // significand
+  int e;  // binary exponent
+
+  static FMT_CONSTEXPR_DECL const int significand_size =
+      bits<significand_type>::value;  // width of f in bits
+
+  fp() : f(0), e(0) {}
+  fp(uint64_t f_val, int e_val) : f(f_val), e(e_val) {}
+
+  // Constructs fp from an IEEE754 double. It is a template to prevent compile
+  // errors on platforms where double is not IEEE754.
+  template <typename Double> explicit fp(Double d) {
+    // Assume double is in the format [sign][exponent][significand].
+    using limits = std::numeric_limits<Double>;
+    const int exponent_size =
+        bits<Double>::value - double_significand_size - 1;  // -1 for sign
+    const uint64_t significand_mask = implicit_bit - 1;  // all bits below the implicit bit
+    const uint64_t exponent_mask = (~0ull >> 1) & ~significand_mask;  // drops the sign bit and the significand
+    const int exponent_bias = (1 << exponent_size) - limits::max_exponent - 1;
+    auto u = bit_cast<uint64_t>(d);  // raw bit pattern of d
+    auto biased_e = (u & exponent_mask) >> double_significand_size;
+    f = u & significand_mask;
+    if (biased_e != 0)
+      f += implicit_bit;  // non-zero exponent field: add the implicit leading bit
+    else
+      biased_e = 1;  // Subnormals use biased exponent 1 (min exponent).
+    e = static_cast<int>(biased_e - exponent_bias - double_significand_size);
+  }
+
+  // Normalizes the value converted from double and multiplied by (1 << SHIFT).
+  template <int SHIFT = 0> void normalize() {
+    // Handle subnormals: shift left until the (shifted) implicit bit is set.
+    auto shifted_implicit_bit = implicit_bit << SHIFT;
+    while ((f & shifted_implicit_bit) == 0) {
+      f <<= 1;
+      --e;
+    }
+    // Subtract 1 to account for hidden bit.
+    auto offset = significand_size - double_significand_size - SHIFT - 1;
+    f <<= offset;  // move the leading bit to the top of the 64-bit significand
+    e -= offset;   // compensate so that f * pow(2, e) is unchanged
+  }
+
+  // Compute lower and upper boundaries (m^- and m^+ in the Grisu paper), where
+  // a boundary is a value half way between the number and its predecessor
+  // (lower) or successor (upper). The upper boundary is normalized and lower
+  // has the same exponent but may not be normalized.
+  void compute_boundaries(fp& lower, fp& upper) const {
+    lower =
+        f == implicit_bit ? fp((f << 2) - 1, e - 2) : fp((f << 1) - 1, e - 1);
+    upper = fp((f << 1) + 1, e - 1);
+    upper.normalize<1>();  // 1 is to account for the exponent shift above.
+    lower.f <<= lower.e - upper.e;  // rescale lower to upper's exponent
+    lower.e = upper.e;
+  }
+};
+
+// Computes x - y for operands with equal exponents; result is not normalized.
+inline fp operator-(fp x, fp y) {
+  FMT_ASSERT(x.f >= y.f && x.e == y.e, "invalid operands");
+  return {x.f - y.f, x.e};
+}
+
+// Computes an fp number r with r.f = x.f * y.f / pow(2, 64) rounded to nearest
+// with half-up tie breaking, r.e = x.e + y.e + 64. Result may not be
+// normalized.
+FMT_FUNC fp operator*(fp x, fp y) {
+  int exp = x.e + y.e + 64;
+#if FMT_USE_INT128
+  auto product = static_cast<__uint128_t>(x.f) * y.f;  // full 128-bit product
+  auto f = static_cast<uint64_t>(product >> 64);  // keep the upper 64 bits
+  if ((static_cast<uint64_t>(product) & (1ULL << 63)) != 0) ++f;  // round up if the top discarded bit is set
+  return fp(f, exp);
+#else
+  // Multiply 32-bit parts of significands.
+  uint64_t mask = (1ULL << 32) - 1;
+  uint64_t a = x.f >> 32, b = x.f & mask;  // high and low halves of x.f
+  uint64_t c = y.f >> 32, d = y.f & mask;  // high and low halves of y.f
+  uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d;
+  // Compute mid 64-bit of result and round.
+  uint64_t mid = (bd >> 32) + (ad & mask) + (bc & mask) + (1U << 31);  // 1U << 31 is the rounding term
+  return fp(ac + (ad >> 32) + (bc >> 32) + (mid >> 32), exp);
+#endif
+}
+
+// Returns cached power (of 10) c_k = c_k.f * pow(2, c_k.e) such that its
+// (binary) exponent satisfies min_exponent <= c_k.e <= min_exponent + 28.
+FMT_FUNC fp get_cached_power(int min_exponent, int& pow10_exponent) {  // pow10_exponent receives the decimal exponent k
+  const double one_over_log2_10 = 0.30102999566398114;  // 1 / log2(10)
+  int index = static_cast<int>(
+      std::ceil((min_exponent + fp::significand_size - 1) * one_over_log2_10));
+  // Decimal exponent of the first (smallest) cached power of 10.
+  const int first_dec_exp = -348;
+  // Difference between 2 consecutive decimal exponents in cached powers of 10.
+  const int dec_exp_step = 8;
+  index = (index - first_dec_exp - 1) / dec_exp_step + 1;  // turn the decimal exponent into a table index
+  pow10_exponent = first_dec_exp + index * dec_exp_step;
+  return fp(data::pow10_significands[index], data::pow10_exponents[index]);
+}
+
+enum round_direction { unknown, up, down };  // values returned by get_round_direction
+
+// Given the divisor (normally a power of 10), the remainder = v % divisor for
+// some number v and the error, returns whether v should be rounded up, down, or
+// whether the rounding direction can't be determined due to error.
+// error should be less than divisor / 2.
+inline round_direction get_round_direction(uint64_t divisor, uint64_t remainder,
+                                           uint64_t error) {
+  FMT_ASSERT(remainder < divisor, "");  // divisor - remainder won't wrap around.
+  FMT_ASSERT(error < divisor, "");      // divisor - error won't wrap around.
+  FMT_ASSERT(error < divisor - error, "");  // error * 2 won't overflow.
+  // Round down if (remainder + error) * 2 <= divisor (written without overflow).
+  if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2)
+    return down;
+  // Round up if (remainder - error) * 2 >= divisor (written without overflow).
+  if (remainder >= error &&
+      remainder - error >= divisor - (remainder - error)) {
+    return up;
+  }
+  return unknown;  // the error interval straddles the midpoint
+}
+
+namespace digits {
+enum result {  // status returned by the digit handlers' on_start / on_digit
+  more,  // Generate more digits.
+  done,  // Done generating digits.
+  error  // Digit generation cancelled due to an error.
+};
+}  // namespace digits
+
+// Generates output using the Grisu digit-gen algorithm.
+// error: the size of the region (lower, upper) outside of which numbers
+// definitely do not round to value (Delta in Grisu3).
+template <typename Handler>
+digits::result grisu_gen_digits(fp value, uint64_t error, int& exp,
+                                Handler& handler) {
+  fp one(1ull << -value.e, value.e);  // the value 1 at value's exponent
+  // The integral part of scaled value (p1 in Grisu) = value / one. It cannot be
+  // zero because it contains a product of two 64-bit numbers with MSB set (due
+  // to normalization) - 1, shifted right by at most 60 bits.
+  uint32_t integral = static_cast<uint32_t>(value.f >> -one.e);
+  FMT_ASSERT(integral != 0, "");
+  FMT_ASSERT(integral == value.f >> -one.e, "");  // the integral part fits in 32 bits
+  // The fractional part of scaled value (p2 in Grisu) = value % one.
+  uint64_t fractional = value.f & (one.f - 1);
+  exp = count_digits(integral);  // kappa in Grisu.
+  // Divide by 10 to prevent overflow.
+  auto result = handler.on_start(data::powers_of_10_64[exp - 1] << -one.e,
+                                 value.f / 10, error * 10, exp);
+  if (result != digits::more) return result;
+  // Generate digits for the integral part. This can produce up to 10 digits.
+  do {
+    uint32_t digit = 0;
+    // This optimization by miloyip reduces the number of integer divisions by
+    // one per iteration.
+    switch (exp) {
+    case 10:
+      digit = integral / 1000000000;
+      integral %= 1000000000;
+      break;
+    case 9:
+      digit = integral / 100000000;
+      integral %= 100000000;
+      break;
+    case 8:
+      digit = integral / 10000000;
+      integral %= 10000000;
+      break;
+    case 7:
+      digit = integral / 1000000;
+      integral %= 1000000;
+      break;
+    case 6:
+      digit = integral / 100000;
+      integral %= 100000;
+      break;
+    case 5:
+      digit = integral / 10000;
+      integral %= 10000;
+      break;
+    case 4:
+      digit = integral / 1000;
+      integral %= 1000;
+      break;
+    case 3:
+      digit = integral / 100;
+      integral %= 100;
+      break;
+    case 2:
+      digit = integral / 10;
+      integral %= 10;
+      break;
+    case 1:
+      digit = integral;
+      integral = 0;
+      break;
+    default:
+      FMT_ASSERT(false, "invalid number of digits");
+    }
+    --exp;
+    uint64_t remainder =
+        (static_cast<uint64_t>(integral) << -one.e) + fractional;  // what is left after this digit
+    result = handler.on_digit(static_cast<char>('0' + digit),
+                              data::powers_of_10_64[exp] << -one.e, remainder,
+                              error, exp, true);
+    if (result != digits::more) return result;
+  } while (exp > 0);
+  // Generate digits for the fractional part; the loop ends when the handler
+  // returns anything other than digits::more.
+  for (;;) {
+    fractional *= 10;
+    error *= 10;
+    char digit =
+        static_cast<char>('0' + static_cast<char>(fractional >> -one.e));
+    fractional &= one.f - 1;  // keep only the bits below one
+    --exp;
+    result = handler.on_digit(digit, one.f, fractional, error, exp, false);
+    if (result != digits::more) return result;
+  }
+}
+
+// The fixed precision digit handler.
+struct fixed_handler {
+  char* buf;  // output buffer for generated digits
+  int size;  // number of characters written to buf so far
+  int precision;  // number of digits to generate; adjusted in on_start when fixed
+  int exp10;  // decimal exponent added to exp when adjusting precision
+  bool fixed;  // true when precision is relative to the decimal point
+
+  digits::result on_start(uint64_t divisor, uint64_t remainder, uint64_t error,
+                          int& exp) {
+    // Non-fixed formats require at least one digit and no precision adjustment.
+    if (!fixed) return digits::more;
+    // Adjust fixed precision by exponent because it is relative to decimal
+    // point.
+    precision += exp + exp10;
+    // Check if precision is satisfied just by leading zeros, e.g.
+    // format("{:.2f}", 0.001) gives "0.00" without generating any digits.
+    if (precision > 0) return digits::more;
+    if (precision < 0) return digits::done;
+    auto dir = get_round_direction(divisor, remainder, error);
+    if (dir == unknown) return digits::error;
+    buf[size++] = dir == up ? '1' : '0';  // the single digit is decided by rounding alone
+    return digits::done;
+  }
+
+  digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder,
+                          uint64_t error, int, bool integral) {
+    FMT_ASSERT(remainder < divisor, "");
+    buf[size++] = digit;
+    if (size < precision) return digits::more;
+    if (!integral) {
+      // Check if error * 2 < divisor with overflow prevention.
+      // The check is not needed for the integral part because error = 1
+      // and divisor > (1 << 32) there.
+      if (error >= divisor || error >= divisor - error) return digits::error;
+    } else {
+      FMT_ASSERT(error == 1 && divisor > 2, "");
+    }
+    auto dir = get_round_direction(divisor, remainder, error);
+    if (dir != up) return dir == down ? digits::done : digits::error;
+    ++buf[size - 1];  // round up the last digit and propagate carries leftwards
+    for (int i = size - 1; i > 0 && buf[i] > '9'; --i) {
+      buf[i] = '0';
+      ++buf[i - 1];
+    }
+    if (buf[0] > '9') {  // the carry ran past the first digit
+      buf[0] = '1';
+      buf[size++] = '0';
+    }
+    return digits::done;
+  }
+};
+
+// The shortest representation digit handler.
+template <int GRISU_VERSION> struct grisu_shortest_handler {
+  char* buf;  // output buffer for generated digits
+  int size;  // number of characters written to buf so far
+  // Distance between scaled value and upper bound (wp_W in Grisu3).
+  uint64_t diff;
+
+  digits::result on_start(uint64_t, uint64_t, uint64_t, int&) {  // nothing to prepare
+    return digits::more;
+  }
+
+  // Decrement the generated number approaching value from above.
+  void round(uint64_t d, uint64_t divisor, uint64_t& remainder,
+             uint64_t error) {
+    while (
+        remainder < d && error - remainder >= divisor &&
+        (remainder + divisor < d || d - remainder >= remainder + divisor - d)) {
+      --buf[size - 1];  // lower the last digit by one
+      remainder += divisor;
+    }
+  }
+
+  // Implements Grisu's round_weed.
+  digits::result on_digit(char digit, uint64_t divisor, uint64_t remainder,
+                          uint64_t error, int exp, bool integral) {
+    buf[size++] = digit;
+    if (remainder >= error) return digits::more;
+    if (GRISU_VERSION != 3) {  // non-Grisu3 path: round and accept the result
+      uint64_t d = integral ? diff : diff * data::powers_of_10_64[-exp];
+      round(d, divisor, remainder, error);
+      return digits::done;
+    }
+    uint64_t unit = integral ? 1 : data::powers_of_10_64[-exp];
+    uint64_t up = (diff - 1) * unit;  // wp_Wup
+    round(up, divisor, remainder, error);
+    uint64_t down = (diff + 1) * unit;  // wp_Wdown
+    if (remainder < down && error - remainder >= divisor &&
+        (remainder + divisor < down ||
+         down - remainder > remainder + divisor - down)) {
+      return digits::error;
+    }
+    return 2 * unit <= remainder && remainder <= error - 4 * unit
+               ? digits::done
+               : digits::error;
+  }
+};
+
+template <typename Double,  // Grisu formatter: digits go to buf, decimal exponent to exp
+          enable_if_t<(sizeof(Double) == sizeof(uint64_t)), int>>  // only for 64-bit Double
+FMT_API bool grisu_format(Double value, buffer<char>& buf, int precision,
+                          unsigned options, int& exp) {  // returns false when Grisu gives up
+  FMT_ASSERT(value >= 0, "value is negative");
+  bool fixed = (options & grisu_options::fixed) != 0;
+  if (value <= 0) {  // <= instead of == to silence a warning.
+    if (precision <= 0 || !fixed) {
+      exp = 0;
+      buf.push_back('0');
+    } else {
+      exp = -precision;
+      buf.resize(to_unsigned(precision));  // precision > 0 in this branch
+      std::uninitialized_fill_n(buf.data(), precision, '0');
+    }
+    return true;
+  }
+
+  fp fp_value(value);
+  const int min_exp = -60;  // alpha in Grisu.
+  int cached_exp10 = 0;     // K in Grisu.
+  if (precision != -1) {  // an explicit precision was requested
+    if (precision > 17) return false;  // this implementation declines above 17 digits
+    fp_value.normalize();
+    auto cached_pow = get_cached_power(
+        min_exp - (fp_value.e + fp::significand_size), cached_exp10);
+    fp_value = fp_value * cached_pow;
+    fixed_handler handler{buf.data(), 0, precision, -cached_exp10, fixed};
+    if (grisu_gen_digits(fp_value, 1, exp, handler) == digits::error)
+      return false;
+    buf.resize(to_unsigned(handler.size));
+  } else {  // precision == -1: shortest representation
+    fp lower, upper;  // w^- and w^+ in the Grisu paper.
+    fp_value.compute_boundaries(lower, upper);
+    // Find a cached power of 10 such that multiplying upper by it will bring
+    // the exponent in the range [min_exp, -32].
+    auto cached_pow = get_cached_power(  // \tilde{c}_{-k} in Grisu.
+        min_exp - (upper.e + fp::significand_size), cached_exp10);
+    fp_value.normalize();
+    fp_value = fp_value * cached_pow;
+    lower = lower * cached_pow;  // \tilde{M}^- in Grisu.
+    upper = upper * cached_pow;  // \tilde{M}^+ in Grisu.
+    assert(min_exp <= upper.e && upper.e <= -32);
+    auto result = digits::result();
+    int size = 0;
+    if ((options & grisu_options::grisu3) != 0) {
+      --lower.f;  // \tilde{M}^- - 1 ulp -> M^-_{\downarrow}.
+      ++upper.f;  // \tilde{M}^+ + 1 ulp -> M^+_{\uparrow}.
+      // Numbers outside of (lower, upper) definitely do not round to value.
+      grisu_shortest_handler<3> handler{buf.data(), 0, (upper - fp_value).f};
+      result = grisu_gen_digits(upper, upper.f - lower.f, exp, handler);
+      size = handler.size;
+    } else {
+      ++lower.f;  // \tilde{M}^- + 1 ulp -> M^-_{\uparrow}.
+      --upper.f;  // \tilde{M}^+ - 1 ulp -> M^+_{\downarrow}.
+      grisu_shortest_handler<2> handler{buf.data(), 0, (upper - fp_value).f};
+      result = grisu_gen_digits(upper, upper.f - lower.f, exp, handler);
+      size = handler.size;
+    }
+    if (result == digits::error) return false;
+    buf.resize(to_unsigned(size));
+  }
+  exp -= cached_exp10;  // undo the scaling by the cached power of 10
+  return true;
+}
+
+template <typename Double>  // formats value into buf via snprintf
+char* sprintf_format(Double value, internal::buffer<char>& buf,
+                     sprintf_specs specs) {  // returns the decimal point position or nullptr
+  // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail.
+  FMT_ASSERT(buf.capacity() != 0, "empty buffer");
+
+  // Build format string.
+  enum { max_format_size = 10 };  // longest format: %#-*.*Lg
+  char format[max_format_size];
+  char* format_ptr = format;
+  *format_ptr++ = '%';
+  if (specs.alt || !specs.type) *format_ptr++ = '#';  // '#' also when no type is given
+  if (specs.precision >= 0) {
+    *format_ptr++ = '.';
+    *format_ptr++ = '*';  // precision is passed as an argument
+  }
+  if (std::is_same<Double, long double>::value) *format_ptr++ = 'L';
+
+  char type = specs.type;
+
+  if (type == '%')
+    type = 'f';
+  else if (type == 0 || type == 'n')
+    type = 'g';
+#if FMT_MSC_VER
+  if (type == 'F') {
+    // MSVC's printf doesn't support 'F'.
+    type = 'f';
+  }
+#endif
+  *format_ptr++ = type;
+  *format_ptr = '\0';
+
+  // Format using snprintf, growing the buffer until the output fits.
+  char* start = nullptr;
+  char* decimal_point_pos = nullptr;
+  for (;;) {
+    std::size_t buffer_size = buf.capacity();
+    start = &buf[0];
+    int result =
+        format_float(start, buffer_size, format, specs.precision, value);
+    if (result >= 0) {
+      unsigned n = internal::to_unsigned(result);
+      if (n < buf.capacity()) {
+        // Find the decimal point.
+        auto p = buf.data(), end = p + n;
+        if (*p == '+' || *p == '-') ++p;  // skip the sign
+        if (specs.type != 'a' && specs.type != 'A') {
+          while (p < end && *p >= '0' && *p <= '9') ++p;  // skip the integer digits
+          if (p < end && *p != 'e' && *p != 'E') {
+            decimal_point_pos = p;
+            if (!specs.type) {
+              // Keep only one trailing zero after the decimal point.
+              ++p;
+              if (*p == '0') ++p;
+              while (p != end && *p >= '1' && *p <= '9') ++p;
+              char* where = p;  // start of the run of zeros that may be dropped
+              while (p != end && *p == '0') ++p;
+              if (p == end || *p < '0' || *p > '9') {
+                if (p != end) std::memmove(where, p, to_unsigned(end - p));  // move the tail over the zeros
+                n -= static_cast<unsigned>(p - where);
+              }
+            }
+          }
+        }
+        buf.resize(n);
+        break;  // The buffer is large enough - continue with formatting.
+      }
+      buf.reserve(n + 1);  // output did not fit: make room and retry
+    } else {
+      // If result is negative we ask to increase the capacity by at least 1,
+      // but as std::vector, the buffer grows exponentially.
+      buf.reserve(buf.capacity() + 1);
+    }
+  }
+  return decimal_point_pos;
+}
+}  // namespace internal
+
+#if FMT_USE_WINDOWS_H
+
+FMT_FUNC internal::utf8_to_utf16::utf8_to_utf16(string_view s) {
+  static const char failure[] = "cannot convert string from UTF-8 to UTF-16";
+  if (s.size() > INT_MAX)
+    FMT_THROW(windows_error(ERROR_INVALID_PARAMETER, failure));
+  const int input_len = static_cast<int>(s.size());
+  if (input_len == 0) {
+    // MultiByteToWideChar rejects a zero length, so build the empty result here.
+    buffer_.resize(1);
+    buffer_[0] = 0;
+    return;
+  }
+
+  int wide_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(),
+                                     input_len, nullptr, 0);  // sizing call
+  if (wide_len == 0) FMT_THROW(windows_error(GetLastError(), failure));
+  buffer_.resize(wide_len + 1);  // one extra element for the terminator
+  wide_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, s.data(),
+                                 input_len, &buffer_[0], wide_len);
+  if (wide_len == 0) FMT_THROW(windows_error(GetLastError(), failure));
+  buffer_[wide_len] = 0;
+}
+
+FMT_FUNC internal::utf16_to_utf8::utf16_to_utf8(wstring_view s) {
+  const int status = convert(s);
+  if (status == 0) return;  // zero means the conversion succeeded
+  // Any non-zero status from convert is raised as a windows_error.
+  FMT_THROW(windows_error(status, "cannot convert string from UTF-16 to UTF-8"));
+}
+
+FMT_FUNC int internal::utf16_to_utf8::convert(wstring_view s) {
+  if (s.size() > INT_MAX) return ERROR_INVALID_PARAMETER;
+  const int input_len = static_cast<int>(s.size());
+  if (input_len == 0) {
+    // WideCharToMultiByte rejects a zero length, so build the empty result here.
+    buffer_.resize(1);
+    buffer_[0] = 0;
+    return 0;
+  }
+
+  int utf8_len = WideCharToMultiByte(CP_UTF8, 0, s.data(), input_len, nullptr,
+                                     0, nullptr, nullptr);  // sizing call
+  if (utf8_len == 0) return GetLastError();
+  buffer_.resize(utf8_len + 1);  // one extra element for the terminator
+  utf8_len = WideCharToMultiByte(CP_UTF8, 0, s.data(), input_len, &buffer_[0],
+                                 utf8_len, nullptr, nullptr);
+  if (utf8_len == 0) return GetLastError();
+  buffer_[utf8_len] = 0;
+  return 0;
+}
+
+FMT_FUNC void windows_error::init(int err_code, string_view format_str,
+                                  format_args args) {
+  error_code_ = err_code;
+  memory_buffer text;
+  internal::format_windows_error(text, err_code, vformat(format_str, args));
+  // Overwrite the std::runtime_error base subobject with one carrying the text.
+  static_cast<std::runtime_error&>(*this) = std::runtime_error(to_string(text));
+}
+
+FMT_FUNC void internal::format_windows_error(internal::buffer<char>& out,  // writes "<message>: <system text>" to out
+                                             int error_code,
+                                             string_view message) FMT_NOEXCEPT {
+  FMT_TRY {
+    wmemory_buffer buf;
+    buf.resize(inline_buffer_size);
+    for (;;) {  // retry with a larger buffer until FormatMessageW succeeds or fails for another reason
+      wchar_t* system_message = &buf[0];
+      int result = FormatMessageW(
+          FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, nullptr,
+          error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), system_message,
+          static_cast<uint32_t>(buf.size()), nullptr);
+      if (result != 0) {
+        utf16_to_utf8 utf8_message;
+        if (utf8_message.convert(system_message) == ERROR_SUCCESS) {
+          internal::writer w(out);
+          w.write(message);
+          w.write(": ");
+          w.write(utf8_message);
+          return;
+        }
+        break;  // UTF-8 conversion failed: fall back to the error code below
+      }
+      if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+        break;  // Can't get error message, report error code instead.
+      buf.resize(buf.size() * 2);
+    }
+  }
+  FMT_CATCH(...) {}  // exceptions are swallowed; the fallback below still runs
+  format_error_code(out, error_code, message);
+}
+
+#endif  // FMT_USE_WINDOWS_H
+
+FMT_FUNC void format_system_error(internal::buffer<char>& out, int error_code,  // writes "<message>: <system text>" to out
+                                  string_view message) FMT_NOEXCEPT {
+  FMT_TRY {
+    memory_buffer buf;
+    buf.resize(inline_buffer_size);
+    for (;;) {  // retry with a larger buffer while safe_strerror reports ERANGE
+      char* system_message = &buf[0];
+      int result =
+          internal::safe_strerror(error_code, system_message, buf.size());
+      if (result == 0) {
+        internal::writer w(out);
+        w.write(message);
+        w.write(": ");
+        w.write(system_message);
+        return;
+      }
+      if (result != ERANGE)
+        break;  // Can't get error message, report error code instead.
+      buf.resize(buf.size() * 2);
+    }
+  }
+  FMT_CATCH(...) {}  // exceptions are swallowed; the fallback below still runs
+  format_error_code(out, error_code, message);
+}
+
+FMT_FUNC void internal::error_handler::on_error(const char* message) {  // reports a formatting error
+  FMT_THROW(format_error(message));  // raised as format_error carrying message
+}
+
+FMT_FUNC void report_system_error(int error_code,  // prints the formatted system error to stderr
+                                  fmt::string_view message) FMT_NOEXCEPT {
+  report_error(format_system_error, error_code, message);  // report_error does the formatting and writing
+}
+
+#if FMT_USE_WINDOWS_H
+FMT_FUNC void report_windows_error(int error_code,  // prints the formatted Windows error to stderr
+                                   fmt::string_view message) FMT_NOEXCEPT {
+  report_error(internal::format_windows_error, error_code, message);  // report_error does the formatting and writing
+}
+#endif
+
+FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) {
+  memory_buffer out;  // the whole message is formatted here before any I/O
+  basic_format_args<buffer_context<char>> typed_args(args);
+  internal::vformat_to(out, format_str, typed_args);
+  internal::fwrite_fully(out.data(), 1, out.size(), f);
+}
+
+FMT_FUNC void vprint(std::FILE* f, wstring_view format_str, wformat_args args) {  // formats, then writes wide text to f
+  wmemory_buffer buffer;
+  internal::vformat_to(buffer, format_str, args);
+  buffer.push_back(L'\0');  // fputws expects a null-terminated wide string
+  if (std::fputws(buffer.data(), f) < 0) {  // failure is EOF: negative, not necessarily -1
+    FMT_THROW(system_error(errno, "cannot write to file"));
+  }
+}
+
+FMT_FUNC void vprint(string_view format_str, format_args args) {  // stdout convenience overload
+  vprint(stdout, format_str, args);  // forwards to the FILE* overload
+}
+
+FMT_FUNC void vprint(wstring_view format_str, wformat_args args) {  // wide stdout convenience overload
+  vprint(stdout, format_str, args);  // forwards to the wide FILE* overload
+}
+
+FMT_END_NAMESPACE
+
+#ifdef _MSC_VER
+#  pragma warning(pop)
+#endif
+
+#endif  // FMT_FORMAT_INL_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format.cc b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format.cc
new file mode 100644
index 000000000..679ac799a
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format.cc
@@ -0,0 +1,57 @@
+// Formatting library for C++
+//
+// Copyright (c) 2012 - 2016, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#include "format-inl.h"
+
+FMT_BEGIN_NAMESPACE
+template struct FMT_API internal::basic_data<void>;  // explicit instantiation of basic_data for T = void
+
+// Work around a bug in MSVC2013 that prevents instantiation of grisu_format.
+bool (*instantiate_grisu_format)(double, internal::buffer<char>&, int, unsigned,
+                                 int&) = internal::grisu_format;  // taking the address selects the double version
+
+#ifndef FMT_STATIC_THOUSANDS_SEPARATOR
+template FMT_API internal::locale_ref::locale_ref(const std::locale& loc);
+template FMT_API std::locale internal::locale_ref::get<std::locale>() const;
+#endif
+
+// Explicit instantiations for char.
+
+template FMT_API char internal::thousands_sep_impl(locale_ref);
+template FMT_API char internal::decimal_point_impl(locale_ref);
+
+template FMT_API void internal::buffer<char>::append(const char*, const char*);
+
+template FMT_API void internal::arg_map<format_context>::init(
+    const basic_format_args<format_context>& args);
+
+template FMT_API std::string internal::vformat<char>(
+    string_view, basic_format_args<format_context>);
+
+template FMT_API format_context::iterator internal::vformat_to(
+    internal::buffer<char>&, string_view, basic_format_args<format_context>);
+
+template FMT_API char* internal::sprintf_format(double, internal::buffer<char>&,
+                                                sprintf_specs);  // double version
+template FMT_API char* internal::sprintf_format(long double,
+                                                internal::buffer<char>&,
+                                                sprintf_specs);  // long double version
+
+// Explicit instantiations for wchar_t.
+
+template FMT_API wchar_t internal::thousands_sep_impl(locale_ref);
+template FMT_API wchar_t internal::decimal_point_impl(locale_ref);
+
+template FMT_API void internal::buffer<wchar_t>::append(const wchar_t*,
+                                                        const wchar_t*);
+
+template FMT_API void internal::arg_map<wformat_context>::init(
+    const basic_format_args<wformat_context>&);
+
+template FMT_API std::wstring internal::vformat<wchar_t>(
+    wstring_view, basic_format_args<wformat_context>);
+FMT_END_NAMESPACE
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format.h
new file mode 100644
index 000000000..dcf4a3998
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/format.h
@@ -0,0 +1,3602 @@
+/*
+ Formatting library for C++
+
+ Copyright (c) 2012 - present, Victor Zverovich
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ --- Optional exception to the license ---
+
+ As an exception, if, as a result of your compiling your source code, portions
+ of this Software are embedded into a machine-executable object form of such
+ source code, you may redistribute such embedded portions in such object form
+ without including the above copyright and permission notices.
+ */
+
+#ifndef FMT_FORMAT_H_
+#define FMT_FORMAT_H_
+
+#define FMT_HEADER_ONLY     // Added by diy for header-only usage
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <stdexcept>
+
+#include "core.h"
+
+#ifdef __clang__
+#  define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__)
+#else
+#  define FMT_CLANG_VERSION 0
+#endif
+
+#ifdef __INTEL_COMPILER
+#  define FMT_ICC_VERSION __INTEL_COMPILER
+#elif defined(__ICL)
+#  define FMT_ICC_VERSION __ICL
+#else
+#  define FMT_ICC_VERSION 0
+#endif
+
+#ifdef __NVCC__
+#  define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__)
+#else
+#  define FMT_CUDA_VERSION 0
+#endif
+
+#ifdef __has_builtin
+#  define FMT_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define FMT_HAS_BUILTIN(x) 0
+#endif
+
+#ifndef FMT_THROW  // May be predefined to customize error reporting.
+#  if FMT_EXCEPTIONS
+#    if FMT_MSC_VER
+FMT_BEGIN_NAMESPACE
+namespace internal {
+template <typename Exception> inline void do_throw(const Exception& x) {
+  // Throw through a volatile flag to silence unreachable-code warnings in
+  // MSVC, which are nearly impossible to fix in generic code.
+  volatile bool b = true;
+  if (b) throw x;
+}
+}  // namespace internal
+FMT_END_NAMESPACE
+#      define FMT_THROW(x) fmt::internal::do_throw(x)
+#    else
+#      define FMT_THROW(x) throw x
+#    endif
+#  else  // Exceptions disabled: do not evaluate x, fail an assertion instead.
+#    define FMT_THROW(x)              \
+      do {                            \
+        static_cast<void>(sizeof(x)); \
+        assert(false);                \
+      } while (false)
+#  endif
+#endif
+
+#ifndef FMT_USE_USER_DEFINED_LITERALS
+// For icc/nvcc, both the compiler and the host gcc/msc must support UDLs.
+#  if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 ||      \
+       FMT_MSC_VER >= 1900) &&                                              \
+      (!(FMT_ICC_VERSION || FMT_CUDA_VERSION) || FMT_ICC_VERSION >= 1500 || \
+       FMT_CUDA_VERSION >= 700)
+#    define FMT_USE_USER_DEFINED_LITERALS 1
+#  else
+#    define FMT_USE_USER_DEFINED_LITERALS 0
+#  endif
+#endif
+
+#ifndef FMT_USE_UDL_TEMPLATE
+// EDG front end based compilers (icc, nvcc) do not support UDL templates yet
+// and GCC 9 warns about them.
+#  if FMT_USE_USER_DEFINED_LITERALS && FMT_ICC_VERSION == 0 && \
+      FMT_CUDA_VERSION == 0 &&                                 \
+      ((FMT_GCC_VERSION >= 600 && FMT_GCC_VERSION <= 900 &&    \
+        __cplusplus >= 201402L) ||                             \
+       FMT_CLANG_VERSION >= 304)
+#    define FMT_USE_UDL_TEMPLATE 1
+#  else
+#    define FMT_USE_UDL_TEMPLATE 0
+#  endif
+#endif
+
+#ifdef FMT_USE_INT128
+// Do nothing.
+#elif defined(__SIZEOF_INT128__)
+#  define FMT_USE_INT128 1
+#else
+#  define FMT_USE_INT128 0
+#endif
+
+// __builtin_clz is broken in clang with Microsoft CodeGen:
+// https://github.com/fmtlib/fmt/issues/519
+#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clz)) && !FMT_MSC_VER
+#  define FMT_BUILTIN_CLZ(n) __builtin_clz(n)
+#endif
+#if (FMT_GCC_VERSION || FMT_HAS_BUILTIN(__builtin_clzll)) && !FMT_MSC_VER
+#  define FMT_BUILTIN_CLZLL(n) __builtin_clzll(n)
+#endif
+
+// Some compilers masquerade as both MSVC and GCC-likes or otherwise support
+// __builtin_clz and __builtin_clzll, so only define FMT_BUILTIN_CLZ using the
+// MSVC intrinsics if the clz and clzll builtins are not available.
+#if FMT_MSC_VER && !defined(FMT_BUILTIN_CLZLL) && !defined(_MANAGED)
+#  include <intrin.h>  // _BitScanReverse, _BitScanReverse64
+
+FMT_BEGIN_NAMESPACE
+namespace internal {
+// Avoid Clang with Microsoft CodeGen's -Wunknown-pragmas warning.
+#  ifndef __clang__
+#    pragma intrinsic(_BitScanReverse)
+#  endif
+inline uint32_t clz(uint32_t x) {  // Leading zero bits of a nonzero x.
+  unsigned long r = 0;
+  _BitScanReverse(&r, x);  // r receives the index of the highest set bit.
+
+  assert(x != 0);
+  // Static analysis complains about using uninitialized data
+  // "r", but the only way that can happen is if "x" is 0,
+  // which the callers guarantee to not happen.
+#  pragma warning(suppress : 6102)
+  return 31 - r;
+}
+#  define FMT_BUILTIN_CLZ(n) fmt::internal::clz(n)
+
+#  if defined(_WIN64) && !defined(__clang__)
+#    pragma intrinsic(_BitScanReverse64)
+#  endif
+
+inline uint32_t clzll(uint64_t x) {  // Leading zero bits of a nonzero x.
+  unsigned long r = 0;
+#  ifdef _WIN64
+  _BitScanReverse64(&r, x);
+#  else
+  // Scan the high 32 bits; a hit there determines the result directly.
+  if (_BitScanReverse(&r, static_cast<uint32_t>(x >> 32))) return 63 - (r + 32);
+
+  // Scan the low 32 bits.
+  _BitScanReverse(&r, static_cast<uint32_t>(x));
+#  endif
+
+  assert(x != 0);
+  // Static analysis complains about using uninitialized data
+  // "r", but the only way that can happen is if "x" is 0,
+  // which the callers guarantee to not happen.
+#  pragma warning(suppress : 6102)
+  return 63 - r;
+}
+#  define FMT_BUILTIN_CLZLL(n) fmt::internal::clzll(n)
+}  // namespace internal
+FMT_END_NAMESPACE
+#endif
+
+FMT_BEGIN_NAMESPACE
+namespace internal {
+
+// A fallback implementation of uintptr_t for systems that lack it.
+struct fallback_uintptr {
+  unsigned char value[sizeof(void*)];  // As many bytes as a void* occupies.
+};
+#ifdef UINTPTR_MAX  // Use the real ::uintptr_t whenever UINTPTR_MAX is defined.
+using uintptr_t = ::uintptr_t;
+#else
+using uintptr_t = fallback_uintptr;
+#endif
+
+// Reinterprets the object representation of `source` as a value of type Dest
+// by copying its bytes, which avoids the undefined behavior (type aliasing) of
+// `*reinterpret_cast<Dest*>(&source)`. Example: bit_cast<uint64_t>(2.718).
+template <typename Dest, typename Source>
+inline Dest bit_cast(const Source& source) {
+  static_assert(sizeof(Dest) == sizeof(Source), "size mismatch");
+  Dest result;
+  std::memcpy(&result, &source, sizeof(Dest));
+  return result;
+}
+
+// An approximation of iterator_t for pre-C++20 systems.
+template <typename T>
+using iterator_t = decltype(std::begin(std::declval<T&>()));
+
+// Detect the iterator category of *any* given type in a SFINAE-friendly way.
+// Unfortunately, older implementations of std::iterator_traits are not safe
+// for use in a SFINAE-context.
+template <typename It, typename Enable = void>
+struct iterator_category : std::false_type {};  // Fallback for other types.
+
+template <typename T> struct iterator_category<T*> {  // Raw pointers.
+  using type = std::random_access_iterator_tag;
+};
+
+template <typename It>  // Types that declare a nested iterator_category.
+struct iterator_category<It, void_t<typename It::iterator_category>> {
+  using type = typename It::iterator_category;
+};
+
+// Detect if *any* given type models the OutputIterator concept.
+template <typename It> class is_output_iterator {
+  // Check for mutability because all iterator categories derived from
+  // std::input_iterator_tag *may* also meet the requirements of an
+  // OutputIterator, thereby falling into the category of 'mutable iterators'
+  // [iterator.requirements.general] clause 4. The compiler reveals this
+  // property only at the point of *actually dereferencing* the iterator!
+  template <typename U>
+  static decltype(*(std::declval<U>())) test(std::input_iterator_tag);
+  template <typename U> static char& test(std::output_iterator_tag);
+  template <typename U> static const char& test(...);  // Anything else.
+  // Return type of the overload selected for It's category tag.
+  using type = decltype(test<It>(typename iterator_category<It>::type{}));
+
+ public:
+  static const bool value = !std::is_const<remove_reference_t<type>>::value;
+};
+
+// A workaround for std::string not having mutable data() until C++17.
+template <typename Char> inline Char* get_data(std::basic_string<Char>& s) {
+  return &s[0];  // Mutable pointer to the first character.
+}
+template <typename Container>  // Other containers: forward to data().
+inline typename Container::value_type* get_data(Container& c) {
+  return c.data();
+}
+
+#ifdef _SECURE_SCL
+// Make a checked iterator to avoid MSVC warnings.
+template <typename T> using checked_ptr = stdext::checked_array_iterator<T*>;
+template <typename T> checked_ptr<T> make_checked(T* p, std::size_t size) {
+  return {p, size};
+}
+#else
+template <typename T> using checked_ptr = T*;
+template <typename T> inline T* make_checked(T* p, std::size_t) { return p; }
+#endif
+
+// Grows the contiguous container behind `it` by n elements and returns a
+// (possibly checked) pointer to the first of the newly added elements.
+template <typename Container, FMT_ENABLE_IF(is_contiguous<Container>::value)>
+inline checked_ptr<typename Container::value_type> reserve(
+    std::back_insert_iterator<Container>& it, std::size_t n) {
+  Container& container = get_container(it);
+  const std::size_t old_size = container.size();
+  container.resize(old_size + n);
+  return make_checked(get_data(container) + old_size, n);
+}
+// Fallback for all other iterators: nothing is reserved, `it` is returned.
+template <typename Iterator>
+inline Iterator& reserve(Iterator& it, std::size_t) { return it; }
+
+// An output iterator that merely counts how many times it is incremented;
+// values written through it land in a scratch member and are thrown away.
+template <typename T> class counting_iterator {
+ private:
+  std::size_t count_;
+  mutable T blackhole_;
+
+ public:
+  using iterator_category = std::output_iterator_tag;
+  using value_type = T;
+  using difference_type = std::ptrdiff_t;
+  using pointer = T*;
+  using reference = T&;
+  using _Unchecked_type = counting_iterator;  // Mark iterator as checked.
+
+  counting_iterator() : count_(0) {}
+
+  std::size_t count() const { return count_; }
+
+  counting_iterator& operator++() {
+    count_ += 1;
+    return *this;
+  }
+
+  counting_iterator operator++(int) {
+    counting_iterator previous(*this);
+    operator++();
+    return previous;
+  }
+
+  T& operator*() const { return blackhole_; }
+};
+
+template <typename OutputIt> class truncating_iterator_base {  // Shared state.
+ protected:
+  OutputIt out_;       // The wrapped output iterator.
+  std::size_t limit_;  // Bound that derived classes compare count_ against.
+  std::size_t count_;  // Starts at zero; updated by derived classes.
+
+  truncating_iterator_base(OutputIt out, std::size_t limit)
+      : out_(out), limit_(limit), count_(0) {}
+
+ public:
+  using iterator_category = std::output_iterator_tag;
+  using difference_type = void;
+  using pointer = void;
+  using reference = void;
+  using _Unchecked_type =
+      truncating_iterator_base;  // Mark iterator as checked.
+
+  OutputIt base() const { return out_; }
+  std::size_t count() const { return count_; }
+};
+
+// An output iterator that forwards at most `limit` objects to the wrapped
+// iterator while counting every object written to it.
+template <typename OutputIt,
+          typename Enable = typename std::is_void<
+              typename std::iterator_traits<OutputIt>::value_type>::type>
+class truncating_iterator;
+// Specialization for iterators whose value_type is not void.
+template <typename OutputIt>
+class truncating_iterator<OutputIt, std::false_type>
+    : public truncating_iterator_base<OutputIt> {
+  using traits = std::iterator_traits<OutputIt>;
+
+  mutable typename traits::value_type blackhole_;  // Sink past the limit.
+
+ public:
+  using value_type = typename traits::value_type;
+
+  truncating_iterator(OutputIt out, std::size_t limit)
+      : truncating_iterator_base<OutputIt>(out, limit) {}
+  // Always counts; advances the wrapped iterator only while below the limit.
+  truncating_iterator& operator++() {
+    if (this->count_++ < this->limit_) ++this->out_;
+    return *this;
+  }
+
+  truncating_iterator operator++(int) {
+    auto it = *this;
+    ++*this;
+    return it;
+  }
+  // Yields the real target below the limit and the sink afterwards.
+  value_type& operator*() const {
+    return this->count_ < this->limit_ ? *this->out_ : blackhole_;
+  }
+};
+
+template <typename OutputIt>  // For iterators whose value_type is void.
+class truncating_iterator<OutputIt, std::true_type>
+    : public truncating_iterator_base<OutputIt> {
+ public:
+  using value_type = typename OutputIt::container_type::value_type;
+
+  truncating_iterator(OutputIt out, std::size_t limit)
+      : truncating_iterator_base<OutputIt>(out, limit) {}
+  // Always counts; forwards val to the wrapped iterator only below the limit.
+  truncating_iterator& operator=(value_type val) {
+    if (this->count_++ < this->limit_) this->out_ = val;
+    return *this;
+  }
+  // Increment and dereference are no-ops; all work happens in operator=.
+  truncating_iterator& operator++() { return *this; }
+  truncating_iterator& operator++(int) { return *this; }
+  truncating_iterator& operator*() { return *this; }
+};
+
+// A range with the specified output iterator and value type.
+template <typename OutputIt, typename T = typename OutputIt::value_type>
+class output_range {
+ private:
+  OutputIt it_;  // The iterator handed back by begin().
+
+ public:
+  using value_type = T;
+  using iterator = OutputIt;
+  struct sentinel {};  // Empty end marker type.
+
+  explicit output_range(OutputIt it) : it_(it) {}
+  OutputIt begin() const { return it_; }
+  sentinel end() const { return {}; }  // Sentinel is not used yet.
+};
+
+// A range with an iterator appending to a buffer.
+template <typename T>
+class buffer_range
+    : public output_range<std::back_insert_iterator<buffer<T>>, T> {
+ public:
+  using iterator = std::back_insert_iterator<buffer<T>>;
+  using output_range<iterator, T>::output_range;  // Inherit the constructors.
+  buffer_range(buffer<T>& buf)  // Implicit: wraps std::back_inserter(buf).
+      : output_range<iterator, T>(std::back_inserter(buf)) {}
+};
+
+// Generic case: every code unit is counted as one code point.
+template <typename Char>
+inline size_t count_code_points(basic_string_view<Char> s) { return s.size(); }
+
+// UTF-8 case: counts code points by skipping continuation bytes (10xxxxxx).
+inline size_t count_code_points(basic_string_view<char8_t> s) {
+  const char8_t* const bytes = s.data();
+  size_t total = 0;
+  for (size_t pos = 0, length = s.size(); pos != length; ++pos) {
+    if ((bytes[pos] & 0xc0) != 0x80) ++total;
+  }
+  return total;
+}
+
+// Converts a plain char to a char8_t code unit.
+inline char8_t to_char8_t(char c) { return static_cast<char8_t>(c); }
+
+template <typename InputIt, typename OutChar>  // True for char -> char8_t.
+using needs_conversion = bool_constant<
+    std::is_same<typename std::iterator_traits<InputIt>::value_type,
+                 char>::value &&
+    std::is_same<OutChar, char8_t>::value>;
+// Copies [begin, end) to `it` unchanged when no conversion is needed.
+template <typename OutChar, typename InputIt, typename OutputIt,
+          FMT_ENABLE_IF(!needs_conversion<InputIt, OutChar>::value)>
+OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) {
+  return std::copy(begin, end, it);
+}
+// Copies [begin, end) to `it`, converting each char with to_char8_t.
+template <typename OutChar, typename InputIt, typename OutputIt,
+          FMT_ENABLE_IF(needs_conversion<InputIt, OutChar>::value)>
+OutputIt copy_str(InputIt begin, InputIt end, OutputIt it) {
+  return std::transform(begin, end, it, to_char8_t);
+}
+
+#ifndef FMT_USE_GRISU
+#  define FMT_USE_GRISU 0
+#endif
+
+template <typename T> constexpr bool use_grisu() {
+  return FMT_USE_GRISU && std::numeric_limits<double>::is_iec559 &&
+         sizeof(T) <= sizeof(double);
+}
+
+// Appends [begin, end) after reserving room for the resulting total size.
+template <typename T> template <typename U>
+void buffer<T>::append(const U* begin, const U* end) {
+  const std::size_t total = size_ + to_unsigned(end - begin);
+  reserve(total);
+  std::uninitialized_copy(begin, end, make_checked(ptr_, capacity_) + size_);
+  size_ = total;
+}
+}  // namespace internal
+
+// A UTF-8 string view.
+class u8string_view : public basic_string_view<char8_t> {
+ public:
+  u8string_view(const char* s)  // View over s, reinterpreted as char8_t.
+      : basic_string_view<char8_t>(reinterpret_cast<const char8_t*>(s)) {}
+  u8string_view(const char* s, size_t count) FMT_NOEXCEPT  // Explicit length.
+      : basic_string_view<char8_t>(reinterpret_cast<const char8_t*>(s), count) {
+  }
+};
+
+#if FMT_USE_USER_DEFINED_LITERALS
+inline namespace literals {
+inline u8string_view operator"" _u(const char* s, std::size_t n) {
+  return {s, n};  // Lets "text"_u produce a u8string_view.
+}
+}  // namespace literals
+#endif
+
+// The number of characters to store in the basic_memory_buffer object itself
+// to avoid dynamic memory allocation.
+enum { inline_buffer_size = 500 };
+
+/**
+  \rst
+  A dynamically growing memory buffer for trivially copyable/constructible types
+  with the first ``SIZE`` elements stored in the object itself.
+
+  You can use one of the following type aliases for common character types:
+
+  +----------------+------------------------------+
+  | Type           | Definition                   |
+  +================+==============================+
+  | memory_buffer  | basic_memory_buffer<char>    |
+  +----------------+------------------------------+
+  | wmemory_buffer | basic_memory_buffer<wchar_t> |
+  +----------------+------------------------------+
+
+  **Example**::
+
+     fmt::memory_buffer out;
+     format_to(out, "The answer is {}.", 42);
+
+  This will append the following output to the ``out`` object:
+
+  .. code-block:: none
+
+     The answer is 42.
+
+  The output can be converted to an ``std::string`` with ``to_string(out)``.
+  \endrst
+ */
+template <typename T, std::size_t SIZE = inline_buffer_size,
+          typename Allocator = std::allocator<T>>
+class basic_memory_buffer : private Allocator, public internal::buffer<T> {
+ private:
+  T store_[SIZE];  // Inline storage the buffer starts out with.
+
+  // Releases the current storage unless it is the inline array store_.
+  void deallocate() {
+    T* data = this->data();
+    if (data != store_) Allocator::deallocate(data, this->capacity());
+  }
+
+ protected:
+  void grow(std::size_t size) FMT_OVERRIDE;
+
+ public:
+  using value_type = T;
+  using const_reference = const T&;
+
+  explicit basic_memory_buffer(const Allocator& alloc = Allocator())
+      : Allocator(alloc) {
+    this->set(store_, SIZE);  // Start out using the inline storage.
+  }
+  ~basic_memory_buffer() { deallocate(); }
+
+ private:
+  // Move data (and the allocator) from other to this buffer.
+  void move(basic_memory_buffer& other) {
+    Allocator &this_alloc = *this, &other_alloc = other;
+    this_alloc = std::move(other_alloc);
+    T* data = other.data();
+    std::size_t size = other.size(), capacity = other.capacity();
+    if (data == other.store_) {  // other uses its inline array: copy elements.
+      this->set(store_, capacity);
+      std::uninitialized_copy(other.store_, other.store_ + size,
+                              internal::make_checked(store_, capacity));
+    } else {  // other uses allocated storage: take over the pointer.
+      this->set(data, capacity);
+      // Set pointer to the inline array so that delete is not called
+      // when deallocating.
+      other.set(other.store_, 0);
+    }
+    this->resize(size);
+  }
+
+ public:
+  /**
+    \rst
+    Constructs a :class:`fmt::basic_memory_buffer` object moving the content
+    of the other object to it.
+    \endrst
+   */
+  basic_memory_buffer(basic_memory_buffer&& other) { move(other); }
+
+  /**
+    \rst
+    Moves the content of the other ``basic_memory_buffer`` object to this one.
+    \endrst
+   */
+  basic_memory_buffer& operator=(basic_memory_buffer&& other) {
+    assert(this != &other);  // Self-move-assignment is not supported.
+    deallocate();
+    move(other);
+    return *this;
+  }
+
+  // Returns a copy of the allocator associated with this buffer.
+  Allocator get_allocator() const { return *this; }
+};
+
+template <typename T, std::size_t SIZE, typename Allocator>
+void basic_memory_buffer<T, SIZE, Allocator>::grow(std::size_t size) {
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  if (size > 1000) throw std::runtime_error("fuzz mode - won't grow that much");
+#endif
+  std::size_t old_capacity = this->capacity();
+  std::size_t new_capacity = old_capacity + old_capacity / 2;  // Grow by 1.5x.
+  if (size > new_capacity) new_capacity = size;  // Or to `size` if larger.
+  T* old_data = this->data();
+  T* new_data = std::allocator_traits<Allocator>::allocate(*this, new_capacity);
+  // The following code doesn't throw, so the raw pointer above doesn't leak.
+  std::uninitialized_copy(old_data, old_data + this->size(),
+                          internal::make_checked(new_data, new_capacity));
+  this->set(new_data, new_capacity);
+  // deallocate must not throw according to the standard, but even if it does,
+  // the buffer already uses the new storage and will deallocate it in
+  // destructor. The inline array store_ is never passed to the allocator.
+  if (old_data != store_) Allocator::deallocate(old_data, old_capacity);
+}
+
+using memory_buffer = basic_memory_buffer<char>;
+using wmemory_buffer = basic_memory_buffer<wchar_t>;
+
+/** A formatting error such as invalid format string. */
+class FMT_API format_error : public std::runtime_error {
+ public:
+  explicit format_error(const char* message) : std::runtime_error(message) {}
+  explicit format_error(const std::string& message)
+      : std::runtime_error(message) {}
+  ~format_error() FMT_NOEXCEPT;
+};
+
+namespace internal {
+
+// Returns true if value is negative, false otherwise.
+// Same as `value < 0` but doesn't produce warnings if T is an unsigned type.
+template <typename T, FMT_ENABLE_IF(std::numeric_limits<T>::is_signed)>
+FMT_CONSTEXPR bool is_negative(T value) {  // Signed types: real comparison.
+  return value < 0;
+}
+template <typename T, FMT_ENABLE_IF(!std::numeric_limits<T>::is_signed)>
+FMT_CONSTEXPR bool is_negative(T) {  // Unsigned types: never negative.
+  return false;
+}
+
+// Smallest of uint32_t and uint64_t that is large enough to represent all
+// values of T.
+template <typename T>
+using uint32_or_64_t =
+    conditional_t<std::numeric_limits<T>::digits <= 32, uint32_t, uint64_t>;
+
+// Static data is placed in this class template for the header-only config.
+template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data {
+  static const uint64_t powers_of_10_64[];
+  static const uint32_t zero_or_powers_of_10_32[];
+  static const uint64_t zero_or_powers_of_10_64[];
+  static const uint64_t pow10_significands[];
+  static const int16_t pow10_exponents[];
+  static const char digits[];
+  static const char hex_digits[];
+  static const char foreground_color[];
+  static const char background_color[];
+  static const char reset_color[5];
+  static const wchar_t wreset_color[5];
+};
+
+FMT_EXTERN template struct basic_data<void>;
+
+// This is a struct rather than an alias to avoid shadowing warnings in gcc.
+struct data : basic_data<> {};
+
+#ifdef FMT_BUILTIN_CLZLL
+// Returns the number of decimal digits in n. Leading zeros are not counted
+// except for n == 0 in which case count_digits returns 1.
+inline int count_digits(uint64_t n) {
+  // Based on http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+  // and the benchmark https://github.com/localvoid/cxx-benchmark-count-digits.
+  int t = (64 - FMT_BUILTIN_CLZLL(n | 1)) * 1233 >> 12;  // bits * ~log10(2)
+  return t - (n < data::zero_or_powers_of_10_64[t]) + 1;  // Table correction.
+}
+#else
+// Fallback version of count_digits used when __builtin_clz is not available.
+inline int count_digits(uint64_t n) {
+  int count = 1;
+  for (;;) {
+    // Integer division is slow so do it for a group of four digits instead
+    // of for every digit. The idea comes from the talk by Alexandrescu
+    // "Three Optimization Tips for C++". See speed-test for a comparison.
+    if (n < 10) return count;
+    if (n < 100) return count + 1;
+    if (n < 1000) return count + 2;
+    if (n < 10000) return count + 3;
+    n /= 10000u;
+    count += 4;
+  }
+}
+#endif
+
+// Counts the number of base-2^BITS digits in n (BITS = log2(radix)).
+template <unsigned BITS, typename UInt> inline int count_digits(UInt n) {
+  int num_digits = 0;
+  do {  // Runs at least once, so n == 0 yields one digit.
+    ++num_digits;
+  } while ((n >>= BITS) != 0);
+  return num_digits;
+}
+// Declares a specialization for fallback_uintptr; it is defined elsewhere.
+template <> int count_digits<4>(internal::fallback_uintptr n);
+
+#if FMT_HAS_CPP_ATTRIBUTE(always_inline)
+#  define FMT_ALWAYS_INLINE __attribute__((always_inline))
+#else
+#  define FMT_ALWAYS_INLINE
+#endif
+
+template <typename Handler>
+inline char* lg(uint32_t n, Handler h) FMT_ALWAYS_INLINE;
+
+// Computes g = floor(log10(n)) and calls h.on<g>(n);
+template <typename Handler> inline char* lg(uint32_t n, Handler h) {
+  return n < 100 ? n < 10 ? h.template on<0>(n) : h.template on<1>(n)
+                 : n < 1000000
+                       ? n < 10000 ? n < 1000 ? h.template on<2>(n)
+                                              : h.template on<3>(n)
+                                   : n < 100000 ? h.template on<4>(n)
+                                                : h.template on<5>(n)
+                       : n < 100000000 ? n < 10000000 ? h.template on<6>(n)
+                                                      : h.template on<7>(n)
+                                       : n < 1000000000 ? h.template on<8>(n)
+                                                        : h.template on<9>(n);
+}
+
+// An lg handler that formats a decimal number.
+// Usage: lg(n, decimal_formatter(buffer));
+class decimal_formatter {
+ private:
+  char* buffer_;  // Current write position.
+  // Copies the two characters at data::digits + 2 * index to buffer_ + N.
+  void write_pair(unsigned N, uint32_t index) {
+    std::memcpy(buffer_ + N, data::digits + index * 2, 2);
+  }
+
+ public:
+  explicit decimal_formatter(char* buf) : buffer_(buf) {}
+  // Formats u (presumably N + 1 decimal digits) and advances buffer_ by N + 1.
+  template <unsigned N> char* on(uint32_t u) {
+    if (N == 0) {
+      *buffer_ = static_cast<char>(u) + '0';
+    } else if (N == 1) {
+      write_pair(0, u);
+    } else {
+      // The idea of using 4.32 fixed-point numbers is based on
+      // https://github.com/jeaiii/itoa
+      unsigned n = N - 1;
+      unsigned a = n / 5 * n * 53 / 16;
+      uint64_t t =
+          ((1ULL << (32 + a)) / data::zero_or_powers_of_10_32[n] + 1 - n / 9);
+      t = ((t * u) >> a) + n / 5 * 4;
+      write_pair(0, t >> 32);  // The integer part holds the leading pair.
+      for (unsigned i = 2; i < N; i += 2) {
+        t = 100ULL * static_cast<uint32_t>(t);  // Shift in the next pair.
+        write_pair(i, t >> 32);
+      }
+      if (N % 2 == 0) {  // One digit remains after the pairs.
+        buffer_[N] =
+            static_cast<char>((10ULL * static_cast<uint32_t>(t)) >> 32) + '0';
+      }
+    }
+    return buffer_ += N + 1;
+  }
+};
+
+#ifdef FMT_BUILTIN_CLZ
+// Optional version of count_digits for better performance on 32-bit platforms.
+inline int count_digits(uint32_t n) {
+  int t = (32 - FMT_BUILTIN_CLZ(n | 1)) * 1233 >> 12;
+  return t - (n < data::zero_or_powers_of_10_32[t]) + 1;
+}
+#endif
+
+template <typename Char> FMT_API Char thousands_sep_impl(locale_ref loc);
+template <typename Char> inline Char thousands_sep(locale_ref loc) {
+  return Char(thousands_sep_impl<char>(loc));  // Uses the char separator.
+}
+template <> inline wchar_t thousands_sep(locale_ref loc) {
+  return thousands_sep_impl<wchar_t>(loc);  // wchar_t has its own impl.
+}
+
+template <typename Char> FMT_API Char decimal_point_impl(locale_ref loc);
+template <typename Char> inline Char decimal_point(locale_ref loc) {
+  return Char(decimal_point_impl<char>(loc));  // Uses the char decimal point.
+}
+template <> inline wchar_t decimal_point(locale_ref loc) {
+  return decimal_point_impl<wchar_t>(loc);  // wchar_t has its own impl.
+}
+
+// Formats a decimal unsigned integer value, writing num_digits digits backwards
+// from buffer + num_digits and returning that end pointer. add_thousands_sep
+// is called after writing each digit except the most significant one.
+template <typename UInt, typename Char, typename F>
+inline Char* format_decimal(Char* buffer, UInt value, int num_digits,
+                            F add_thousands_sep) {
+  FMT_ASSERT(num_digits >= 0, "invalid digit count");
+  buffer += num_digits;
+  Char* end = buffer;
+  while (value >= 100) {
+    // Integer division is slow so do it for a group of two digits instead
+    // of for every digit. The idea comes from the talk by Alexandrescu
+    // "Three Optimization Tips for C++". See speed-test for a comparison.
+    unsigned index = static_cast<unsigned>((value % 100) * 2);
+    value /= 100;
+    *--buffer = static_cast<Char>(data::digits[index + 1]);
+    add_thousands_sep(buffer);
+    *--buffer = static_cast<Char>(data::digits[index]);
+    add_thousands_sep(buffer);
+  }
+  if (value < 10) {  // A single leading digit remains.
+    *--buffer = static_cast<Char>('0' + value);
+    return end;
+  }
+  unsigned index = static_cast<unsigned>(value * 2);  // Two digits remain.
+  *--buffer = static_cast<Char>(data::digits[index + 1]);
+  add_thousands_sep(buffer);
+  *--buffer = static_cast<Char>(data::digits[index]);
+  return end;
+}
+
+template <typename Char, typename UInt, typename Iterator, typename F>
+inline Iterator format_decimal(Iterator out, UInt value, int num_digits,
+                               F add_thousands_sep) {
+  FMT_ASSERT(num_digits >= 0, "invalid digit count");
+  // Scratch space for all digits (<= digits10 + 1) plus a third more.
+  enum { max_size = std::numeric_limits<UInt>::digits10 + 1 };
+  Char scratch[max_size + max_size / 3];
+  Char* last = format_decimal(scratch, value, num_digits, add_thousands_sep);
+  return internal::copy_str<Char>(scratch, last, out);
+}
+// Same as above but with a separator callback that does nothing.
+template <typename Char, typename It, typename UInt>
+inline It format_decimal(It out, UInt value, int num_digits) {
+  return format_decimal<Char>(out, value, num_digits, [](Char*) {});
+}
+
+template <unsigned BASE_BITS, typename Char, typename UInt>
+inline Char* format_uint(Char* buffer, UInt value, int num_digits,
+                         bool upper = false) {
+  buffer += num_digits;  // Digits are written backwards from the end.
+  Char* end = buffer;
+  do {
+    const char* digits = upper ? "0123456789ABCDEF" : data::hex_digits;
+    unsigned digit = (value & ((1 << BASE_BITS) - 1));  // Low BASE_BITS bits.
+    *--buffer = static_cast<Char>(BASE_BITS < 4 ? static_cast<char>('0' + digit)
+                                                : digits[digit]);
+  } while ((value >>= BASE_BITS) != 0);
+  return end;  // Pointer one past the last digit written.
+}
+
+template <unsigned BASE_BITS, typename Char>  // fallback_uintptr overload.
+Char* format_uint(Char* buffer, internal::fallback_uintptr n, int num_digits,
+                  bool = false) {
+  auto char_digits = std::numeric_limits<unsigned char>::digits / 4;
+  int start = (num_digits + char_digits - 1) / char_digits - 1;
+  if (int start_digits = num_digits % char_digits) {  // Partial leading byte.
+    unsigned value = n.value[start--];
+    buffer = format_uint<BASE_BITS>(buffer, value, start_digits);
+  }
+  for (; start >= 0; --start) {  // Remaining bytes, highest index first.
+    unsigned value = n.value[start];
+    buffer += char_digits;
+    auto p = buffer;
+    for (int i = 0; i < char_digits; ++i) {  // Fill this byte's digits backwards.
+      unsigned digit = (value & ((1 << BASE_BITS) - 1));
+      *--p = static_cast<Char>(data::hex_digits[digit]);
+      value >>= BASE_BITS;
+    }
+  }
+  return buffer;
+}
+
+template <unsigned BASE_BITS, typename Char, typename It, typename UInt>
+inline It format_uint(It out, UInt value, int num_digits, bool upper = false) {
+  // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1).
+  char buffer[std::numeric_limits<UInt>::digits / BASE_BITS + 1];
+  format_uint<BASE_BITS>(buffer, value, num_digits, upper);  // Format as char.
+  return internal::copy_str<Char>(buffer, buffer + num_digits, out);
+}
+
+#ifndef _WIN32
+#  define FMT_USE_WINDOWS_H 0
+#elif !defined(FMT_USE_WINDOWS_H)
+#  define FMT_USE_WINDOWS_H 1
+#endif
+
+// Define FMT_USE_WINDOWS_H to 0 to disable use of windows.h.
+// All the functionality that relies on it will be disabled too.
+#if FMT_USE_WINDOWS_H
+// A converter from UTF-8 to UTF-16.
+// It is only provided for Windows since other systems support UTF-8 natively.
+class utf8_to_utf16 {
+ private:
+  wmemory_buffer buffer_;
+
+ public:
+  FMT_API explicit utf8_to_utf16(string_view s);
+  operator wstring_view() const { return wstring_view(&buffer_[0], size()); }
+  size_t size() const { return buffer_.size() - 1; }
+  const wchar_t* c_str() const { return &buffer_[0]; }
+  std::wstring str() const { return std::wstring(&buffer_[0], size()); }
+};
+
+// A converter from UTF-16 to UTF-8.
+// It is only provided for Windows since other systems support UTF-8 natively.
+class utf16_to_utf8 {
+ private:
+  memory_buffer buffer_;  // conversion output; size() leaves out its last element
+
+ public:
+  utf16_to_utf8() {}  // starts empty; convert() can be called afterwards
+  FMT_API explicit utf16_to_utf8(wstring_view s);
+  operator string_view() const { return string_view(&buffer_[0], size()); }
+  size_t size() const { return buffer_.size() - 1; }  // presumably skips a trailing NUL -- confirm
+  const char* c_str() const { return &buffer_[0]; }
+  std::string str() const { return std::string(&buffer_[0], size()); }
+
+  // Performs conversion returning a system error code instead of
+  // throwing exception on conversion error. This method may still throw
+  // in case of memory allocation error.
+  FMT_API int convert(wstring_view s);
+};
+
+FMT_API void format_windows_error(fmt::internal::buffer<char>& out,
+                                  int error_code,
+                                  fmt::string_view message) FMT_NOEXCEPT;
+#endif
+
+template <typename T = void> struct null {};
+
+// Workaround an array initialization issue in gcc 4.8.
+template <typename Char> struct fill_t {
+ private:
+  Char data_[6];  // fixed storage for six Char units
+
+ public:
+  FMT_CONSTEXPR Char& operator[](size_t index) { return data_[index]; }  // no bounds check
+  FMT_CONSTEXPR const Char& operator[](size_t index) const {
+    return data_[index];
+  }
+
+  static FMT_CONSTEXPR fill_t<Char> make() {  // builds the default fill: a single space
+    auto fill = fill_t<Char>();  // value-initialized object
+    fill[0] = Char(' ');
+    return fill;
+  }
+};
+}  // namespace internal
+
+// We cannot use enum classes as bit fields because of a gcc bug
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414.
+namespace align {
+enum type { none, left, right, center, numeric };  // kept in a 4-bit field of basic_format_specs
+}
+using align_t = align::type;
+
+namespace sign {
+enum type { none, minus, plus, space };  // kept in a 3-bit field of basic_format_specs
+}
+using sign_t = sign::type;
+
+// Format specifiers for built-in and string types.
+template <typename Char> struct basic_format_specs {
+  int width;      // field width; 0 by default
+  int precision;  // -1 by default, i.e. no precision given
+  char type;      // presentation type character; 0 by default
+  align_t align : 4;
+  sign_t sign : 3;
+  bool alt : 1;  // Alternate form ('#').
+  internal::fill_t<Char> fill;  // padding; fill[0] defaults to ' ' via fill_t::make()
+
+  constexpr basic_format_specs()
+      : width(0),
+        precision(-1),
+        type(0),
+        align(align::none),
+        sign(sign::none),
+        alt(false),
+        fill(internal::fill_t<Char>::make()) {}
+};
+
+using format_specs = basic_format_specs<char>;
+
+namespace internal {
+
+// Writes the exponent exp through `it` as a sign character followed by two or
+// three decimal digits, i.e. in the form "[+-]d{2,3}", and returns the iterator.
+template <typename Char, typename It> It write_exponent(int exp, It it) {
+  FMT_ASSERT(-1000 < exp && exp < 1000, "exponent out of range");
+  const bool negative = exp < 0;
+  *it++ = static_cast<Char>(negative ? '-' : '+');
+  if (negative) exp = -exp;
+  if (exp >= 100) {
+    // Hundreds digit first; the remaining two digits come from the table.
+    *it++ = static_cast<Char>(static_cast<char>('0' + exp / 100));
+    exp %= 100;
+  }
+  // data::digits is read in two-character steps, one pair per value of exp.
+  const char* pair = data::digits + exp * 2;
+  *it++ = static_cast<Char>(pair[0]);
+  *it++ = static_cast<Char>(pair[1]);
+  return it;
+}
+
+struct gen_digits_params {
+  int num_digits;       // requested digit count; grisu_prettify zero-pads up to it
+  bool fixed;           // true: fixed notation; false: exponent notation
+  bool upper;           // exponent marker is 'E' when true, 'e' otherwise
+  bool trailing_zeros;  // keep (true) or strip (false) trailing zeros in fixed notation
+};
+
+// The number is given as v = digits * pow(10, exp).
+template <typename Char, typename It>
+It grisu_prettify(const char* digits, int size, int exp, It it,
+                  gen_digits_params params, Char decimal_point) {
+  // pow(10, full_exp - 1) <= v <= pow(10, full_exp).
+  int full_exp = size + exp;
+  if (!params.fixed) {
+    // Insert a decimal point after the first digit and add an exponent.
+    *it++ = static_cast<Char>(*digits);
+    if (size > 1) *it++ = decimal_point;
+    exp += size - 1;  // exponent is now relative to the first digit
+    it = copy_str<Char>(digits + 1, digits + size, it);
+    if (size < params.num_digits)  // pad the mantissa with zeros up to num_digits
+      it = std::fill_n(it, params.num_digits - size, static_cast<Char>('0'));
+    *it++ = static_cast<Char>(params.upper ? 'E' : 'e');
+    return write_exponent<Char>(exp, it);
+  }
+  if (size <= full_exp) {
+    // 1234e7 -> 12340000000[.0+]
+    it = copy_str<Char>(digits, digits + size, it);
+    it = std::fill_n(it, full_exp - size, static_cast<Char>('0'));
+    int num_zeros = (std::max)(params.num_digits - full_exp, 1);  // at least one fractional zero
+    if (params.trailing_zeros) {
+      *it++ = decimal_point;
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+      if (num_zeros > 1000)
+        throw std::runtime_error("fuzz mode - avoiding excessive cpu use");
+#endif
+      it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
+    }
+  } else if (full_exp > 0) {
+    // 1234e-2 -> 12.34[0+]
+    it = copy_str<Char>(digits, digits + full_exp, it);  // integer part
+    if (!params.trailing_zeros) {
+      // Remove trailing zeros.
+      while (size > full_exp && digits[size - 1] == '0') --size;
+      if (size != full_exp) *it++ = decimal_point;  // no point when no fraction is left
+      return copy_str<Char>(digits + full_exp, digits + size, it);
+    }
+    *it++ = decimal_point;
+    it = copy_str<Char>(digits + full_exp, digits + size, it);
+    if (params.num_digits > size) {
+      // Add trailing zeros.
+      int num_zeros = params.num_digits - size;
+      it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
+    }
+  } else {
+    // 1234e-6 -> 0.001234
+    *it++ = static_cast<Char>('0');
+    int num_zeros = -full_exp;  // zeros between the point and the first digit
+    if (params.num_digits >= 0 && params.num_digits < num_zeros)
+      num_zeros = params.num_digits;
+    if (!params.trailing_zeros)
+      while (size > 0 && digits[size - 1] == '0') --size;
+    if (num_zeros != 0 || size != 0) {  // otherwise the output is just "0"
+      *it++ = decimal_point;
+      it = std::fill_n(it, num_zeros, static_cast<Char>('0'));
+      it = copy_str<Char>(digits, digits + size, it);
+    }
+  }
+  return it;
+}
+
+namespace grisu_options {
+enum { fixed = 1, grisu3 = 2 };
+}
+
+// Formats value using the Grisu algorithm:
+// https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf
+template <typename Double, FMT_ENABLE_IF(sizeof(Double) == sizeof(uint64_t))>
+FMT_API bool grisu_format(Double, buffer<char>&, int, unsigned, int&);  // 64-bit-wide types; body elsewhere
+template <typename Double, FMT_ENABLE_IF(sizeof(Double) != sizeof(uint64_t))>
+inline bool grisu_format(Double, buffer<char>&, int, unsigned, int&) {
+  return false;  // any other size: always returns false and ignores its arguments
+}
+
+struct sprintf_specs {
+  int precision;  // negative means no precision (see has_precision)
+  char type;
+  bool alt : 1;
+
+  template <typename Char>
+  constexpr sprintf_specs(basic_format_specs<Char> specs)  // non-explicit; copies three fields only
+      : precision(specs.precision), type(specs.type), alt(specs.alt) {}
+
+  constexpr bool has_precision() const { return precision >= 0; }
+};
+
+template <typename Double>
+char* sprintf_format(Double, internal::buffer<char>&, sprintf_specs);
+
+template <typename Handler>
+FMT_CONSTEXPR void handle_int_type_spec(char spec, Handler&& handler) {  // one callback per spec
+  switch (spec) {
+  case 0:  // no type given: handled like 'd'
+  case 'd':
+    handler.on_dec();
+    break;
+  case 'x':
+  case 'X':  // both letter cases share on_hex()
+    handler.on_hex();
+    break;
+  case 'b':
+  case 'B':
+    handler.on_bin();
+    break;
+  case 'o':
+    handler.on_oct();
+    break;
+  case 'n':
+    handler.on_num();
+    break;
+  default:  // any other character
+    handler.on_error();
+  }
+}
+
+template <typename Handler>
+FMT_CONSTEXPR void handle_float_type_spec(char spec, Handler&& handler) {  // one callback per spec
+  switch (spec) {
+  case 0:  // no type given: handled like 'g'
+  case 'g':
+  case 'G':
+    handler.on_general();
+    break;
+  case 'e':
+  case 'E':
+    handler.on_exp();
+    break;
+  case 'f':
+  case 'F':
+    handler.on_fixed();
+    break;
+  case '%':
+    handler.on_percent();
+    break;
+  case 'a':
+  case 'A':
+    handler.on_hex();
+    break;
+  case 'n':
+    handler.on_num();
+    break;
+  default:  // any other character
+    handler.on_error();
+    break;
+  }
+}
+
+template <typename Char, typename Handler>
+FMT_CONSTEXPR void handle_char_specs(const basic_format_specs<Char>* specs,
+                                     Handler&& handler) {
+  if (!specs) return handler.on_char();  // no specs at all: plain character
+  if (specs->type && specs->type != 'c') return handler.on_int();  // any type other than 'c'
+  if (specs->align == align::numeric || specs->sign != sign::none || specs->alt)
+    handler.on_error("invalid format specifier for char");  // if this returns, on_char() still runs
+  handler.on_char();
+}
+
+template <typename Char, typename Handler>
+FMT_CONSTEXPR void handle_cstring_type_spec(Char spec, Handler&& handler) {
+  if (spec == 'p')  // pointer presentation requested
+    handler.on_pointer();
+  else if (spec != 0 && spec != 's')  // neither "no type" nor 's'
+    handler.on_error("invalid type specifier");
+  else
+    handler.on_string();
+}
+
+template <typename Char, typename ErrorHandler>
+FMT_CONSTEXPR void check_string_type_spec(Char spec, ErrorHandler&& eh) {
+  if (!(spec == 0 || spec == 's')) eh.on_error("invalid type specifier");  // only none or 's' pass
+}
+
+template <typename Char, typename ErrorHandler>
+FMT_CONSTEXPR void check_pointer_type_spec(Char spec, ErrorHandler&& eh) {
+  if (!(spec == 0 || spec == 'p')) eh.on_error("invalid type specifier");  // only none or 'p' pass
+}
+
+template <typename ErrorHandler> class int_type_checker : private ErrorHandler {
+ public:
+  FMT_CONSTEXPR explicit int_type_checker(ErrorHandler eh) : ErrorHandler(eh) {}
+
+  FMT_CONSTEXPR void on_dec() {}  // the five recognized callbacks are deliberate no-ops
+  FMT_CONSTEXPR void on_hex() {}
+  FMT_CONSTEXPR void on_bin() {}
+  FMT_CONSTEXPR void on_oct() {}
+  FMT_CONSTEXPR void on_num() {}
+
+  FMT_CONSTEXPR void on_error() {  // forwards a fixed message to the inherited handler
+    ErrorHandler::on_error("invalid type specifier");
+  }
+};
+
+template <typename ErrorHandler>
+class float_type_checker : private ErrorHandler {
+ public:
+  FMT_CONSTEXPR explicit float_type_checker(ErrorHandler eh)
+      : ErrorHandler(eh) {}
+
+  FMT_CONSTEXPR void on_general() {}  // the six recognized callbacks are deliberate no-ops
+  FMT_CONSTEXPR void on_exp() {}
+  FMT_CONSTEXPR void on_fixed() {}
+  FMT_CONSTEXPR void on_percent() {}
+  FMT_CONSTEXPR void on_hex() {}
+  FMT_CONSTEXPR void on_num() {}
+
+  FMT_CONSTEXPR void on_error() {  // forwards a fixed message to the inherited handler
+    ErrorHandler::on_error("invalid type specifier");
+  }
+};
+
+template <typename ErrorHandler>
+class char_specs_checker : public ErrorHandler {
+ private:
+  char type_;  // type character handed to the integer check in on_int()
+
+ public:
+  FMT_CONSTEXPR char_specs_checker(char type, ErrorHandler eh)
+      : ErrorHandler(eh), type_(type) {}
+
+  FMT_CONSTEXPR void on_int() {  // validates type_ as an integer presentation type
+    handle_int_type_spec(type_, int_type_checker<ErrorHandler>(*this));
+  }
+  FMT_CONSTEXPR void on_char() {}  // nothing to validate
+};
+
+template <typename ErrorHandler>
+class cstring_type_checker : public ErrorHandler {  // on_error comes from the ErrorHandler base
+ public:
+  FMT_CONSTEXPR explicit cstring_type_checker(ErrorHandler eh)
+      : ErrorHandler(eh) {}
+
+  FMT_CONSTEXPR void on_string() {}   // accepted without further checks
+  FMT_CONSTEXPR void on_pointer() {}  // accepted without further checks
+};
+
+template <typename Context>
+void arg_map<Context>::init(const basic_format_args<Context>& args) {
+  if (map_) return;  // already initialized
+  map_ = new entry[internal::to_unsigned(args.max_size())];
+  if (args.is_packed()) {
+    for (int i = 0;; ++i) {  // no loop bound: the none_type check below ends the scan
+      internal::type arg_type = args.type(i);
+      if (arg_type == internal::none_type) return;
+      if (arg_type == internal::named_arg_type) push_back(args.values_[i]);
+    }
+  }
+  for (int i = 0, n = args.max_size(); i < n; ++i) {  // reached only when args is not packed
+    auto type = args.args_[i].type_;
+    if (type == internal::named_arg_type) push_back(args.args_[i].value_);
+  }
+}
+
+// This template provides operations for formatting and writing data into a
+// character range.
+template <typename Range> class basic_writer {
+ public:
+  using char_type = typename Range::value_type;
+  using iterator = typename Range::iterator;
+  using format_specs = basic_format_specs<char_type>;
+
+ private:
+  iterator out_;  // Output iterator.
+  internal::locale_ref locale_;
+
+  // Attempts to reserve space for n extra characters in the output range.
+  // Returns a pointer to the reserved range or a reference to out_.
+  auto reserve(std::size_t n) -> decltype(internal::reserve(out_, n)) {
+    return internal::reserve(out_, n);
+  }
+
+  template <typename F> struct padded_int_writer {
+    size_t size_;         // reported by both size() and width()
+    string_view prefix;   // written first when non-empty
+    char_type fill;       // character repeated `padding` times after the prefix
+    std::size_t padding;  // number of fill characters
+    F f;                  // callable that writes the digits
+
+    size_t size() const { return size_; }
+    size_t width() const { return size_; }
+
+    template <typename It> void operator()(It&& it) const {
+      if (prefix.size() != 0)
+        it = internal::copy_str<char_type>(prefix.begin(), prefix.end(), it);
+      it = std::fill_n(it, padding, fill);
+      f(it);
+    }
+  };
+
+  // Emits an integer laid out as
+  //   <left-padding><prefix><numeric-padding><digits><right-padding>
+  // with the <digits> portion produced by calling f(it).
+  template <typename F>
+  void write_int(int num_digits, string_view prefix, format_specs specs, F f) {
+    std::size_t total = prefix.size() + internal::to_unsigned(num_digits);
+    char_type pad_char = specs.fill[0];
+    std::size_t inner_pad = 0;
+    if (specs.align == align::numeric) {  // widen the number itself up to the field width
+      auto field_width = internal::to_unsigned(specs.width);
+      if (field_width > total) {
+        inner_pad = field_width - total;
+        total = field_width;
+      }
+    } else if (specs.precision > num_digits) {  // make up the precision with '0'
+      total = prefix.size() + internal::to_unsigned(specs.precision);
+      inner_pad = internal::to_unsigned(specs.precision - num_digits);
+      pad_char = static_cast<char_type>('0');
+    }
+    if (specs.align == align::none) specs.align = align::right;
+    write_padded(specs, padded_int_writer<F>{total, prefix, pad_char, inner_pad, f});
+  }
+
+  // Writes a decimal integer.
+  template <typename Int> void write_decimal(Int value) {
+    auto abs_value = static_cast<uint32_or_64_t<Int>>(value);
+    bool is_negative = internal::is_negative(value);
+    if (is_negative) abs_value = 0 - abs_value;  // negate within the unsigned type
+    int num_digits = internal::count_digits(abs_value);
+    auto&& it =
+        reserve((is_negative ? 1 : 0) + static_cast<size_t>(num_digits));  // sign + digits
+    if (is_negative) *it++ = static_cast<char_type>('-');
+    it = internal::format_decimal<char_type>(it, abs_value, num_digits);
+  }
+
+  // The handle_int_type_spec handler that writes an integer.
+  template <typename Int, typename Specs> struct int_writer {
+    using unsigned_type = uint32_or_64_t<Int>;
+
+    basic_writer<Range>& writer;  // destination writer
+    const Specs& specs;           // held by reference, not copied
+    unsigned_type abs_value;      // magnitude of the value
+    char prefix[4];               // sign and/or base prefix characters
+    unsigned prefix_size;         // number of valid characters in prefix
+
+    string_view get_prefix() const { return string_view(prefix, prefix_size); }
+
+    int_writer(basic_writer<Range>& w, Int value, const Specs& s)
+        : writer(w),
+          specs(s),
+          abs_value(static_cast<unsigned_type>(value)),
+          prefix_size(0) {
+      if (internal::is_negative(value)) {
+        prefix[0] = '-';
+        ++prefix_size;
+        abs_value = 0 - abs_value;  // negate within the unsigned type
+      } else if (specs.sign != sign::none && specs.sign != sign::minus) {
+        prefix[0] = specs.sign == sign::plus ? '+' : ' ';  // explicit sign for non-negative values
+        ++prefix_size;
+      }
+    }
+
+    struct dec_writer {
+      unsigned_type abs_value;
+      int num_digits;
+
+      template <typename It> void operator()(It&& it) const {
+        it = internal::format_decimal<char_type>(it, abs_value, num_digits);
+      }
+    };
+
+    void on_dec() {
+      int num_digits = internal::count_digits(abs_value);
+      writer.write_int(num_digits, get_prefix(), specs,
+                       dec_writer{abs_value, num_digits});
+    }
+
+    struct hex_writer {
+      int_writer& self;
+      int num_digits;
+
+      template <typename It> void operator()(It&& it) const {
+        it = internal::format_uint<4, char_type>(it, self.abs_value, num_digits,
+                                                 self.specs.type != 'x');  // upper unless type is 'x'
+      }
+    };
+
+    void on_hex() {
+      if (specs.alt) {  // '#': prefix is '0' followed by the type character itself
+        prefix[prefix_size++] = '0';
+        prefix[prefix_size++] = specs.type;
+      }
+      int num_digits = internal::count_digits<4>(abs_value);
+      writer.write_int(num_digits, get_prefix(), specs,
+                       hex_writer{*this, num_digits});
+    }
+
+    template <int BITS> struct bin_writer {  // shared by on_bin (BITS=1) and on_oct (BITS=3)
+      unsigned_type abs_value;
+      int num_digits;
+
+      template <typename It> void operator()(It&& it) const {
+        it = internal::format_uint<BITS, char_type>(it, abs_value, num_digits);
+      }
+    };
+
+    void on_bin() {
+      if (specs.alt) {  // '#': prefix is '0' followed by the type character itself
+        prefix[prefix_size++] = '0';
+        prefix[prefix_size++] = static_cast<char>(specs.type);
+      }
+      int num_digits = internal::count_digits<1>(abs_value);
+      writer.write_int(num_digits, get_prefix(), specs,
+                       bin_writer<1>{abs_value, num_digits});
+    }
+
+    void on_oct() {
+      int num_digits = internal::count_digits<3>(abs_value);
+      if (specs.alt && specs.precision <= num_digits) {
+        // Octal prefix '0' is counted as a digit, so only add it if precision
+        // is not greater than the number of digits.
+        prefix[prefix_size++] = '0';
+      }
+      writer.write_int(num_digits, get_prefix(), specs,
+                       bin_writer<3>{abs_value, num_digits});
+    }
+
+    enum { sep_size = 1 };  // the separator is one char_type unit
+
+    struct num_writer {
+      unsigned_type abs_value;
+      int size;  // digits plus separators, as computed in on_num()
+      char_type sep;
+
+      template <typename It> void operator()(It&& it) const {
+        basic_string_view<char_type> s(&sep, sep_size);
+        // Index of a decimal digit with the least significant digit having
+        // index 0.
+        unsigned digit_index = 0;
+        it = internal::format_decimal<char_type>(
+            it, abs_value, size, [s, &digit_index](char_type*& buffer) {
+              if (++digit_index % 3 != 0) return;  // act on every third invocation only
+              buffer -= s.size();  // step back to make room, then write the separator there
+              std::uninitialized_copy(s.data(), s.data() + s.size(),
+                                      internal::make_checked(buffer, s.size()));
+            });
+      }
+    };
+
+    void on_num() {
+      char_type sep = internal::thousands_sep<char_type>(writer.locale_);
+      if (!sep) return on_dec();  // no separator available: plain decimal output
+      int num_digits = internal::count_digits(abs_value);
+      int size = num_digits + sep_size * ((num_digits - 1) / 3);  // one separator per full group of 3
+      writer.write_int(size, get_prefix(), specs,
+                       num_writer{abs_value, size, sep});
+    }
+
+    FMT_NORETURN void on_error() {
+      FMT_THROW(format_error("invalid type specifier"));
+    }
+  };
+
+  enum { inf_size = 3 };  // This is an enum to workaround a bug in MSVC.
+
+  struct inf_or_nan_writer {
+    char sign;           // sign character to emit first; 0 means none
+    bool as_percentage;  // append '%' after the text
+    const char* str;     // text to emit; exactly inf_size characters are copied
+
+    size_t size() const {
+      return static_cast<std::size_t>(inf_size + (sign ? 1 : 0) +
+                                      (as_percentage ? 1 : 0));
+    }
+    size_t width() const { return size(); }
+
+    template <typename It> void operator()(It&& it) const {
+      if (sign) *it++ = static_cast<char_type>(sign);
+      it = internal::copy_str<char_type>(
+          str, str + static_cast<std::size_t>(inf_size), it);
+      if (as_percentage) *it++ = static_cast<char_type>('%');
+    }
+  };
+
+  struct double_writer {
+    char sign;                       // sign character to emit first; 0 means none
+    internal::buffer<char>& buffer;  // already-formatted number text
+    char* decimal_point_pos;         // position in buffer to replace, or null for none
+    char_type decimal_point;         // character written in place of that position
+
+    size_t size() const { return buffer.size() + (sign ? 1 : 0); }
+    size_t width() const { return size(); }
+
+    template <typename It> void operator()(It&& it) {
+      if (sign) *it++ = static_cast<char_type>(sign);
+      auto begin = buffer.begin();
+      if (decimal_point_pos) {
+        it = internal::copy_str<char_type>(begin, decimal_point_pos, it);
+        *it++ = decimal_point;
+        begin = decimal_point_pos + 1;  // skip the character that was replaced
+      }
+      it = internal::copy_str<char_type>(begin, buffer.end(), it);
+    }
+  };
+
+  class grisu_writer {
+   private:
+    internal::buffer<char>& digits_;
+    size_t size_;  // output length without the sign, measured in the constructor
+    char sign_;
+    int exp_;
+    internal::gen_digits_params params_;
+    char_type decimal_point_;
+
+   public:
+    grisu_writer(char sign, internal::buffer<char>& digits, int exp,
+                 const internal::gen_digits_params& params,
+                 char_type decimal_point)
+        : digits_(digits),
+          sign_(sign),
+          exp_(exp),
+          params_(params),
+          decimal_point_(decimal_point) {
+      int num_digits = static_cast<int>(digits.size());
+      int full_exp = num_digits + exp - 1;
+      int precision = params.num_digits > 0 ? params.num_digits : 11;
+      params_.fixed |= full_exp >= -4 && full_exp < precision;  // force fixed notation in this range
+      auto it = internal::grisu_prettify<char>(
+          digits.data(), num_digits, exp, internal::counting_iterator<char>(),
+          params_, '.');  // dry run through a counting iterator to get the length
+      size_ = it.count();
+    }
+
+    size_t size() const { return size_ + (sign_ ? 1 : 0); }
+    size_t width() const { return size(); }
+
+    template <typename It> void operator()(It&& it) {
+      if (sign_) *it++ = static_cast<char_type>(sign_);
+      int num_digits = static_cast<int>(digits_.size());
+      it = internal::grisu_prettify<char_type>(digits_.data(), num_digits, exp_,
+                                               it, params_, decimal_point_);
+    }
+  };
+
+  template <typename Char> struct str_writer {
+    const Char* s;  // start of the text to copy
+    size_t size_;   // number of Char units to copy
+
+    size_t size() const { return size_; }
+    size_t width() const {  // width is counted in code points, unlike size()
+      return internal::count_code_points(basic_string_view<Char>(s, size_));
+    }
+
+    template <typename It> void operator()(It&& it) const {
+      it = internal::copy_str<char_type>(s, s + size_, it);
+    }
+  };
+
+  template <typename UIntPtr> struct pointer_writer {
+    UIntPtr value;
+    int num_digits;  // number of hex digits to write for value
+
+    size_t size() const { return to_unsigned(num_digits) + 2; }  // +2 for the "0x" written below
+    size_t width() const { return size(); }
+
+    template <typename It> void operator()(It&& it) const {
+      *it++ = static_cast<char_type>('0');
+      *it++ = static_cast<char_type>('x');
+      it = internal::format_uint<4, char_type>(it, value, num_digits);
+    }
+  };
+
+ public:
+  /** Constructs a ``basic_writer`` object. */
+  explicit basic_writer(Range out,
+                        internal::locale_ref loc = internal::locale_ref())
+      : out_(out.begin()), locale_(loc) {}
+
+  iterator out() const { return out_; }
+
+  // Writes a value in the format
+  //   <left-padding><value><right-padding>
+  // where <value> is written by f(it).
+  template <typename F> void write_padded(const format_specs& specs, F&& f) {
+    // User-perceived width (in code points).
+    unsigned width = to_unsigned(specs.width);
+    size_t size = f.size();  // The number of code units.
+    size_t num_code_points = width != 0 ? f.width() : size;  // f.width() is skipped when width is 0
+    if (width <= num_code_points) return f(reserve(size));  // already wide enough: no padding
+    auto&& it = reserve(width + (size - num_code_points));
+    char_type fill = specs.fill[0];
+    std::size_t padding = width - num_code_points;
+    if (specs.align == align::right) {
+      it = std::fill_n(it, padding, fill);
+      f(it);
+    } else if (specs.align == align::center) {
+      std::size_t left_padding = padding / 2;  // an odd leftover fill goes on the right
+      it = std::fill_n(it, left_padding, fill);
+      f(it);
+      it = std::fill_n(it, padding - left_padding, fill);
+    } else {  // every other alignment value: value first, fill after
+      f(it);
+      it = std::fill_n(it, padding, fill);
+    }
+  }
+
+  void write(int value) { write_decimal(value); }
+  void write(long value) { write_decimal(value); }
+  void write(long long value) { write_decimal(value); }
+
+  void write(unsigned value) { write_decimal(value); }
+  void write(unsigned long value) { write_decimal(value); }
+  void write(unsigned long long value) { write_decimal(value); }
+
+  // Writes a formatted integer.
+  template <typename T, typename Spec>
+  void write_int(T value, const Spec& spec) {
+    internal::handle_int_type_spec(spec.type,
+                                   int_writer<T, Spec>(*this, value, spec));
+  }
+
+  void write(double value, const format_specs& specs = format_specs()) {
+    write_double(value, specs);
+  }
+
+  /**
+    \rst
+    Formats *value* using the general format for floating-point numbers
+    (``'g'``) and writes it to the buffer.
+    \endrst
+   */
+  void write(long double value, const format_specs& specs = format_specs()) {
+    write_double(value, specs);
+  }
+
+  // Formats a floating-point number (double or long double).
+  template <typename T, bool USE_GRISU = fmt::internal::use_grisu<T>()>
+  void write_double(T value, const format_specs& specs);
+
+  /** Writes a character to the buffer. */
+  void write(char value) {
+    auto&& it = reserve(1);
+    *it++ = value;
+  }
+
+  template <typename Char, FMT_ENABLE_IF(std::is_same<Char, char_type>::value)>
+  void write(Char value) {
+    auto&& it = reserve(1);
+    *it++ = value;
+  }
+
+  /**
+    \rst
+    Writes *value* to the buffer.
+    \endrst
+   */
+  void write(string_view value) {
+    auto&& it = reserve(value.size());
+    it = internal::copy_str<char_type>(value.begin(), value.end(), it);
+  }
+  void write(wstring_view value) {
+    static_assert(std::is_same<char_type, wchar_t>::value, "");
+    auto&& it = reserve(value.size());
+    it = std::copy(value.begin(), value.end(), it);
+  }
+
+  // Writes a formatted string.
+  template <typename Char>
+  void write(const Char* s, std::size_t size, const format_specs& specs) {
+    write_padded(specs, str_writer<Char>{s, size});
+  }
+
+  template <typename Char>
+  void write(basic_string_view<Char> s,
+             const format_specs& specs = format_specs()) {
+    const Char* data = s.data();
+    std::size_t size = s.size();
+    if (specs.precision >= 0 && internal::to_unsigned(specs.precision) < size)
+      size = internal::to_unsigned(specs.precision);  // a non-negative precision caps the length
+    write(data, size, specs);
+  }
+
+  template <typename UIntPtr>
+  void write_pointer(UIntPtr value, const format_specs* specs) {
+    int num_digits = internal::count_digits<4>(value);
+    auto pw = pointer_writer<UIntPtr>{value, num_digits};
+    if (!specs) return pw(reserve(to_unsigned(num_digits) + 2));  // no specs: write unpadded
+    format_specs specs_copy = *specs;  // copy so the caller's specs are not modified
+    if (specs_copy.align == align::none) specs_copy.align = align::right;
+    write_padded(specs_copy, pw);
+  }
+};
+
+using writer = basic_writer<buffer_range<char>>;
+
+template <typename Range, typename ErrorHandler = internal::error_handler>
+class arg_formatter_base {
+ public:
+  using char_type = typename Range::value_type;
+  using iterator = typename Range::iterator;
+  using format_specs = basic_format_specs<char_type>;
+
+ private:
+  using writer_type = basic_writer<Range>;
+  writer_type writer_;
+  format_specs* specs_;  // may be null; every use below checks it or passes it on
+
+  struct char_writer {
+    char_type value;
+
+    size_t size() const { return 1; }
+    size_t width() const { return 1; }
+
+    template <typename It> void operator()(It&& it) const { *it++ = value; }
+  };
+
+  void write_char(char_type value) {
+    if (specs_)
+      writer_.write_padded(*specs_, char_writer{value});
+    else
+      writer_.write(value);
+  }
+
+  void write_pointer(const void* p) {
+    writer_.write_pointer(internal::bit_cast<internal::uintptr_t>(p), specs_);
+  }
+
+ protected:
+  writer_type& writer() { return writer_; }
+  FMT_DEPRECATED format_specs* spec() { return specs_; }  // same value as specs()
+  format_specs* specs() { return specs_; }
+  iterator out() { return writer_.out(); }
+
+  void write(bool value) {  // writes the text "true" or "false"
+    string_view sv(value ? "true" : "false");
+    specs_ ? writer_.write(sv, *specs_) : writer_.write(sv);
+  }
+
+  void write(const char_type* value) {
+    if (!value) {
+      FMT_THROW(format_error("string pointer is null"));
+    } else {
+      auto length = std::char_traits<char_type>::length(value);
+      basic_string_view<char_type> sv(value, length);
+      specs_ ? writer_.write(sv, *specs_) : writer_.write(sv);
+    }
+  }
+
+ public:
+  arg_formatter_base(Range r, format_specs* s, locale_ref loc)
+      : writer_(r, loc), specs_(s) {}
+
+  iterator operator()(monostate) {
+    FMT_ASSERT(false, "invalid argument type");
+    return out();
+  }
+
+  template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+  iterator operator()(T value) {
+    if (specs_)
+      writer_.write_int(value, *specs_);
+    else
+      writer_.write(value);
+    return out();
+  }
+
+  iterator operator()(char_type value) {
+    internal::handle_char_specs(
+        specs_, char_spec_handler(*this, static_cast<char_type>(value)));
+    return out();
+  }
+
+  iterator operator()(bool value) {
+    if (specs_ && specs_->type) return (*this)(value ? 1 : 0);  // explicit type: format as int 1/0
+    write(value != 0);
+    return out();
+  }
+
+  template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+  iterator operator()(T value) {
+    writer_.write_double(value, specs_ ? *specs_ : format_specs());  // default specs when none given
+    return out();
+  }
+
+  struct char_spec_handler : ErrorHandler {
+    arg_formatter_base& formatter;
+    char_type value;
+
+    char_spec_handler(arg_formatter_base& f, char_type val)
+        : formatter(f), value(val) {}
+
+    void on_int() {
+      if (formatter.specs_)
+        formatter.writer_.write_int(value, *formatter.specs_);
+      else
+        formatter.writer_.write(value);
+    }
+    void on_char() { formatter.write_char(value); }
+  };
+
+  struct cstring_spec_handler : internal::error_handler {
+    arg_formatter_base& formatter;
+    const char_type* value;
+
+    cstring_spec_handler(arg_formatter_base& f, const char_type* val)
+        : formatter(f), value(val) {}
+
+    void on_string() { formatter.write(value); }
+    void on_pointer() { formatter.write_pointer(value); }
+  };
+
+  iterator operator()(const char_type* value) {
+    if (!specs_) return write(value), out();  // no specs: write as a string
+    internal::handle_cstring_type_spec(specs_->type,
+                                       cstring_spec_handler(*this, value));
+    return out();
+  }
+
+  iterator operator()(basic_string_view<char_type> value) {
+    if (specs_) {
+      internal::check_string_type_spec(specs_->type, internal::error_handler());
+      writer_.write(value, *specs_);
+    } else {
+      writer_.write(value);
+    }
+    return out();
+  }
+
+  iterator operator()(const void* value) {
+    if (specs_)
+      check_pointer_type_spec(specs_->type, internal::error_handler());
+    write_pointer(value);
+    return out();
+  }
+};
+
+template <typename Char> FMT_CONSTEXPR bool is_name_start(Char c) {
+  return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
+// Parses the leading decimal digits of [begin, end) as a non-negative int and
+// advances begin past the digits it consumed. Requires a non-empty range whose first character is a digit.
+template <typename Char, typename ErrorHandler>
+FMT_CONSTEXPR int parse_nonnegative_int(const Char*& begin, const Char* end,
+                                        ErrorHandler&& eh) {
+  FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', "");
+  if (*begin == '0') {  // a leading zero is consumed alone and yields 0
+    ++begin;
+    return 0;
+  }
+  unsigned value = 0;
+  // Convert to unsigned to prevent a warning.
+  constexpr unsigned max_int = (std::numeric_limits<int>::max)();
+  unsigned big = max_int / 10;
+  do {
+    // Check for overflow.
+    if (value > big) {
+      value = max_int + 1;  // sentinel that trips the range check after the loop
+      break;
+    }
+    value = value * 10 + unsigned(*begin - '0');
+    ++begin;
+  } while (begin != end && '0' <= *begin && *begin <= '9');
+  if (value > max_int) eh.on_error("number is too big");
+  return static_cast<int>(value);
+}
+
+template <typename Context> class custom_formatter {
+ private:
+  using char_type = typename Context::char_type;
+
+  basic_parse_context<char_type>& parse_ctx_;  // held by reference
+  Context& ctx_;                               // held by reference
+
+ public:
+  explicit custom_formatter(basic_parse_context<char_type>& parse_ctx,
+                            Context& ctx)
+      : parse_ctx_(parse_ctx), ctx_(ctx) {}
+
+  bool operator()(typename basic_format_arg<Context>::handle h) const {
+    h.format(parse_ctx_, ctx_);  // delegate to the handle
+    return true;                 // true: the argument was handled here
+  }
+
+  template <typename T> bool operator()(T) const { return false; }  // every other type: not handled
+};
+
+template <typename T>
+using is_integer =
+    bool_constant<std::is_integral<T>::value && !std::is_same<T, bool>::value &&
+                  !std::is_same<T, char>::value &&
+                  !std::is_same<T, wchar_t>::value>;  // integral, excluding bool, char and wchar_t
+
+template <typename ErrorHandler> class width_checker {
+ public:
+  explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {}
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T value) {
+    if (is_negative(value)) handler_.on_error("negative width");  // if this returns, the cast still happens
+    return static_cast<unsigned long long>(value);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T) {  // non-integer argument: report, then yield 0
+    handler_.on_error("width is not integer");
+    return 0;
+  }
+
+ private:
+  ErrorHandler& handler_;  // not owned; stored by reference
+};
+
+template <typename ErrorHandler> class precision_checker {
+ public:
+  explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {}
+
+  template <typename T, FMT_ENABLE_IF(is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T value) {
+    if (is_negative(value)) handler_.on_error("negative precision");  // if this returns, the cast still happens
+    return static_cast<unsigned long long>(value);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!is_integer<T>::value)>
+  FMT_CONSTEXPR unsigned long long operator()(T) {  // non-integer argument: report, then yield 0
+    handler_.on_error("precision is not integer");
+    return 0;
+  }
+
+ private:
+  ErrorHandler& handler_;  // not owned; stored by reference
+};
+
+// Format specifier handler that stores every parsed specifier in the
+// basic_format_specs object supplied at construction.
+template <typename Char> class specs_setter {
+ public:
+  explicit FMT_CONSTEXPR specs_setter(basic_format_specs<Char>& specs)
+      : specs_(specs) {}
+
+  // A copy writes to the same specs object as the handler it was copied from.
+  FMT_CONSTEXPR specs_setter(const specs_setter& other)
+      : specs_(other.specs_) {}
+
+  // Fill and alignment.
+  FMT_CONSTEXPR void on_fill(Char fill) {
+    specs_.fill[0] = fill;
+  }
+  FMT_CONSTEXPR void on_align(align_t align) {
+    specs_.align = align;
+  }
+
+  // Sign flags.
+  FMT_CONSTEXPR void on_plus() {
+    specs_.sign = sign::plus;
+  }
+  FMT_CONSTEXPR void on_minus() {
+    specs_.sign = sign::minus;
+  }
+  FMT_CONSTEXPR void on_space() {
+    specs_.sign = sign::space;
+  }
+
+  // '#' flag: request the alternate form.
+  FMT_CONSTEXPR void on_hash() {
+    specs_.alt = true;
+  }
+
+  // '0' flag: numeric alignment padded with the '0' character.
+  FMT_CONSTEXPR void on_zero() {
+    specs_.fill[0] = Char('0');
+    specs_.align = align::numeric;
+  }
+
+  // Width and precision.
+  FMT_CONSTEXPR void on_width(int width) {
+    specs_.width = width;
+  }
+  FMT_CONSTEXPR void on_precision(int precision) {
+    specs_.precision = precision;
+  }
+  // Nothing to record once the precision has been parsed.
+  FMT_CONSTEXPR void end_precision() {}
+
+  // Presentation type; stored as a plain char.
+  FMT_CONSTEXPR void on_type(Char type) {
+    specs_.type = static_cast<char>(type);
+  }
+
+ protected:
+  basic_format_specs<Char>& specs_;
+};
+
+// Checks that numeric-only format specifiers are applied to an argument of a
+// suitable type; violations are reported through the ErrorHandler.
+template <typename ErrorHandler> class numeric_specs_checker {
+ public:
+  FMT_CONSTEXPR numeric_specs_checker(ErrorHandler& eh, internal::type arg_type)
+      : error_handler_(eh), arg_type_(arg_type) {}
+
+  // Reports an error unless the argument type is arithmetic.
+  FMT_CONSTEXPR void require_numeric_argument() {
+    const bool numeric = is_arithmetic(arg_type_);
+    if (!numeric) {
+      error_handler_.on_error("format specifier requires numeric argument");
+    }
+  }
+
+  // Sign flags need a numeric argument; among the integral types only
+  // int, long long and char are accepted.
+  FMT_CONSTEXPR void check_sign() {
+    require_numeric_argument();
+    if (!is_integral(arg_type_)) return;
+    const bool sign_allowed = arg_type_ == int_type ||
+                              arg_type_ == long_long_type ||
+                              arg_type_ == internal::char_type;
+    if (!sign_allowed) {
+      error_handler_.on_error("format specifier requires signed argument");
+    }
+  }
+
+  // Precision is rejected for integral and pointer arguments.
+  FMT_CONSTEXPR void check_precision() {
+    const bool precision_forbidden =
+        is_integral(arg_type_) || arg_type_ == internal::pointer_type;
+    if (precision_forbidden) {
+      error_handler_.on_error("precision not allowed for this argument type");
+    }
+  }
+
+ private:
+  ErrorHandler& error_handler_;
+  internal::type arg_type_;
+};
+
+// A format specifier handler that checks if specifiers are consistent with the
+// argument type. Each callback validates the specifier against the argument
+// type and then forwards it to the wrapped Handler, which also serves as the
+// error handler for the checks.
+template <typename Handler> class specs_checker : public Handler {
+ public:
+  FMT_CONSTEXPR specs_checker(const Handler& handler, internal::type arg_type)
+      : Handler(handler), arg_type_(arg_type), checker_(*this, arg_type) {}
+
+  // The argument type is kept in this class so that a copy can rebuild its
+  // checker bound to the copy itself. The type stored inside
+  // numeric_specs_checker is private and cannot be read from here.
+  FMT_CONSTEXPR specs_checker(const specs_checker& other)
+      : Handler(other),
+        arg_type_(other.arg_type_),
+        checker_(*this, other.arg_type_) {}
+
+  // Numeric alignment ('=') is only valid for numeric arguments.
+  FMT_CONSTEXPR void on_align(align_t align) {
+    if (align == align::numeric) checker_.require_numeric_argument();
+    Handler::on_align(align);
+  }
+
+  FMT_CONSTEXPR void on_plus() {
+    checker_.check_sign();
+    Handler::on_plus();
+  }
+
+  FMT_CONSTEXPR void on_minus() {
+    checker_.check_sign();
+    Handler::on_minus();
+  }
+
+  FMT_CONSTEXPR void on_space() {
+    checker_.check_sign();
+    Handler::on_space();
+  }
+
+  FMT_CONSTEXPR void on_hash() {
+    checker_.require_numeric_argument();
+    Handler::on_hash();
+  }
+
+  FMT_CONSTEXPR void on_zero() {
+    checker_.require_numeric_argument();
+    Handler::on_zero();
+  }
+
+  // Runs the precision check only; Handler::end_precision is not called.
+  FMT_CONSTEXPR void end_precision() { checker_.check_precision(); }
+
+ private:
+  // Declared before checker_ so that members are initialized in the order
+  // written in the constructors.
+  internal::type arg_type_;
+  numeric_specs_checker<Handler> checker_;
+};
+
+// Resolves a dynamic width or precision: visits arg with Handler (a checker
+// such as width_checker), reports "number is too big" when the result exceeds
+// the largest int, and stores the result in value.
+template <template <typename> class Handler, typename T, typename FormatArg,
+          typename ErrorHandler>
+FMT_CONSTEXPR void set_dynamic_spec(T& value, FormatArg arg, ErrorHandler eh) {
+  const unsigned long long limit =
+      to_unsigned((std::numeric_limits<int>::max)());
+  const unsigned long long resolved =
+      visit_format_arg(Handler<ErrorHandler>(eh), arg);
+  if (resolved > limit) {
+    eh.on_error("number is too big");
+  }
+  value = static_cast<T>(resolved);
+}
+
+// Tag type passed to handlers when an argument id is omitted. It selects the
+// overloads that take the next automatically numbered argument.
+struct auto_id {};
+
+// Looks up argument id in ctx. When the lookup yields an empty argument,
+// "argument index out of range" is reported through ctx; the looked-up
+// argument is returned either way.
+template <typename Context>
+FMT_CONSTEXPR typename Context::format_arg get_arg(Context& ctx, int id) {
+  auto found = ctx.arg(id);
+  const bool missing = !found;
+  if (missing) {
+    ctx.on_error("argument index out of range");
+  }
+  return found;
+}
+
+// The standard format specifier handler with checking. Static specifiers are
+// stored by the specs_setter base; dynamic width and precision are resolved
+// immediately against the arguments held by the formatting context.
+template <typename ParseContext, typename Context>
+class specs_handler : public specs_setter<typename Context::char_type> {
+ public:
+  using char_type = typename Context::char_type;
+
+ private:
+  // This is only needed for compatibility with gcc 4.4.
+  using format_arg = typename Context::format_arg;
+
+ public:
+  FMT_CONSTEXPR specs_handler(basic_format_specs<char_type>& specs,
+                              ParseContext& parse_ctx, Context& ctx)
+      : specs_setter<char_type>(specs),
+        parse_context_(parse_ctx),
+        context_(ctx) {}
+
+  // Resolves the referenced argument and stores it as the width.
+  template <typename Id> FMT_CONSTEXPR void on_dynamic_width(Id arg_id) {
+    auto& width = this->specs_.width;
+    set_dynamic_spec<width_checker>(width, get_arg(arg_id),
+                                    context_.error_handler());
+  }
+
+  // Resolves the referenced argument and stores it as the precision.
+  template <typename Id> FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) {
+    auto& precision = this->specs_.precision;
+    set_dynamic_spec<precision_checker>(precision, get_arg(arg_id),
+                                        context_.error_handler());
+  }
+
+  void on_error(const char* message) { context_.on_error(message); }
+
+ private:
+  // Named argument: validated by the parse context, fetched by name.
+  FMT_CONSTEXPR format_arg get_arg(basic_string_view<char_type> arg_id) {
+    parse_context_.check_arg_id(arg_id);
+    return context_.arg(arg_id);
+  }
+
+  // Explicit index: validated by the parse context, fetched with a range
+  // check.
+  FMT_CONSTEXPR format_arg get_arg(int arg_id) {
+    parse_context_.check_arg_id(arg_id);
+    return internal::get_arg(context_, arg_id);
+  }
+
+  // Omitted id: use the next automatic index from the parse context.
+  FMT_CONSTEXPR format_arg get_arg(auto_id) {
+    const auto next_id = parse_context_.next_arg_id();
+    return internal::get_arg(context_, next_id);
+  }
+
+  ParseContext& parse_context_;
+  Context& context_;
+};
+
+// Position of a substring inside a primary string, stored as an offset and a
+// size so that the view can be rebuilt later from a pointer to that string.
+struct string_view_metadata {
+  std::size_t offset_;
+  std::size_t size_;
+
+  // Empty reference: offset 0, size 0.
+  FMT_CONSTEXPR string_view_metadata() : offset_(0u), size_(0u) {}
+
+  FMT_CONSTEXPR string_view_metadata(std::size_t offset, std::size_t size)
+      : offset_(offset), size_(size) {}
+
+  // Records where view lies relative to the start of primary_string.
+  template <typename Char>
+  FMT_CONSTEXPR string_view_metadata(basic_string_view<Char> primary_string,
+                                     basic_string_view<Char> view)
+      : offset_(to_unsigned(view.data() - primary_string.data())),
+        size_(view.size()) {}
+
+  // Rebuilds the view, taking str as the start of the primary string.
+  template <typename Char>
+  FMT_CONSTEXPR basic_string_view<Char> to_view(const Char* str) const {
+    return basic_string_view<Char>(str + offset_, size_);
+  }
+};
+
+// Discriminates what an arg_ref holds: nothing, a numeric argument index, or
+// an argument name.
+enum class arg_id_kind { none, index, name };
+
+// An argument reference: either empty, a numeric index, or a name stored as
+// string_view_metadata. The kind member tells which member of val is
+// meaningful.
+template <typename Char> struct arg_ref {
+  union value {
+    FMT_CONSTEXPR value() : index(0) {}
+    FMT_CONSTEXPR value(int id) : index(id) {}
+    FMT_CONSTEXPR value(string_view_metadata n) : name(n) {}
+
+    int index;
+    string_view_metadata name;
+  };
+
+  // Empty reference.
+  FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {}
+
+  // Reference by numeric index.
+  FMT_CONSTEXPR explicit arg_ref(int index)
+      : kind(arg_id_kind::index), val(index) {}
+
+  // Reference by name.
+  FMT_CONSTEXPR explicit arg_ref(string_view_metadata name)
+      : kind(arg_id_kind::name), val(name) {}
+
+  // Turns this reference into an index reference.
+  FMT_CONSTEXPR arg_ref& operator=(int idx) {
+    val.index = idx;
+    kind = arg_id_kind::index;
+    return *this;
+  }
+
+  arg_id_kind kind;
+  value val;
+};
+
+// Format specifiers with width and precision resolved at formatting rather
+// than parsing time to allow re-using the same parsed specifiers with
+// different sets of arguments (precompilation of format strings).
+template <typename Char>
+struct dynamic_format_specs : basic_format_specs<Char> {
+  // Reference to the argument that supplies the width, if any.
+  arg_ref<Char> width_ref;
+  // Reference to the argument that supplies the precision, if any.
+  arg_ref<Char> precision_ref;
+};
+
+// Format spec handler that saves references to the arguments representing
+// dynamic width and precision, so that they can be resolved at formatting
+// time. Static specifiers are stored by the specs_setter base.
+template <typename ParseContext>
+class dynamic_specs_handler
+    : public specs_setter<typename ParseContext::char_type> {
+ public:
+  using char_type = typename ParseContext::char_type;
+
+ private:
+  using arg_ref_type = arg_ref<char_type>;
+
+ public:
+  FMT_CONSTEXPR dynamic_specs_handler(dynamic_format_specs<char_type>& specs,
+                                      ParseContext& ctx)
+      : specs_setter<char_type>(specs), specs_(specs), context_(ctx) {}
+
+  FMT_CONSTEXPR dynamic_specs_handler(const dynamic_specs_handler& other)
+      : specs_setter<char_type>(other),
+        specs_(other.specs_),
+        context_(other.context_) {}
+
+  // Remembers which argument supplies the precision.
+  template <typename Id> FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) {
+    specs_.precision_ref = make_arg_ref(arg_id);
+  }
+
+  // Remembers which argument supplies the width.
+  template <typename Id> FMT_CONSTEXPR void on_dynamic_width(Id arg_id) {
+    specs_.width_ref = make_arg_ref(arg_id);
+  }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    context_.on_error(message);
+  }
+
+ private:
+  // Omitted id: reference the next automatic index.
+  FMT_CONSTEXPR arg_ref_type make_arg_ref(auto_id) {
+    return arg_ref_type(context_.next_arg_id());
+  }
+
+  // Explicit index: validated by the parse context.
+  FMT_CONSTEXPR arg_ref_type make_arg_ref(int arg_id) {
+    context_.check_arg_id(arg_id);
+    return arg_ref_type(arg_id);
+  }
+
+  // Name: stored as offset and size relative to the range currently held by
+  // the parse context.
+  FMT_CONSTEXPR arg_ref_type make_arg_ref(basic_string_view<char_type> arg_id) {
+    context_.check_arg_id(arg_id);
+    auto first = context_.begin();
+    basic_string_view<char_type> format_str(
+        first, to_unsigned(context_.end() - first));
+    return arg_ref_type(string_view_metadata(format_str, arg_id));
+  }
+
+  dynamic_format_specs<char_type>& specs_;
+  ParseContext& context_;
+};
+
+// Parses an argument id at begin and reports it to handler:
+//  - handler() when the id is empty (next character is '}' or ':'),
+//  - handler(index) for a numeric id, which must be followed by '}' or ':',
+//  - handler(name) for a name made of a name-start character followed by
+//    name-start characters or digits.
+// Anything else is reported through handler.on_error. Returns the position
+// at which parsing stopped.
+template <typename Char, typename IDHandler>
+FMT_CONSTEXPR const Char* parse_arg_id(const Char* begin, const Char* end,
+                                       IDHandler&& handler) {
+  assert(begin != end);
+  Char c = *begin;
+  if (c == '}' || c == ':') {
+    handler();
+    return begin;
+  }
+  if (c >= '0' && c <= '9') {
+    int index = parse_nonnegative_int(begin, end, handler);
+    const bool terminated = begin != end && (*begin == '}' || *begin == ':');
+    if (!terminated) {
+      handler.on_error("invalid format string");
+      return begin;
+    }
+    handler(index);
+    return begin;
+  }
+  if (!is_name_start(c)) {
+    handler.on_error("invalid format string");
+    return begin;
+  }
+  // The first character is already known to start a name; consume the rest.
+  auto it = begin + 1;
+  while (it != end) {
+    c = *it;
+    if (!is_name_start(c) && !('0' <= c && c <= '9')) break;
+    ++it;
+  }
+  handler(basic_string_view<Char>(begin, to_unsigned(it - begin)));
+  return it;
+}
+
+// Presents a SpecHandler through the IDHandler interface used by
+// parse_arg_id: every kind of argument id is forwarded to on_dynamic_width.
+template <typename SpecHandler, typename Char> struct width_adapter {
+  SpecHandler& handler;
+
+  explicit FMT_CONSTEXPR width_adapter(SpecHandler& h) : handler(h) {}
+
+  // Omitted id: automatic numbering.
+  FMT_CONSTEXPR void operator()() {
+    handler.on_dynamic_width(auto_id());
+  }
+
+  // Numeric id.
+  FMT_CONSTEXPR void operator()(int id) {
+    handler.on_dynamic_width(id);
+  }
+
+  // Named id.
+  FMT_CONSTEXPR void operator()(basic_string_view<Char> id) {
+    handler.on_dynamic_width(id);
+  }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    handler.on_error(message);
+  }
+};
+
+// Presents a SpecHandler through the IDHandler interface used by
+// parse_arg_id: every kind of argument id is forwarded to
+// on_dynamic_precision.
+template <typename SpecHandler, typename Char> struct precision_adapter {
+  SpecHandler& handler;
+
+  explicit FMT_CONSTEXPR precision_adapter(SpecHandler& h) : handler(h) {}
+
+  // Omitted id: automatic numbering.
+  FMT_CONSTEXPR void operator()() {
+    handler.on_dynamic_precision(auto_id());
+  }
+
+  // Numeric id.
+  FMT_CONSTEXPR void operator()(int id) {
+    handler.on_dynamic_precision(id);
+  }
+
+  // Named id.
+  FMT_CONSTEXPR void operator()(basic_string_view<Char> id) {
+    handler.on_dynamic_precision(id);
+  }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    handler.on_error(message);
+  }
+};
+
+// Parses fill and alignment.
+//
+// Two forms are recognized at begin: "<fill><align>" and "<align>", where
+// <align> is one of '<', '>', '=' or '^'. The second character is examined
+// first so that an alignment character can itself be used as the fill. On a
+// match handler.on_fill (two-character form only) and handler.on_align are
+// called and the position after the consumed characters is returned;
+// otherwise begin is returned unchanged. '{' is rejected as a fill character
+// through handler.on_error.
+template <typename Char, typename Handler>
+FMT_CONSTEXPR const Char* parse_align(const Char* begin, const Char* end,
+                                      Handler&& handler) {
+  FMT_ASSERT(begin != end, "");
+  auto align = align::none;
+  // Start at index 1 when a second character exists, then fall back to 0.
+  int i = 0;
+  if (begin + 1 != end) ++i;
+  do {
+    // Compare the character at its full width. Narrowing it to char first
+    // would let a wide character whose low byte happens to equal an alignment
+    // character (e.g. U+013C and '<') be taken for that alignment character.
+    const Char candidate = begin[i];
+    if (candidate == '<')
+      align = align::left;
+    else if (candidate == '>')
+      align = align::right;
+    else if (candidate == '=')
+      align = align::numeric;
+    else if (candidate == '^')
+      align = align::center;
+    if (align != align::none) {
+      if (i > 0) {
+        // Two-character form: the first character is the fill.
+        auto c = *begin;
+        if (c == '{')
+          return handler.on_error("invalid fill character '{'"), begin;
+        begin += 2;
+        handler.on_fill(c);
+      } else
+        ++begin;
+      handler.on_align(align);
+      break;
+    }
+  } while (i-- > 0);
+  return begin;
+}
+
+// Parses an optional width at begin. A literal number is reported through
+// handler.on_width; a "{...}" reference is parsed with parse_arg_id and
+// reported through handler.on_dynamic_width (via width_adapter). When neither
+// form is present begin is returned unchanged.
+template <typename Char, typename Handler>
+FMT_CONSTEXPR const Char* parse_width(const Char* begin, const Char* end,
+                                      Handler&& handler) {
+  FMT_ASSERT(begin != end, "");
+  const bool starts_with_digit = '0' <= *begin && *begin <= '9';
+  if (starts_with_digit) {
+    handler.on_width(parse_nonnegative_int(begin, end, handler));
+    return begin;
+  }
+  if (*begin != '{') return begin;
+
+  // Dynamic width: "{" [arg-id] "}".
+  ++begin;
+  if (begin != end) {
+    begin = parse_arg_id(begin, end, width_adapter<Handler, Char>(handler));
+  }
+  const bool closed = begin != end && *begin == '}';
+  if (!closed) {
+    handler.on_error("invalid format string");
+    return begin;
+  }
+  return begin + 1;
+}
+
+// Parses a precision. begin is advanced past its first character
+// unconditionally (the caller invokes this when *begin is '.'). What follows
+// must be either a literal number, reported through handler.on_precision, or
+// a "{...}" reference, reported through handler.on_dynamic_precision (via
+// precision_adapter). handler.end_precision is called after a successful
+// parse.
+template <typename Char, typename Handler>
+FMT_CONSTEXPR const Char* parse_precision(const Char* begin, const Char* end,
+                                          Handler&& handler) {
+  ++begin;
+  const Char next = begin != end ? *begin : Char();
+  if ('0' <= next && next <= '9') {
+    handler.on_precision(parse_nonnegative_int(begin, end, handler));
+  } else if (next == '{') {
+    ++begin;
+    if (begin != end) {
+      begin =
+          parse_arg_id(begin, end, precision_adapter<Handler, Char>(handler));
+    }
+    if (begin == end) {
+      handler.on_error("invalid format string");
+      return begin;
+    }
+    // The character after the argument id is consumed whether or not it is
+    // the closing brace.
+    const Char closing = *begin;
+    ++begin;
+    if (closing != '}') {
+      handler.on_error("invalid format string");
+      return begin;
+    }
+  } else {
+    handler.on_error("missing precision specifier");
+    return begin;
+  }
+  handler.end_precision();
+  return begin;
+}
+
+// Parses standard format specifiers and sends notifications about parsed
+// components to handler.
+//
+// Components are parsed in this order, each one optional: fill and alignment,
+// sign ('+', '-' or ' '), '#', '0', width, precision (introduced by '.') and
+// a single type character. Parsing stops at end or at '}'. Returns the
+// position at which parsing stopped.
+template <typename Char, typename SpecHandler>
+FMT_CONSTEXPR const Char* parse_format_specs(const Char* begin, const Char* end,
+                                             SpecHandler&& handler) {
+  if (begin == end || *begin == '}') return begin;
+
+  begin = parse_align(begin, end, handler);
+  if (begin == end) return begin;
+
+  // Parse sign. The character is compared at its full width, as is done for
+  // '#' and '0' below. Narrowing it to char first would let a wide character
+  // whose low byte equals '+', '-' or ' ' be taken for a sign flag.
+  const Char sign_char = *begin;
+  if (sign_char == '+') {
+    handler.on_plus();
+    ++begin;
+  } else if (sign_char == '-') {
+    handler.on_minus();
+    ++begin;
+  } else if (sign_char == ' ') {
+    handler.on_space();
+    ++begin;
+  }
+  if (begin == end) return begin;
+
+  // Parse alternate-form flag.
+  if (*begin == '#') {
+    handler.on_hash();
+    if (++begin == end) return begin;
+  }
+
+  // Parse zero flag.
+  if (*begin == '0') {
+    handler.on_zero();
+    if (++begin == end) return begin;
+  }
+
+  begin = parse_width(begin, end, handler);
+  if (begin == end) return begin;
+
+  // Parse precision.
+  if (*begin == '.') {
+    begin = parse_precision(begin, end, handler);
+  }
+
+  // Parse type.
+  if (begin != end && *begin != '}') handler.on_type(*begin++);
+  return begin;
+}
+
+// Searches [first, last) for value. The position is returned through the out
+// parameter (to work around gcc bug 77539): on success out refers to the
+// first match and true is returned; otherwise out equals last and false is
+// returned.
+template <bool IS_CONSTEXPR, typename T, typename Ptr = const T*>
+FMT_CONSTEXPR bool find(Ptr first, Ptr last, T value, Ptr& out) {
+  out = first;
+  while (out != last) {
+    if (*out == value) return true;
+    ++out;
+  }
+  return false;
+}
+
+// Non-constexpr specialization for char that delegates to std::memchr.
+// When value is not found, out is set to nullptr.
+template <>
+inline bool find<false, char>(const char* first, const char* last, char value,
+                              const char*& out) {
+  const auto length = internal::to_unsigned(last - first);
+  const void* hit = std::memchr(first, value, length);
+  out = static_cast<const char*>(hit);
+  return out != nullptr;
+}
+
+// Presents a format-string handler through the IDHandler interface used by
+// parse_arg_id: every kind of argument id is forwarded to on_arg_id.
+template <typename Handler, typename Char> struct id_adapter {
+  Handler& handler;
+
+  // Omitted id.
+  FMT_CONSTEXPR void operator()() {
+    handler.on_arg_id();
+  }
+
+  // Numeric id.
+  FMT_CONSTEXPR void operator()(int id) {
+    handler.on_arg_id(id);
+  }
+
+  // Named id.
+  FMT_CONSTEXPR void operator()(basic_string_view<Char> id) {
+    handler.on_arg_id(id);
+  }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    handler.on_error(message);
+  }
+};
+
+// Parses format_str and notifies handler of each component:
+//  - on_text(begin, end) for literal text; "{{" and "}}" are each reported as
+//    a single brace,
+//  - on_arg_id(...) for the argument id of a replacement field,
+//  - on_replacement_field(p) for a field without format specs, with p at the
+//    closing '}',
+//  - on_format_specs(begin, end) for the specs after ':'; it must return the
+//    position of the closing '}',
+//  - on_error(message) for malformed input.
+// IS_CONSTEXPR selects the find implementation used to scan for braces.
+template <bool IS_CONSTEXPR, typename Char, typename Handler>
+FMT_CONSTEXPR void parse_format_string(basic_string_view<Char> format_str,
+                                       Handler&& handler) {
+  // Emits literal text in [begin, end), collapsing "}}" into "}" and
+  // reporting a lone '}' as an error.
+  struct pfs_writer {
+    FMT_CONSTEXPR void operator()(const Char* begin, const Char* end) {
+      if (begin == end) return;
+      for (;;) {
+        const Char* p = nullptr;
+        if (!find<IS_CONSTEXPR>(begin, end, '}', p))
+          return handler_.on_text(begin, end);
+        ++p;
+        if (p == end || *p != '}')
+          return handler_.on_error("unmatched '}' in format string");
+        // Text up to and including the first '}' of the pair.
+        handler_.on_text(begin, p);
+        begin = p + 1;
+      }
+    }
+    Handler& handler_;
+  } write{handler};
+  auto begin = format_str.data();
+  auto end = begin + format_str.size();
+  while (begin != end) {
+    // Doing two passes with memchr (one for '{' and another for '}') is up to
+    // 2.5x faster than the naive one-pass implementation on big format strings.
+    const Char* p = begin;
+    if (*begin != '{' && !find<IS_CONSTEXPR>(begin, end, '{', p))
+      return write(begin, end);
+    write(begin, p);
+    ++p;
+    if (p == end) return handler.on_error("invalid format string");
+    // Compare at full character width, as is done for '{' below. Narrowing to
+    // char first would let a wide character whose low byte equals '}' close
+    // the replacement field.
+    if (*p == '}') {
+      handler.on_arg_id();
+      handler.on_replacement_field(p);
+    } else if (*p == '{') {
+      // Escaped "{{": emit a single '{'.
+      handler.on_text(p, p + 1);
+    } else {
+      p = parse_arg_id(p, end, id_adapter<Handler, Char>{handler});
+      Char c = p != end ? *p : Char();
+      if (c == '}') {
+        handler.on_replacement_field(p);
+      } else if (c == ':') {
+        p = handler.on_format_specs(p + 1, end);
+        if (p == end || *p != '}')
+          return handler.on_error("unknown format specifier");
+      } else {
+        return handler.on_error("missing '}' in format string");
+      }
+    }
+    begin = p + 1;
+  }
+}
+
+// Parses the format specs for an argument of type T by default-constructing
+// the formatter selected for T and calling its parse function on ctx.
+// The formatter for the mapped type is used when one exists; otherwise
+// internal::fallback_formatter<T> is used.
+template <typename T, typename ParseContext>
+FMT_CONSTEXPR const typename ParseContext::char_type* parse_format_specs(
+    ParseContext& ctx) {
+  using char_type = typename ParseContext::char_type;
+  using context = buffer_context<char_type>;
+  // Custom types are kept as they are; every other type is replaced by the
+  // type arg_mapper maps it to.
+  using mapped_type =
+      conditional_t<internal::mapped_type_constant<T, context>::value !=
+                        internal::custom_type,
+                    decltype(arg_mapper<context>().map(std::declval<T>())), T>;
+  using formatter_type =
+      conditional_t<has_formatter<mapped_type, context>::value,
+                    formatter<mapped_type, char_type>,
+                    internal::fallback_formatter<T, char_type>>;
+  formatter_type spec_parser;
+  return spec_parser.parse(ctx);
+}
+
+// Handler for parse_format_string that checks a format string against the
+// argument types Args.... Argument ids are range-checked against the number
+// of arguments, and the format specs of each argument are parsed by the parse
+// function instantiated for its type. Named arguments are rejected.
+template <typename Char, typename ErrorHandler, typename... Args>
+class format_string_checker {
+ private:
+  using parse_context_type = basic_parse_context<Char, ErrorHandler>;
+  enum { num_args = sizeof...(Args) };
+
+  // Format specifier parsing function.
+  using parse_func = const Char* (*)(parse_context_type&);
+
+ public:
+  // arg_id_ starts at the largest unsigned value, i.e. out of range.
+  explicit FMT_CONSTEXPR format_string_checker(
+      basic_string_view<Char> format_str, ErrorHandler eh)
+      : arg_id_((std::numeric_limits<unsigned>::max)()),
+        context_(format_str, eh),
+        parse_funcs_{&parse_format_specs<Args, parse_context_type>...} {}
+
+  // Literal text and completed replacement fields need no checking.
+  FMT_CONSTEXPR void on_text(const Char*, const Char*) {}
+  FMT_CONSTEXPR void on_replacement_field(const Char*) {}
+
+  FMT_CONSTEXPR void on_arg_id() {
+    arg_id_ = context_.next_arg_id();
+    check_arg_id();
+  }
+
+  FMT_CONSTEXPR void on_arg_id(int id) {
+    arg_id_ = id;
+    context_.check_arg_id(id);
+    check_arg_id();
+  }
+
+  FMT_CONSTEXPR void on_arg_id(basic_string_view<Char>) {
+    on_error("compile-time checks don't support named arguments");
+  }
+
+  // Parses the specs with the parse function of the current argument, or
+  // returns begin unchanged when the current id is out of range.
+  FMT_CONSTEXPR const Char* on_format_specs(const Char* begin, const Char*) {
+    advance_to(context_, begin);
+    if (arg_id_ < num_args) {
+      return parse_funcs_[arg_id_](context_);
+    }
+    return begin;
+  }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    context_.on_error(message);
+  }
+
+ private:
+  FMT_CONSTEXPR void check_arg_id() {
+    if (arg_id_ >= num_args) {
+      context_.on_error("argument index out of range");
+    }
+  }
+
+  unsigned arg_id_;
+  parse_context_type context_;
+  // At least one slot so that the array is never zero-sized.
+  parse_func parse_funcs_[num_args > 0 ? num_args : 1];
+};
+
+// Runs format_string_checker over s. Always returns true; problems are
+// reported through the error handler while parsing.
+template <typename Char, typename ErrorHandler, typename... Args>
+FMT_CONSTEXPR bool do_check_format_string(basic_string_view<Char> s,
+                                          ErrorHandler eh = ErrorHandler()) {
+  using checker_type = format_string_checker<Char, ErrorHandler, Args...>;
+  checker_type checker(s, eh);
+  parse_format_string<true>(s, checker);
+  return true;
+}
+
+// Checks a compile-time format string against Args... by evaluating
+// do_check_format_string in the initializer of a FMT_CONSTEXPR_DECL variable.
+// The resulting value is not used.
+template <typename... Args, typename S,
+          enable_if_t<(is_compile_string<S>::value), int>>
+void check_format_string(S format_str) {
+  using char_type = typename S::char_type;
+  FMT_CONSTEXPR_DECL bool checked =
+      internal::do_check_format_string<char_type, internal::error_handler,
+                                       Args...>(to_string_view(format_str));
+  (void)checked;
+}
+
+// Resolves a width or precision stored as an argument reference. For an
+// index or name reference the argument is fetched from ctx and written to
+// value through set_dynamic_spec; an empty reference leaves value untouched.
+// format_str is the string against which name references were recorded.
+template <template <typename> class Handler, typename Spec, typename Context>
+void handle_dynamic_spec(Spec& value, arg_ref<typename Context::char_type> ref,
+                         Context& ctx,
+                         const typename Context::char_type* format_str) {
+  if (ref.kind == arg_id_kind::index) {
+    internal::set_dynamic_spec<Handler>(value, ctx.arg(ref.val.index),
+                                        ctx.error_handler());
+  } else if (ref.kind == arg_id_kind::name) {
+    const auto arg_id = ref.val.name.to_view(format_str);
+    internal::set_dynamic_spec<Handler>(value, ctx.arg(arg_id),
+                                        ctx.error_handler());
+  }
+}
+}  // namespace internal
+
+// Aliases, marked with FMT_DEPRECATED_ALIAS, for the writer types that live
+// in namespace internal.
+template <typename Range>
+using basic_writer FMT_DEPRECATED_ALIAS = internal::basic_writer<Range>;
+using writer FMT_DEPRECATED_ALIAS = internal::writer;
+using wwriter FMT_DEPRECATED_ALIAS =
+    internal::basic_writer<internal::buffer_range<wchar_t>>;
+
+/** The default argument formatter. */
+template <typename Range>
+class arg_formatter : public internal::arg_formatter_base<Range> {
+ private:
+  using base = internal::arg_formatter_base<Range>;
+  using char_type = typename Range::value_type;
+  using context_type = basic_format_context<typename base::iterator, char_type>;
+  using parse_context_type = basic_parse_context<char_type>;
+
+  context_type& ctx_;
+  parse_context_type* parse_ctx_;
+
+ public:
+  using range = Range;
+  using iterator = typename base::iterator;
+  using format_specs = typename base::format_specs;
+
+  /**
+    \rst
+    Constructs an argument formatter object.
+    *ctx* is a reference to the formatting context,
+    *parse_ctx* is an optional pointer to the parse context; it is
+    dereferenced when an argument of a user-defined type is formatted,
+    *specs* contains format specifier information for standard argument types.
+    \endrst
+   */
+  explicit arg_formatter(context_type& ctx,
+                         parse_context_type* parse_ctx = nullptr,
+                         format_specs* specs = nullptr)
+      : base(Range(ctx.out()), specs, ctx.locale()),
+        ctx_(ctx),
+        parse_ctx_(parse_ctx) {}
+
+  using base::operator();
+
+  /** Formats an argument of a user-defined type. */
+  iterator operator()(typename basic_format_arg<context_type>::handle handle) {
+    parse_context_type& parse_ctx = *parse_ctx_;
+    handle.format(parse_ctx, ctx_);
+    return this->out();
+  }
+};
+
+/**
+ An error returned by an operating system or a language runtime,
+ for example a file opening error.
+*/
+class FMT_API system_error : public std::runtime_error {
+ private:
+  // Called by the public constructor with the error code, the message format
+  // string and its arguments. Only declared here.
+  void init(int err_code, string_view format_str, format_args args);
+
+ protected:
+  int error_code_;
+
+  // Creates an error with an empty message and error code 0; accessible to
+  // derived classes only.
+  system_error() : std::runtime_error(""), error_code_(0) {}
+
+ public:
+  /**
+   \rst
+   Constructs a :class:`fmt::system_error` object with a description
+   formatted with `fmt::format_system_error`. *message* and additional
+   arguments passed into the constructor are formatted similarly to
+   `fmt::format`.
+
+   **Example**::
+
+     // This throws a system_error with the description
+     //   cannot open file 'madeup': No such file or directory
+     // or similar (system message may vary).
+     const char *filename = "madeup";
+     std::FILE *file = std::fopen(filename, "r");
+     if (!file)
+       throw fmt::system_error(errno, "cannot open file '{}'", filename);
+   \endrst
+  */
+  template <typename... Args>
+  system_error(int error_code, string_view message, const Args&... args)
+      : std::runtime_error("") {
+    // The base starts with an empty message and error_code_ is not set in the
+    // initializer list; presumably init installs both the description and the
+    // error code -- confirm against its definition.
+    init(error_code, message, make_format_args(args...));
+  }
+  ~system_error() FMT_NOEXCEPT;
+
+  /** Returns the stored error code. */
+  int error_code() const { return error_code_; }
+};
+
+/**
+  \rst
+  Formats an error returned by an operating system or a language runtime,
+  for example a file opening error, and writes it to *out* in the following
+  form:
+
+  .. parsed-literal::
+     *<message>*: *<system-message>*
+
+  where *<message>* is the passed message and *<system-message>* is
+  the system message corresponding to the error code.
+  *error_code* is a system error code as given by ``errno``.
+  If *error_code* is not a valid error code such as -1, the system message
+  may look like "Unknown error -1" and is platform-dependent.
+
+  The function is declared ``FMT_NOEXCEPT``.
+  \endrst
+ */
+FMT_API void format_system_error(internal::buffer<char>& out, int error_code,
+                                 fmt::string_view message) FMT_NOEXCEPT;
+
+// Collects the formatting flags implied by a floating-point type specifier.
+// The on_* callbacks are invoked for the category the specifier belongs to;
+// each sets the flags for that category and, where an upper-case variant of
+// the specifier exists, records whether it was used.
+struct float_spec_handler {
+  char type;           // The type specifier being handled.
+  bool upper;          // Upper-case variant of the specifier was used.
+  bool fixed;          // Fixed notation requested ('F'/percent categories).
+  bool as_percentage;  // Value is to be shown as a percentage.
+  bool use_locale;     // Locale-specific formatting requested.
+
+  explicit float_spec_handler(char t)
+      : type(t),
+        upper(false),
+        fixed(false),
+        as_percentage(false),
+        use_locale(false) {}
+
+  void on_general() { upper = upper || type == 'G'; }
+
+  void on_exp() { upper = upper || type == 'E'; }
+
+  void on_fixed() {
+    upper = upper || type == 'F';
+    fixed = true;
+  }
+
+  void on_percent() {
+    as_percentage = true;
+    fixed = true;
+  }
+
+  void on_hex() { upper = upper || type == 'A'; }
+
+  void on_num() { use_locale = true; }
+
+  // Unknown specifier.
+  FMT_NORETURN void on_error() {
+    FMT_THROW(format_error("invalid type specifier"));
+  }
+};
+
+// Writes the floating-point value according to specs.
+// USE_GRISU enables the grisu_format path; internal::sprintf_format is used
+// when that path is disabled, not applicable to the requested type, or when
+// grisu_format returns false.
+template <typename Range>
+template <typename T, bool USE_GRISU>
+void internal::basic_writer<Range>::write_double(T value,
+                                                 const format_specs& specs) {
+  // Check type.
+  // The handler's flags (upper, fixed, as_percentage, use_locale) are set by
+  // the callback that handle_float_type_spec invokes for the specifier.
+  float_spec_handler handler(static_cast<char>(specs.type));
+  internal::handle_float_type_spec(handler.type, handler);
+
+  // Determine the sign character and continue with the magnitude.
+  char sign = 0;
+  // Use signbit instead of value < 0 since the latter is always false for NaN.
+  if (std::signbit(value)) {
+    sign = '-';
+    value = -value;
+  } else if (specs.sign != sign::none) {
+    if (specs.sign == sign::plus)
+      sign = '+';
+    else if (specs.sign == sign::space)
+      sign = ' ';
+  }
+
+  if (!std::isfinite(value)) {
+    // Format infinity and NaN ourselves because sprintf's output is not
+    // consistent across platforms.
+    const char* str = std::isinf(value) ? (handler.upper ? "INF" : "inf")
+                                        : (handler.upper ? "NAN" : "nan");
+    return write_padded(specs,
+                        inf_or_nan_writer{sign, handler.as_percentage, str});
+  }
+
+  if (handler.as_percentage) value *= 100;
+
+  memory_buffer buffer;
+  int exp = 0;
+  // An explicit precision is used as given. Without one, the (negative)
+  // default is kept when no type was specified and 6 is used otherwise.
+  int precision = specs.precision >= 0 || !specs.type ? specs.precision : 6;
+  unsigned options = handler.fixed ? internal::grisu_options::fixed : 0;
+  // grisu_format is attempted only when USE_GRISU is set and the type is not
+  // one of 'a', 'A', 'e', 'E'; its return value decides whether the result in
+  // buffer is used.
+  bool use_grisu = USE_GRISU &&
+                   (specs.type != 'a' && specs.type != 'A' &&
+                    specs.type != 'e' && specs.type != 'E') &&
+                   internal::grisu_format(static_cast<double>(value), buffer,
+                                          precision, options, exp);
+  char* decimal_point_pos = nullptr;
+  if (!use_grisu)
+    decimal_point_pos = internal::sprintf_format(value, buffer, specs);
+
+  if (handler.as_percentage) {
+    buffer.push_back('%');
+    --exp;  // Adjust decimal place position.
+  }
+  // Work on a copy of the specs so that alignment and width can be adjusted.
+  format_specs as = specs;
+  if (specs.align == align::numeric) {
+    // Numeric alignment: the sign is written first, ahead of any padding, and
+    // the remaining width shrinks by the one character already written.
+    if (sign) {
+      auto&& it = reserve(1);
+      *it++ = static_cast<char_type>(sign);
+      sign = 0;
+      if (as.width) --as.width;
+    }
+    as.align = align::right;
+  } else if (specs.align == align::none) {
+    // Numbers are right-aligned when no alignment was requested.
+    as.align = align::right;
+  }
+  // The decimal point comes from the locale only when the specifier
+  // requested locale-specific formatting.
+  char_type decimal_point = handler.use_locale
+                                ? internal::decimal_point<char_type>(locale_)
+                                : static_cast<char_type>('.');
+  if (use_grisu) {
+    auto params = internal::gen_digits_params();
+    params.fixed = handler.fixed;
+    params.num_digits = precision;
+    params.trailing_zeros =
+        (precision != 0 && (handler.fixed || !specs.type)) || specs.alt;
+    write_padded(as, grisu_writer(sign, buffer, exp, params, decimal_point));
+  } else {
+    write_padded(as,
+                 double_writer{sign, buffer, decimal_point_pos, decimal_point});
+  }
+}
+
+// Reports a system error without throwing an exception.
+// Can be used to report errors from destructors.
+// error_code is the system error code and message the accompanying text.
+// The function is declared FMT_NOEXCEPT.
+FMT_API void report_system_error(int error_code,
+                                 string_view message) FMT_NOEXCEPT;
+
+#if FMT_USE_WINDOWS_H
+
+/** A Windows error. */
+class windows_error : public system_error {
+ private:
+  // Called by the constructor with the error code, the message format string
+  // and its arguments. Only declared here.
+  FMT_API void init(int error_code, string_view format_str, format_args args);
+
+ public:
+  /**
+   \rst
+   Constructs a :class:`fmt::windows_error` object with the description
+   of the form
+
+   .. parsed-literal::
+     *<message>*: *<system-message>*
+
+   where *<message>* is the formatted message and *<system-message>* is the
+   system message corresponding to the error code.
+   *error_code* is a Windows error code as given by ``GetLastError``.
+   If *error_code* is not a valid error code such as -1, the system message
+   will look like "error -1".
+
+   **Example**::
+
+     // This throws a windows_error with the description
+     //   cannot open file 'madeup': The system cannot find the file specified.
+     // or similar (system message may vary).
+     const char *filename = "madeup";
+     LPOFSTRUCT of = LPOFSTRUCT();
+     HFILE file = OpenFile(filename, &of, OF_READ);
+     if (file == HFILE_ERROR) {
+       throw fmt::windows_error(GetLastError(),
+                                "cannot open file '{}'", filename);
+     }
+   \endrst
+  */
+  template <typename... Args>
+  windows_error(int error_code, string_view message, const Args&... args) {
+    // The system_error base is default-constructed (empty message, error
+    // code 0) before init runs.
+    init(error_code, message, make_format_args(args...));
+  }
+};
+
+// Reports a Windows error without throwing an exception.
+// Can be used to report errors from destructors.
+// error_code is the Windows error code and message the accompanying text.
+// The function is declared FMT_NOEXCEPT.
+FMT_API void report_windows_error(int error_code,
+                                  string_view message) FMT_NOEXCEPT;
+
+#endif
+
+/** Fast integer formatter. */
+class format_int {
+ private:
+  // Buffer should be large enough to hold all digits (digits10 + 1),
+  // a sign and a null character.
+  enum { buffer_size = std::numeric_limits<unsigned long long>::digits10 + 3 };
+  mutable char buffer_[buffer_size];
+  char* str_;
+
+  // Writes the decimal digits of value backwards, ending just before the last
+  // buffer slot, and returns a pointer to the first digit.
+  char* format_decimal(unsigned long long value) {
+    char* out = buffer_ + (buffer_size - 1);  // Parens to workaround MSVC bug.
+    // Integer division is slow so do it for a group of two digits instead
+    // of for every digit. The idea comes from the talk by Alexandrescu
+    // "Three Optimization Tips for C++". See speed-test for a comparison.
+    for (; value >= 100; value /= 100) {
+      const unsigned pair = static_cast<unsigned>((value % 100) * 2);
+      *--out = internal::data::digits[pair + 1];
+      *--out = internal::data::digits[pair];
+    }
+    if (value >= 10) {
+      // Two digits remain.
+      const unsigned pair = static_cast<unsigned>(value * 2);
+      *--out = internal::data::digits[pair + 1];
+      *--out = internal::data::digits[pair];
+    } else {
+      // A single digit remains.
+      *--out = static_cast<char>('0' + value);
+    }
+    return out;
+  }
+
+  // Formats the magnitude and then prepends '-' for negative values. The
+  // magnitude is computed in unsigned arithmetic.
+  void format_signed(long long value) {
+    const bool negative = value < 0;
+    unsigned long long magnitude = static_cast<unsigned long long>(value);
+    if (negative) magnitude = 0 - magnitude;
+    str_ = format_decimal(magnitude);
+    if (negative) *--str_ = '-';
+  }
+
+ public:
+  explicit format_int(int value) { format_signed(value); }
+  explicit format_int(long value) { format_signed(value); }
+  explicit format_int(long long value) { format_signed(value); }
+  explicit format_int(unsigned value) : str_(format_decimal(value)) {}
+  explicit format_int(unsigned long value) : str_(format_decimal(value)) {}
+  explicit format_int(unsigned long long value) : str_(format_decimal(value)) {}
+
+  /** Returns the number of characters written to the output buffer. */
+  std::size_t size() const {
+    const char* content_end = buffer_ + (buffer_size - 1);
+    return internal::to_unsigned(content_end - str_);
+  }
+
+  /**
+    Returns a pointer to the output buffer content. No terminating null
+    character is appended.
+   */
+  const char* data() const { return str_; }
+
+  /**
+    Returns a pointer to the output buffer content with terminating null
+    character appended.
+   */
+  const char* c_str() const {
+    // The last buffer slot is reserved for the terminator.
+    buffer_[buffer_size - 1] = '\0';
+    return str_;
+  }
+
+  /**
+    \rst
+    Returns the content of the output buffer as an ``std::string``.
+    \endrst
+   */
+  std::string str() const { return std::string(data(), size()); }
+};
+
+// A formatter specialization for the core types corresponding to internal::type
+// constants.
+template <typename T, typename Char>
+struct formatter<T, Char,
+                 enable_if_t<internal::type_constant<T, Char>::value !=
+                             internal::custom_type>> {
+  FMT_CONSTEXPR formatter() : format_str_(nullptr) {}
+
+  // Parses format specifiers, stopping either at the end of the range or at the
+  // terminating '}', then checks the type specifier against T's type constant.
+  template <typename ParseContext>
+  FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    format_str_ = ctx.begin();  // Kept for handle_dynamic_spec in format().
+    using handler_type = internal::dynamic_specs_handler<ParseContext>;
+    auto type = internal::type_constant<T, Char>::value;
+    internal::specs_checker<handler_type> handler(handler_type(specs_, ctx),
+                                                  type);
+    auto it = parse_format_specs(ctx.begin(), ctx.end(), handler);
+    auto eh = ctx.error_handler();
+    switch (type) {
+    case internal::none_type:
+    case internal::named_arg_type:
+      FMT_ASSERT(false, "invalid argument type");
+      break;
+    case internal::int_type:
+    case internal::uint_type:
+    case internal::long_long_type:
+    case internal::ulong_long_type:
+    case internal::bool_type:
+      handle_int_type_spec(specs_.type,
+                           internal::int_type_checker<decltype(eh)>(eh));
+      break;
+    case internal::char_type:
+      handle_char_specs(
+          &specs_, internal::char_specs_checker<decltype(eh)>(specs_.type, eh));
+      break;
+    case internal::double_type:
+    case internal::long_double_type:
+      handle_float_type_spec(specs_.type,
+                             internal::float_type_checker<decltype(eh)>(eh));
+      break;
+    case internal::cstring_type:
+      internal::handle_cstring_type_spec(
+          specs_.type, internal::cstring_type_checker<decltype(eh)>(eh));
+      break;
+    case internal::string_type:
+      internal::check_string_type_spec(specs_.type, eh);
+      break;
+    case internal::pointer_type:
+      internal::check_pointer_type_spec(specs_.type, eh);
+      break;
+    case internal::custom_type:
+      // Custom format specifiers should be checked in parse functions of
+      // formatter specializations.
+      break;
+    }
+    return it;
+  }
+
+  template <typename FormatContext>
+  auto format(const T& val, FormatContext& ctx) -> decltype(ctx.out()) {
+    internal::handle_dynamic_spec<internal::width_checker>(
+        specs_.width, specs_.width_ref, ctx, format_str_);
+    internal::handle_dynamic_spec<internal::precision_checker>(
+        specs_.precision, specs_.precision_ref, ctx, format_str_);
+    using range_type =
+        internal::output_range<typename FormatContext::iterator,
+                               typename FormatContext::char_type>;
+    return visit_format_arg(arg_formatter<range_type>(ctx, nullptr, &specs_),
+                            internal::make_arg<FormatContext>(val));
+  }
+
+ private:
+  internal::dynamic_format_specs<Char> specs_;  // Handed to the specs handler.
+  const Char* format_str_;  // ctx.begin() as seen by parse().
+};
+
+#define FMT_FORMAT_AS(Type, Base)                                             \
+  template <typename Char>                                                    \
+  struct formatter<Type, Char> : formatter<Base, Char> {                      \
+    template <typename FormatContext>                                         \
+    auto format(const Type& val, FormatContext& ctx) -> decltype(ctx.out()) { \
+      return formatter<Base, Char>::format(val, ctx);                         \
+    }                                                                         \
+  }  // Type is formatted by delegating to formatter<Base, Char>.
+
+FMT_FORMAT_AS(signed char, int);
+FMT_FORMAT_AS(unsigned char, unsigned);
+FMT_FORMAT_AS(short, int);
+FMT_FORMAT_AS(unsigned short, unsigned);
+FMT_FORMAT_AS(long, long long);
+FMT_FORMAT_AS(unsigned long, unsigned long long);
+FMT_FORMAT_AS(float, double);
+FMT_FORMAT_AS(Char*, const Char*);
+FMT_FORMAT_AS(std::basic_string<Char>, basic_string_view<Char>);
+FMT_FORMAT_AS(std::nullptr_t, const void*);
+FMT_FORMAT_AS(internal::std_string_view<Char>, basic_string_view<Char>);
+
+template <typename Char>
+struct formatter<void*, Char> : formatter<const void*, Char> {
+  template <typename FormatContext>
+  auto format(void* val, FormatContext& ctx) -> decltype(ctx.out()) {
+    return formatter<const void*, Char>::format(val, ctx);
+  }  // Delegates to the const void* formatter.
+};
+
+template <typename Char, size_t N>
+struct formatter<Char[N], Char> : formatter<basic_string_view<Char>, Char> {
+  template <typename FormatContext>
+  auto format(const Char* val, FormatContext& ctx) -> decltype(ctx.out()) {
+    return formatter<basic_string_view<Char>, Char>::format(val, ctx);
+  }  // Formats the array through the basic_string_view formatter.
+};
+
+// A formatter for types known only at run time such as variant alternatives.
+//
+// Usage (NOTE(review): sketch does not match the format(val, ctx) member below):
+//   using variant = std::variant<int, std::string>;
+//   template <>
+//   struct formatter<variant>: dynamic_formatter<> {
+//     void format(buffer &buf, const variant &v, context &ctx) {
+//       visit([&](const auto &val) { format(buf, val, ctx); }, v);
+//     }
+//   };
+template <typename Char = char> class dynamic_formatter {
+ private:
+  struct null_handler : internal::error_handler {
+    void on_align(align_t) {}
+    void on_plus() {}
+    void on_minus() {}
+    void on_space() {}
+    void on_hash() {}
+  };  // No-op callbacks for the specs_checker used in format().
+
+ public:
+  template <typename ParseContext>
+  auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    format_str_ = ctx.begin();
+    // Checks are deferred to formatting time when the argument type is known.
+    internal::dynamic_specs_handler<ParseContext> handler(specs_, ctx);
+    return parse_format_specs(ctx.begin(), ctx.end(), handler);
+  }
+
+  template <typename T, typename FormatContext>
+  auto format(const T& val, FormatContext& ctx) -> decltype(ctx.out()) {
+    handle_specs(ctx);
+    internal::specs_checker<null_handler> checker(
+        null_handler(),
+        internal::mapped_type_constant<T, FormatContext>::value);
+    checker.on_align(specs_.align);
+    switch (specs_.sign) {
+    case sign::none:
+      break;
+    case sign::plus:
+      checker.on_plus();
+      break;
+    case sign::minus:
+      checker.on_minus();
+      break;
+    case sign::space:
+      checker.on_space();
+      break;
+    }
+    if (specs_.alt) checker.on_hash();
+    if (specs_.precision >= 0) checker.end_precision();
+    using range = internal::output_range<typename FormatContext::iterator,
+                                         typename FormatContext::char_type>;
+    visit_format_arg(arg_formatter<range>(ctx, nullptr, &specs_),
+                     internal::make_arg<FormatContext>(val));
+    return ctx.out();
+  }
+
+ private:
+  template <typename Context> void handle_specs(Context& ctx) {
+    internal::handle_dynamic_spec<internal::width_checker>(
+        specs_.width, specs_.width_ref, ctx, format_str_);
+    internal::handle_dynamic_spec<internal::precision_checker>(
+        specs_.precision, specs_.precision_ref, ctx, format_str_);
+  }
+
+  internal::dynamic_format_specs<Char> specs_;  // Handed to the specs handler.
+  const Char* format_str_;  // ctx.begin() as seen by parse().
+};
+
+template <typename Range, typename Char>
+typename basic_format_context<Range, Char>::format_arg
+basic_format_context<Range, Char>::arg(basic_string_view<char_type> name) {
+  map_.init(args_);
+  format_arg hit = map_.find(name);
+  if (hit.type() == internal::none_type) this->on_error("argument not found");
+  return hit;
+}
+
+template <typename Char, typename ErrorHandler>
+FMT_CONSTEXPR void advance_to(basic_parse_context<Char, ErrorHandler>& ctx,
+                              const Char* p) {
+  ctx.advance_to(ctx.begin() + (p - &*ctx.begin()));
+}  // p is converted to an iterator at the same offset from ctx.begin().
+
+template <typename ArgFormatter, typename Char, typename Context>
+struct format_handler : internal::error_handler {
+  using range = typename ArgFormatter::range;
+
+  format_handler(range r, basic_string_view<Char> str,
+                 basic_format_args<Context> format_args,
+                 internal::locale_ref loc)
+      : parse_context(str), context(r.begin(), format_args, loc) {}
+
+  void on_text(const Char* begin, const Char* end) {
+    auto size = internal::to_unsigned(end - begin);
+    auto out = context.out();
+    auto&& it = internal::reserve(out, size);
+    it = std::copy_n(begin, size, it);
+    context.advance_to(out);
+  }
+
+  void get_arg(int id) { arg = internal::get_arg(context, id); }
+
+  void on_arg_id() { get_arg(parse_context.next_arg_id()); }
+  void on_arg_id(int id) {
+    parse_context.check_arg_id(id);
+    get_arg(id);
+  }
+  void on_arg_id(basic_string_view<Char> id) { arg = context.arg(id); }
+
+  void on_replacement_field(const Char* p) {
+    advance_to(parse_context, p);
+    internal::custom_formatter<Context> f(parse_context, context);
+    if (!visit_format_arg(f, arg))
+      context.advance_to(
+          visit_format_arg(ArgFormatter(context, &parse_context), arg));
+  }
+
+  const Char* on_format_specs(const Char* begin, const Char* end) {
+    advance_to(parse_context, begin);
+    internal::custom_formatter<Context> f(parse_context, context);
+    if (visit_format_arg(f, arg)) return parse_context.begin();
+    basic_format_specs<Char> specs;
+    using internal::specs_handler;
+    using parse_context_t = basic_parse_context<Char>;
+    internal::specs_checker<specs_handler<parse_context_t, Context>> handler(
+        specs_handler<parse_context_t, Context>(specs, parse_context, context),
+        arg.type());
+    begin = parse_format_specs(begin, end, handler);
+    if (begin == end || *begin != '}') on_error("missing '}' in format string");
+    advance_to(parse_context, begin);
+    context.advance_to(
+        visit_format_arg(ArgFormatter(context, &parse_context, &specs), arg));
+    return begin;
+  }
+
+  basic_parse_context<Char> parse_context;  // Built from the format string.
+  Context context;  // Built from the output iterator, arguments and locale.
+  basic_format_arg<Context> arg;  // Set by the most recent on_arg_id().
+};
+
+/** Formats args according to format_str and writes the output to out. */
+template <typename ArgFormatter, typename Char, typename Context>
+typename Context::iterator vformat_to(
+    typename ArgFormatter::range out, basic_string_view<Char> format_str,
+    basic_format_args<Context> args,
+    internal::locale_ref loc = internal::locale_ref()) {
+  format_handler<ArgFormatter, Char, Context> h(out, format_str, args, loc);
+  internal::parse_format_string<false>(format_str, h);
+  return h.context.out();
+}
+
+// Casts ``p`` to ``const void*`` for pointer formatting; the smart-pointer
+// overloads return ``p.get()``. Example:
+//   auto s = format("{}", ptr(p));
+template <typename T> inline const void* ptr(const T* p) { return p; }
+template <typename T> inline const void* ptr(const std::unique_ptr<T>& p) {
+  return p.get();
+}
+template <typename T> inline const void* ptr(const std::shared_ptr<T>& p) {
+  return p.get();
+}
+
+template <typename It, typename Char> struct arg_join : internal::view {
+  It begin;  // First element to format.
+  It end;    // One past the last element to format.
+  basic_string_view<Char> sep;  // Written between consecutive elements.
+
+  arg_join(It b, It e, basic_string_view<Char> s) : begin(b), end(e), sep(s) {}
+};
+
+template <typename It, typename Char>
+struct formatter<arg_join<It, Char>, Char>
+    : formatter<typename std::iterator_traits<It>::value_type, Char> {
+  template <typename FormatContext>
+  auto format(const arg_join<It, Char>& value, FormatContext& ctx)
+      -> decltype(ctx.out()) {
+    using base = formatter<typename std::iterator_traits<It>::value_type, Char>;
+    auto cur = value.begin;
+    auto result = ctx.out();
+    if (!(cur != value.end)) return result;
+    result = base::format(*cur++, ctx);
+    // Every element after the first is preceded by the separator.
+    while (cur != value.end) {
+      result = std::copy(value.sep.begin(), value.sep.end(), result);
+      ctx.advance_to(result);
+      result = base::format(*cur++, ctx);
+    }
+    return result;
+  }
+};
+
+/**
+  Returns an object that formats the elements of the iterator range
+  `[begin, end)`, writing `sep` between consecutive elements.
+ */
+template <typename It>
+arg_join<It, char> join(It begin, It end, string_view sep) {
+  return arg_join<It, char>(begin, end, sep);
+}
+
+template <typename It>
+arg_join<It, wchar_t> join(It begin, It end, wstring_view sep) {
+  return arg_join<It, wchar_t>(begin, end, sep);
+}
+
+/**
+  \rst
+  Returns an object that formats the elements of `range` separated by `sep`.
+
+  **Example**::
+
+    std::vector<int> v = {1, 2, 3};
+    fmt::print("{}", fmt::join(v, ", "));
+    // Output: "1, 2, 3"
+  \endrst
+ */
+template <typename Range>
+arg_join<internal::iterator_t<const Range>, char> join(const Range& range,
+                                                       string_view sep) {
+  return join(std::begin(range), std::end(range), sep);
+}
+
+template <typename Range>
+arg_join<internal::iterator_t<const Range>, wchar_t> join(const Range& range,
+                                                          wstring_view sep) {
+  return join(std::begin(range), std::end(range), sep);
+}
+
+/**
+  \rst
+  Converts *value* to ``std::string`` using the default format for type *T*.
+  It doesn't support user-defined types with custom formatters.
+
+  **Example**::
+
+    #include <fmt/format.h>
+
+    std::string answer = fmt::to_string(42);
+  \endrst
+ */
+template <typename T> inline std::string to_string(const T& value) {
+  return format("{}", value);  // "{}" carries no format specifiers.
+}
+
+/**
+  Converts *value* to ``std::wstring`` using the default format for type *T*.
+ */
+template <typename T> inline std::wstring to_wstring(const T& value) {
+  return format(L"{}", value);  // Wide-character counterpart of "{}".
+}
+
+template <typename Char, std::size_t SIZE>
+std::basic_string<Char> to_string(const basic_memory_buffer<Char, SIZE>& buf) {
+  return std::basic_string<Char>(buf.data(), buf.size());  // Copies buf.
+}
+
+template <typename Char>
+typename buffer_context<Char>::iterator internal::vformat_to(
+    internal::buffer<Char>& buf, basic_string_view<Char> format_str,
+    basic_format_args<buffer_context<Char>> args) {
+  // Runs the generic vformat_to with an arg_formatter over a buffer_range.
+  using formatter_type = arg_formatter<buffer_range<Char>>;
+  return vformat_to<formatter_type>(buf, to_string_view(format_str), args);
+}
+
+template <typename S, typename Char = char_t<S>,
+          FMT_ENABLE_IF(internal::is_string<S>::value)>
+inline typename buffer_context<Char>::iterator vformat_to(
+    internal::buffer<Char>& buf, const S& format_str,
+    basic_format_args<buffer_context<Char>> args) {
+  return internal::vformat_to(buf, to_string_view(format_str), args);
+}
+
+template <typename S, typename... Args, std::size_t SIZE = inline_buffer_size,
+          typename Char = enable_if_t<internal::is_string<S>::value, char_t<S>>>
+inline typename buffer_context<Char>::iterator format_to(
+    basic_memory_buffer<Char, SIZE>& buf, const S& format_str, Args&&... args) {
+  using context_type = buffer_context<Char>;
+  internal::check_format_string<Args...>(format_str);
+  return internal::vformat_to(buf, to_string_view(format_str),
+                              {make_format_args<context_type>(args...)});
+}
+
+template <typename OutputIt, typename Char = char>
+using format_context_t = basic_format_context<OutputIt, Char>;
+// Argument list type matching format_context_t<OutputIt, Char>.
+template <typename OutputIt, typename Char = char>
+using format_args_t = basic_format_args<format_context_t<OutputIt, Char>>;
+
+template <typename S, typename OutputIt, typename... Args,
+          FMT_ENABLE_IF(
+              internal::is_output_iterator<OutputIt>::value &&
+              !internal::is_contiguous_back_insert_iterator<OutputIt>::value)>
+inline OutputIt vformat_to(OutputIt out, const S& format_str,
+                           format_args_t<OutputIt, char_t<S>> args) {
+  using out_range = internal::output_range<OutputIt, char_t<S>>;
+  return vformat_to<arg_formatter<out_range>>(
+      out_range(out), to_string_view(format_str), args);
+}
+
+/**
+ \rst
+ Formats arguments, writes the result through the output iterator ``out`` and
+ returns the iterator past the end of the output range.
+
+ **Example**::
+
+   std::vector<char> out;
+   fmt::format_to(std::back_inserter(out), "{}", 42);
+ \endrst
+ */
+template <typename OutputIt, typename S, typename... Args,
+          FMT_ENABLE_IF(
+              internal::is_output_iterator<OutputIt>::value &&
+              !internal::is_contiguous_back_insert_iterator<OutputIt>::value &&
+              internal::is_string<S>::value)>
+inline OutputIt format_to(OutputIt out, const S& format_str, Args&&... args) {
+  using context_type = format_context_t<OutputIt, char_t<S>>;
+  internal::check_format_string<Args...>(format_str);
+  return vformat_to(out, to_string_view(format_str),
+                    {make_format_args<context_type>(args...)});
+}
+
+template <typename OutputIt> struct format_to_n_result {
+  /** Iterator past the end of the output range. */
+  OutputIt out;
+  /** Total output size, i.e. the size the output has when not truncated. */
+  std::size_t size;
+};
+
+template <typename OutputIt, typename Char = typename OutputIt::value_type>
+using format_to_n_context =
+    format_context_t<fmt::internal::truncating_iterator<OutputIt>, Char>;
+// Argument list type matching format_to_n_context<OutputIt, Char>.
+template <typename OutputIt, typename Char = typename OutputIt::value_type>
+using format_to_n_args = basic_format_args<format_to_n_context<OutputIt, Char>>;
+
+template <typename OutputIt, typename Char, typename... Args>
+inline format_arg_store<format_to_n_context<OutputIt, Char>, Args...>
+make_format_to_n_args(const Args&... args) {
+  using store = format_arg_store<format_to_n_context<OutputIt, Char>, Args...>;
+  return store(args...);
+}
+
+template <typename OutputIt, typename Char, typename... Args,
+          FMT_ENABLE_IF(internal::is_output_iterator<OutputIt>::value)>
+inline format_to_n_result<OutputIt> vformat_to_n(
+    OutputIt out, std::size_t n, basic_string_view<Char> format_str,
+    format_to_n_args<OutputIt, Char> args) {
+  internal::truncating_iterator<OutputIt> limited(out, n);
+  auto last = vformat_to(limited, format_str, args);
+  return {last.base(), last.count()};
+}
+
+/**
+ \rst
+ Formats arguments, writes up to ``n`` characters of the result through the
+ output iterator ``out`` and returns the total output size together with the
+ iterator past the end of the output range.
+ \endrst
+ */
+template <typename OutputIt, typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_string<S>::value&&
+                            internal::is_output_iterator<OutputIt>::value)>
+inline format_to_n_result<OutputIt> format_to_n(OutputIt out, std::size_t n,
+                                                const S& format_str,
+                                                const Args&... args) {
+  using context_type = format_to_n_context<OutputIt, char_t<S>>;
+  internal::check_format_string<Args...>(format_str);
+  return vformat_to_n(out, n, to_string_view(format_str),
+                      {make_format_args<context_type>(args...)});
+}
+
+template <typename Char>
+inline std::basic_string<Char> internal::vformat(
+    basic_string_view<Char> format_str,
+    basic_format_args<buffer_context<Char>> args) {
+  basic_memory_buffer<Char> formatted;
+  internal::vformat_to(formatted, format_str, args);
+  return fmt::to_string(formatted);
+}
+
+/**
+  Returns the number of characters that
+  ``format(format_str, args...)`` produces.
+ */
+template <typename... Args>
+inline std::size_t formatted_size(string_view format_str, const Args&... args) {
+  return format_to(internal::counting_iterator<char>(), format_str, args...)
+      .count();
+}
+
+#if FMT_USE_USER_DEFINED_LITERALS
+namespace internal {
+
+#  if FMT_USE_UDL_TEMPLATE
+template <typename Char, Char... CHARS> class udl_formatter {
+ public:
+  template <typename... Args>
+  std::basic_string<Char> operator()(Args&&... args) const {
+    FMT_CONSTEXPR_DECL Char s[] = {CHARS..., '\0'};  // CHARS, null-terminated.
+    FMT_CONSTEXPR_DECL bool invalid_format =
+        do_check_format_string<Char, error_handler, Args...>(
+            basic_string_view<Char>(s, sizeof...(CHARS)));
+    (void)invalid_format;  // Result is intentionally unused.
+    return format(s, std::forward<Args>(args)...);
+  }
+};
+#  else  // !FMT_USE_UDL_TEMPLATE
+template <typename Char> struct udl_formatter {
+  basic_string_view<Char> str;  // Used as the format string by operator().
+
+  template <typename... Args>
+  std::basic_string<Char> operator()(Args&&... args) const {
+    return format(str, std::forward<Args>(args)...);
+  }
+};
+#  endif  // FMT_USE_UDL_TEMPLATE
+
+template <typename Char> struct udl_arg {
+  basic_string_view<Char> str;  // First member of the resulting named_arg.
+
+  template <typename T> named_arg<T, Char> operator=(T&& value) const {
+    return {str, std::forward<T>(value)};
+  }
+};
+
+}  // namespace internal
+
+inline namespace literals {
+#  if FMT_USE_UDL_TEMPLATE
+#    pragma GCC diagnostic push
+#    if FMT_CLANG_VERSION
+#      pragma GCC diagnostic ignored "-Wgnu-string-literal-operator-template"
+#    endif  // FMT_CLANG_VERSION
+template <typename Char, Char... CHARS>
+FMT_CONSTEXPR internal::udl_formatter<Char, CHARS...> operator""_format() {
+  return {};  // The literal's characters travel in the CHARS template pack.
+}
+#    pragma GCC diagnostic pop
+#  else  // !FMT_USE_UDL_TEMPLATE
+/**
+  \rst
+  User-defined literal equivalent of :func:`fmt::format`.
+
+  **Example**::
+
+    using namespace fmt::literals;
+    std::string message = "The answer is {}"_format(42);
+  \endrst
+ */
+FMT_CONSTEXPR internal::udl_formatter<char> operator"" _format(const char* s,
+                                                               std::size_t n) {
+  return {{s, n}};
+}
+FMT_CONSTEXPR internal::udl_formatter<wchar_t> operator"" _format(
+    const wchar_t* s, std::size_t n) {
+  return {{s, n}};
+}
+#  endif  // FMT_USE_UDL_TEMPLATE
+
+/**
+  \rst
+  User-defined literal equivalent of :func:`fmt::arg`.
+
+  **Example**::
+
+    using namespace fmt::literals;
+    fmt::print("Elapsed time: {s:.2f} seconds", "s"_a=1.23);
+  \endrst
+ */
+FMT_CONSTEXPR internal::udl_arg<char> operator"" _a(const char* s,
+                                                    std::size_t n) {
+  return {{s, n}};
+}
+FMT_CONSTEXPR internal::udl_arg<wchar_t> operator"" _a(const wchar_t* s,
+                                                       std::size_t n) {
+  return {{s, n}};
+}
+}  // namespace literals
+#endif  // FMT_USE_USER_DEFINED_LITERALS
+FMT_END_NAMESPACE
+
+/**
+  \rst
+  Constructs a compile-time format string from the string literal *s*.
+
+  **Example**::
+
+    // A compile-time error because 'd' is an invalid specifier for strings.
+    std::string s = format(FMT_STRING("{:d}"), "foo");
+  \endrst
+ */
+#define FMT_STRING(s)                                                    \
+  [] {                                                                   \
+    struct str : fmt::compile_string {                                   \
+      using char_type = typename std::remove_cv<std::remove_pointer<     \
+          typename std::decay<decltype(s)>::type>::type>::type;          \
+      FMT_CONSTEXPR operator fmt::basic_string_view<char_type>() const { \
+        return {s, sizeof(s) / sizeof(char_type) - 1};                   \
+      }                                                                  \
+    } result;                                                            \
+    /* Suppress Qt Creator warning about unused operator. */             \
+    (void)static_cast<fmt::basic_string_view<typename str::char_type>>(  \
+        result);                                                         \
+    return result;                                                       \
+  }()
+
+#if defined(FMT_STRING_ALIAS) && FMT_STRING_ALIAS
+/**
+  \rst
+  Constructs a compile-time format string. This macro is disabled by default to
+  prevent potential name collisions. To enable it define ``FMT_STRING_ALIAS`` to
+  1 before including ``fmt/format.h``.
+
+  **Example**::
+
+    #define FMT_STRING_ALIAS 1
+    #include <fmt/format.h>
+    // A compile-time error because 'd' is an invalid specifier for strings.
+    std::string s = format(fmt("{:d}"), "foo");
+  \endrst
+ */
+#  define fmt(s) FMT_STRING(s)
+#endif  // FMT_STRING_ALIAS
+
+#ifdef FMT_HEADER_ONLY
+#  define FMT_FUNC inline
+#  include "format-inl.h"
+#else  // !FMT_HEADER_ONLY
+#  define FMT_FUNC
+#endif  // FMT_HEADER_ONLY
+
+#endif  // FMT_FORMAT_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/locale.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/locale.h
new file mode 100644
index 000000000..7c13656e4
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/locale.h
@@ -0,0 +1,77 @@
+// Formatting library for C++ - std::locale support
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_LOCALE_H_
+#define FMT_LOCALE_H_
+
+#include <locale>
+#include "format.h"
+
+FMT_BEGIN_NAMESPACE
+
+namespace internal {
+template <typename Char>
+typename buffer_context<Char>::iterator vformat_to(
+    const std::locale& loc, buffer<Char>& buf,
+    basic_string_view<Char> format_str,
+    basic_format_args<buffer_context<Char>> args) {
+  using formatter_type = arg_formatter<buffer_range<Char>>;
+  return vformat_to<formatter_type>(buf, to_string_view(format_str), args,
+                                    internal::locale_ref(loc));
+}
+
+template <typename Char>
+std::basic_string<Char> vformat(const std::locale& loc,
+                                basic_string_view<Char> format_str,
+                                basic_format_args<buffer_context<Char>> args) {
+  basic_memory_buffer<Char> formatted;
+  internal::vformat_to(loc, formatted, format_str, args);
+  return fmt::to_string(formatted);
+}
+}  // namespace internal
+
+template <typename S, typename Char = char_t<S>>
+inline std::basic_string<Char> vformat(
+    const std::locale& loc, const S& format_str,
+    basic_format_args<buffer_context<Char>> args) {
+  return internal::vformat(loc, to_string_view(format_str), args);
+}
+// Formats args according to format_str, passing loc to internal::vformat.
+template <typename S, typename... Args, typename Char = char_t<S>>
+inline std::basic_string<Char> format(const std::locale& loc,
+                                      const S& format_str, Args&&... args) {
+  return internal::vformat(
+      loc, to_string_view(format_str),
+      {internal::make_args_checked<Args...>(format_str, args...)});
+}
+
+template <typename S, typename OutputIt, typename... Args,
+          typename Char = enable_if_t<
+              internal::is_output_iterator<OutputIt>::value, char_t<S>>>
+inline OutputIt vformat_to(OutputIt out, const std::locale& loc,
+                           const S& format_str,
+                           format_args_t<OutputIt, Char> args) {
+  using sink = internal::output_range<OutputIt, Char>;
+  return vformat_to<arg_formatter<sink>>(
+      sink(out), to_string_view(format_str), args, internal::locale_ref(loc));
+}
+
+template <typename OutputIt, typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_output_iterator<OutputIt>::value&&
+                            internal::is_string<S>::value)>
+inline OutputIt format_to(OutputIt out, const std::locale& loc,
+                          const S& format_str, Args&&... args) {
+  using context_type = format_context_t<OutputIt, char_t<S>>;
+  internal::check_format_string<Args...>(format_str);
+  format_arg_store<context_type, Args...> store{args...};
+  return vformat_to(out, loc, to_string_view(format_str),
+                    basic_format_args<context_type>(store));
+}
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_LOCALE_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/ostream.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/ostream.h
new file mode 100644
index 000000000..69bac0e24
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/ostream.h
@@ -0,0 +1,136 @@
+// Formatting library for C++ - std::ostream support
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_OSTREAM_H_
+#define FMT_OSTREAM_H_
+
+#include <ostream>
+#include "format.h"
+
+FMT_BEGIN_NAMESPACE
+namespace internal {
+
+template <class Char> class formatbuf : public std::basic_streambuf<Char> {
+ private:
+  using int_type = typename std::basic_streambuf<Char>::int_type;
+  using traits_type = typename std::basic_streambuf<Char>::traits_type;
+
+  buffer<Char>& buffer_;  // Destination of all output; held by reference.
+
+ public:
+  explicit formatbuf(buffer<Char>& buf) : buffer_(buf) {}
+
+ protected:
+  // The put-area is actually always empty. This makes the implementation
+  // simpler and has the advantage that the streambuf and the buffer are always
+  // in sync and sputc never writes into uninitialized memory. The obvious
+  // disadvantage is that each call to sputc always results in a (virtual) call
+  // to overflow. There is no disadvantage here for sputn since this always
+  // results in a call to xsputn.
+
+  int_type overflow(int_type ch = traits_type::eof()) FMT_OVERRIDE {
+    if (!traits_type::eq_int_type(ch, traits_type::eof()))
+      buffer_.push_back(static_cast<Char>(ch));  // eof itself is not stored.
+    return ch;
+  }
+
+  std::streamsize xsputn(const Char* s, std::streamsize count) FMT_OVERRIDE {
+    buffer_.append(s, s + count);
+    return count;  // Every character is reported as written.
+  }
+};
+
+template <typename Char> struct test_stream : std::basic_ostream<Char> {
+ private:
+  struct null;  // Declared only; serves as the hiding overload's parameter.
+  // Hide all operator<< from std::basic_ostream<Char>.
+  void operator<<(null);
+};
+
+// Checks if T has a user-defined operator<< (e.g. not a member of
+// std::ostream).
+template <typename T, typename Char> class is_streamable {
+ private:
+  template <typename U>
+  static decltype((void)(std::declval<test_stream<Char>&>()
+                         << std::declval<U>()),
+                  std::true_type())
+  test(int);  // Viable only when the << expression above is well-formed.
+
+  template <typename> static std::false_type test(...);  // Fallback overload.
+
+  using result = decltype(test<T>(0));
+
+ public:
+  static const bool value = result::value;
+};
+
+// Writes the content of buf to os in chunks no larger than streamsize's max.
+template <typename Char>
+void write(std::basic_ostream<Char>& os, buffer<Char>& buf) {
+  using unsigned_streamsize = std::make_unsigned<std::streamsize>::type;
+  const Char* cursor = buf.data();
+  unsigned_streamsize left = buf.size();
+  const unsigned_streamsize cap =
+      to_unsigned((std::numeric_limits<std::streamsize>::max)());
+  do {
+    const unsigned_streamsize chunk = left <= cap ? left : cap;
+    os.write(cursor, static_cast<std::streamsize>(chunk));
+    cursor += chunk;
+    left -= chunk;
+  } while (left != 0);
+}
+
+template <typename Char, typename T>
+void format_value(buffer<Char>& buf, const T& value) {
+  formatbuf<Char> sink(buf);
+  std::basic_ostream<Char> stream(&sink);
+  stream.exceptions(std::ios_base::failbit | std::ios_base::badbit);
+  stream << value;
+  buf.resize(buf.size());
+}
+
+// Formats an object of type T through its overloaded ostream operator<<.
+template <typename T, typename Char>
+struct fallback_formatter<T, Char, enable_if_t<is_streamable<T, Char>::value>>
+    : formatter<basic_string_view<Char>, Char> {
+  template <typename Context>
+  auto format(const T& value, Context& ctx) -> decltype(ctx.out()) {
+    basic_memory_buffer<Char> rendered;
+    format_value(rendered, value);
+    basic_string_view<Char> text(rendered.data(), rendered.size());
+    return formatter<basic_string_view<Char>, Char>::format(text, ctx);
+  }
+};
+}  // namespace internal
+
+template <typename Char>
+void vprint(std::basic_ostream<Char>& os, basic_string_view<Char> format_str,
+            basic_format_args<buffer_context<Char>> args) {
+  basic_memory_buffer<Char> formatted;
+  internal::vformat_to(formatted, format_str, args);
+  internal::write(os, formatted);
+}
+
+/**
+  \rst
+  Prints formatted data to the stream *os*.
+
+  **Example**::
+
+    fmt::print(std::cerr, "Don't {}!", "panic");
+  \endrst
+ */
+template <typename S, typename... Args,
+          typename Char = enable_if_t<internal::is_string<S>::value, char_t<S>>>
+void print(std::basic_ostream<Char>& os, const S& format_str, Args&&... args) {
+  vprint(os, to_string_view(format_str),
+         {internal::make_args_checked<Args...>(format_str, args...)});
+}
+FMT_END_NAMESPACE
+
+#endif  // FMT_OSTREAM_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/posix.cc b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/posix.cc
new file mode 100644
index 000000000..f565e8c26
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/posix.cc
@@ -0,0 +1,233 @@
+// A C++ interface to POSIX functions.
+//
+// Copyright (c) 2012 - 2016, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+// Disable bogus MSVC warnings.
+#if !defined(_CRT_SECURE_NO_WARNINGS) && defined(_MSC_VER)
+#  define _CRT_SECURE_NO_WARNINGS
+#endif
+
+#include "posix.h"
+
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#ifndef _WIN32
+#  include <unistd.h>
+#else
+#  ifndef WIN32_LEAN_AND_MEAN
+#    define WIN32_LEAN_AND_MEAN
+#  endif
+#  include <io.h>
+#  include <windows.h>
+
+#  define O_CREAT _O_CREAT
+#  define O_TRUNC _O_TRUNC
+
+#  ifndef S_IRUSR
+#    define S_IRUSR _S_IREAD
+#  endif
+
+#  ifndef S_IWUSR
+#    define S_IWUSR _S_IWRITE
+#  endif
+
+#  ifdef __MINGW32__
+#    define _SH_DENYNO 0x40
+#  endif
+
+#endif  // _WIN32
+
+#ifdef fileno
+#  undef fileno
+#endif
+
+namespace {
+#ifdef _WIN32
+// The read and write functions return int on Windows.
+using RWResult = int;
+
+// On Windows the count argument to read and write is unsigned, so a size_t
+// that does not fit is clamped to UINT_MAX instead of being allowed to wrap.
+inline unsigned convert_rwcount(std::size_t count) {
+  return count > UINT_MAX ? UINT_MAX : static_cast<unsigned>(count);
+}
+#else
+// The read and write functions return ssize_t elsewhere.
+using RWResult = ssize_t;
+
+inline std::size_t convert_rwcount(std::size_t count) { return count; }
+#endif
+}  // namespace
+
+FMT_BEGIN_NAMESPACE
+
+buffered_file::~buffered_file() FMT_NOEXCEPT {  // Reports instead of throwing.
+  if (!file_ || FMT_SYSTEM(fclose(file_)) == 0) return;
+  report_system_error(errno, "cannot close file");
+}
+
+buffered_file::buffered_file(cstring_view filename, cstring_view mode) {
+  const char* path = filename.c_str();  // Also used in the error message.
+  FMT_RETRY_VAL(file_, FMT_SYSTEM(fopen(path, mode.c_str())), nullptr);
+  if (file_ == nullptr)
+    FMT_THROW(system_error(errno, "cannot open file {}", path));
+}
+
+void buffered_file::close() {
+  if (file_ == nullptr) return;  // Nothing is open.
+  const int status = FMT_SYSTEM(fclose(file_));
+  file_ = nullptr;  // The pointer is dropped even when fclose reports failure.
+  if (status != 0) FMT_THROW(system_error(errno, "cannot close file"));
+}
+
+// A macro used to prevent expansion of fileno on broken versions of MinGW.
+#define FMT_ARGS  // Expands to nothing; it only separates fileno from its '('.
+
+int buffered_file::fileno() const {  // Returns the descriptor behind file_.
+  int fd = FMT_POSIX_CALL(fileno FMT_ARGS(file_));
+  if (fd == -1) FMT_THROW(system_error(errno, "cannot get file descriptor"));
+  return fd;
+}
+
+file::file(cstring_view path, int oflag) {
+  const int permissions = S_IRUSR | S_IWUSR;  // Passed as the open mode.
+#if defined(_WIN32) && !defined(__MINGW32__)
+  fd_ = -1;  // Sentinel for the failure check below.
+  FMT_POSIX_CALL(sopen_s(&fd_, path.c_str(), oflag, _SH_DENYNO, permissions));
+#else
+  FMT_RETRY(fd_, FMT_POSIX_CALL(open(path.c_str(), oflag, permissions)));
+#endif
+  if (fd_ == -1)
+    FMT_THROW(system_error(errno, "cannot open file {}", path.c_str()));
+}
+
+file::~file() FMT_NOEXCEPT {
+  // close is deliberately not retried on EINTR; for the rationale see
+  // http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html
+  if (fd_ == -1 || FMT_POSIX_CALL(close(fd_)) == 0) return;
+  report_system_error(errno, "cannot close file");
+}
+
+void file::close() {
+  if (fd_ == -1) return;  // Already closed.
+  // close is deliberately not retried on EINTR; for the rationale see
+  // http://linux.derkeiler.com/Mailing-Lists/Kernel/2005-09/3000.html
+  const int status = FMT_POSIX_CALL(close(fd_));
+  fd_ = -1;  // Marked closed before any error is raised.
+  if (status != 0) FMT_THROW(system_error(errno, "cannot close file"));
+}
+
+long long file::size() const {  // Returns the file's size; throws on failure.
+#ifdef _WIN32
+  // Use GetFileSize instead of GetFileSizeEx for the case when _WIN32_WINNT
+  // is less than 0x0500 as is the case with some default MinGW builds.
+  // Both functions support large file sizes.
+  DWORD size_upper = 0;
+  HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd_));
+  DWORD size_lower = FMT_SYSTEM(GetFileSize(handle, &size_upper));
+  if (size_lower == INVALID_FILE_SIZE) {  // A failure only if an error is set.
+    DWORD error = GetLastError();
+    if (error != NO_ERROR)  // Throw the code that was tested, not a re-query.
+      FMT_THROW(windows_error(error, "cannot get file size"));
+  }
+  unsigned long long long_size = size_upper;
+  return (long_size << sizeof(DWORD) * CHAR_BIT) | size_lower;  // upper:lower
+#else
+  typedef struct stat Stat;
+  Stat file_stat = Stat();  // Value-initialized before fstat fills it in.
+  if (FMT_POSIX_CALL(fstat(fd_, &file_stat)) == -1)
+    FMT_THROW(system_error(errno, "cannot get file attributes"));
+  static_assert(sizeof(long long) >= sizeof(file_stat.st_size),
+                "return type of file::size is not large enough");
+  return file_stat.st_size;
+#endif
+}
+
+std::size_t file::read(void* buffer, std::size_t count) {
+  RWResult got = 0;  // Bytes read, or negative on failure.
+  FMT_RETRY(got, FMT_POSIX_CALL(read(fd_, buffer, convert_rwcount(count))));
+  if (got < 0) FMT_THROW(system_error(errno, "cannot read from file"));
+  return internal::to_unsigned(got);
+}
+
+std::size_t file::write(const void* buffer, std::size_t count) {
+  RWResult sent = 0;  // Bytes written, or negative on failure.
+  FMT_RETRY(sent, FMT_POSIX_CALL(write(fd_, buffer, convert_rwcount(count))));
+  if (sent < 0) FMT_THROW(system_error(errno, "cannot write to file"));
+  return internal::to_unsigned(sent);
+}
+
+file file::dup(int fd) {
+  // No EINTR retry is needed because dup doesn't return it; see
+  // http://pubs.opengroup.org/onlinepubs/009695399/functions/dup.html
+  const int duplicate = FMT_POSIX_CALL(dup(fd));
+  if (duplicate == -1)
+    FMT_THROW(system_error(errno, "cannot duplicate file descriptor {}", fd));
+  return file(duplicate);
+}
+
+void file::dup2(int fd) {
+  int status = 0;
+  FMT_RETRY(status, FMT_POSIX_CALL(dup2(fd_, fd)));
+  if (status != -1) return;
+  // Only the failure path remains; the message names both descriptors.
+  FMT_THROW(system_error(errno, "cannot duplicate file descriptor {} to {}",
+                         fd_, fd));
+}
+
+void file::dup2(int fd, error_code& ec) FMT_NOEXCEPT {
+  int status = 0;  // ec is left untouched unless the call fails.
+  FMT_RETRY(status, FMT_POSIX_CALL(dup2(fd_, fd)));
+  if (status == -1) ec = error_code(errno);
+}
+
+void file::pipe(file& read_end, file& write_end) {
+  // Both ends are closed up front so that the move assignments at the bottom
+  // cannot throw and no previously held descriptor is leaked.
+  read_end.close();
+  write_end.close();
+  int ends[2] = {};
+#ifdef _WIN32
+  // Use the same default pipe capacity as Linux 2.6.11+.
+  enum { DEFAULT_CAPACITY = 65536 };
+  const int status = FMT_POSIX_CALL(pipe(ends, DEFAULT_CAPACITY, _O_BINARY));
+#else
+  // No EINTR retry is needed because pipe doesn't return it; see
+  // http://pubs.opengroup.org/onlinepubs/009696799/functions/pipe.html
+  const int status = FMT_POSIX_CALL(pipe(ends));
+#endif
+  if (status != 0) FMT_THROW(system_error(errno, "cannot create pipe"));
+  // read_end and write_end were closed above, so the close() calls made by
+  // the move assignments below return immediately and cannot throw.
+  read_end = file(ends[0]);
+  write_end = file(ends[1]);
+}
+
+buffered_file file::fdopen(const char* mode) {
+  // No EINTR retry is needed because fdopen doesn't return it.
+  FILE* stream = FMT_POSIX_CALL(fdopen(fd_, mode));
+  if (stream == nullptr)
+    FMT_THROW(
+        system_error(errno, "cannot associate stream with file descriptor"));
+  buffered_file owner(stream);
+  fd_ = -1;  // Detach: this object no longer refers to the descriptor.
+  return owner;
+}
+
+long getpagesize() {
+#ifdef _WIN32
+  SYSTEM_INFO info;
+  GetSystemInfo(&info);  // Fills in info, including dwPageSize.
+  return info.dwPageSize;
+#else
+  long bytes = FMT_POSIX_CALL(sysconf(_SC_PAGESIZE));
+  if (bytes < 0) FMT_THROW(system_error(errno, "cannot get memory page size"));
+  return bytes;
+#endif
+}
+FMT_END_NAMESPACE
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/posix.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/posix.h
new file mode 100644
index 000000000..6b2d7f8e4
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/posix.h
@@ -0,0 +1,311 @@
+// A C++ interface to POSIX functions.
+//
+// Copyright (c) 2012 - 2016, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_POSIX_H_
+#define FMT_POSIX_H_
+
+#if defined(__MINGW32__) || defined(__CYGWIN__)
+// Workaround MinGW bug https://sourceforge.net/p/mingw/bugs/2024/.
+#  undef __STRICT_ANSI__
+#endif
+
+#include <errno.h>
+#include <fcntl.h>   // for O_RDONLY
+#include <locale.h>  // for locale_t
+#include <stdio.h>
+#include <stdlib.h>  // for strtod_l
+
+#include <cstddef>
+
+#if defined __APPLE__ || defined(__FreeBSD__)
+#  include <xlocale.h>  // for LC_NUMERIC_MASK on OS X
+#endif
+
+#include "format.h"
+
+#ifndef FMT_POSIX
+#  if defined(_WIN32) && !defined(__MINGW32__)
+// Prefix the name with an underscore to avoid deprecated-symbol warnings.
+#    define FMT_POSIX(call) _##call
+#  else
+#    define FMT_POSIX(call) call
+#  endif
+#endif
+
+// Calls to system functions are wrapped in FMT_SYSTEM for testability: when
+#ifdef FMT_SYSTEM  // it is predefined, POSIX calls are routed through it too.
+#  define FMT_POSIX_CALL(call) FMT_SYSTEM(call)
+#else
+#  define FMT_SYSTEM(call) call
+#  ifdef _WIN32
+// Call the underscore-prefixed global name to avoid deprecation warnings.
+#    define FMT_POSIX_CALL(call) ::_##call
+#  else
+#    define FMT_POSIX_CALL(call) ::call
+#  endif
+#endif
+
+// Retries the expression while it evaluates to error_result and errno equals
+// EINTR. On Windows the expression is evaluated exactly once.
+#ifndef _WIN32
+#  define FMT_RETRY_VAL(result, expression, error_result) \
+    do {                                                  \
+      result = (expression);                              \
+    } while (result == error_result && errno == EINTR)
+#else
+#  define FMT_RETRY_VAL(result, expression, error_result) result = (expression)
+#endif
+
+#define FMT_RETRY(result, expression) FMT_RETRY_VAL(result, expression, -1)
+
+FMT_BEGIN_NAMESPACE
+
+/**
+  \rst
+  A reference to a null-terminated string. It can be constructed from a C
+  string or ``std::string``. Only a pointer is stored; nothing is copied.
+
+  You can use one of the following type aliases for common character types:
+
+  +---------------+-----------------------------+
+  | Type          | Definition                  |
+  +===============+=============================+
+  | cstring_view  | basic_cstring_view<char>    |
+  +---------------+-----------------------------+
+  | wcstring_view | basic_cstring_view<wchar_t> |
+  +---------------+-----------------------------+
+
+  This class is most useful as a parameter type to allow passing
+  different types of strings to a function, for example::
+
+    template <typename... Args>
+    std::string format(cstring_view format_str, const Args & ... args);
+
+    format("{}", 42);
+    format(std::string("{}"), 42);
+  \endrst
+ */
+template <typename Char> class basic_cstring_view {
+ private:
+  const Char* data_;  // Pointer to the referenced string.
+
+ public:
+  /** Constructs a string reference object from a C string; *s* is stored. */
+  basic_cstring_view(const Char* s) : data_(s) {}
+
+  /**
+    \rst
+    Constructs a string reference from the ``c_str()`` of an ``std::string``.
+    \endrst
+   */
+  basic_cstring_view(const std::basic_string<Char>& s) : data_(s.c_str()) {}
+
+  /** Returns the stored pointer to the C string. */
+  const Char* c_str() const { return data_; }
+};
+
+using cstring_view = basic_cstring_view<char>;
+using wcstring_view = basic_cstring_view<wchar_t>;
+
+// An error code.
+class error_code {
+ private:
+  int value_;
+
+ public:
+  explicit error_code(int value = 0) FMT_NOEXCEPT : value_(value) {}
+
+  int get() const FMT_NOEXCEPT { return value_; }
+};
+
+// A buffered file: a movable, non-copyable holder of a FILE* stream.
+class buffered_file {
+ private:
+  FILE* file_;  // The held stream; nullptr when no file is represented.
+
+  friend class file;
+
+  explicit buffered_file(FILE* f) : file_(f) {}  // Used by file::fdopen.
+
+ public:
+  // Constructs a buffered_file object which doesn't represent any file.
+  buffered_file() FMT_NOEXCEPT : file_(nullptr) {}
+
+  // Destroys the object closing the file it represents if any.
+  FMT_API ~buffered_file() FMT_NOEXCEPT;
+
+ private:
+  buffered_file(const buffered_file&) = delete;
+  void operator=(const buffered_file&) = delete;
+
+ public:
+  buffered_file(buffered_file&& other) FMT_NOEXCEPT : file_(other.file_) {
+    other.file_ = nullptr;  // other no longer represents the file.
+  }
+
+  buffered_file& operator=(buffered_file&& other) {
+    close();  // Close the file currently held, if any, before taking other's.
+    file_ = other.file_;
+    other.file_ = nullptr;
+    return *this;
+  }
+
+  // Opens the file filename with the given fopen mode string.
+  FMT_API buffered_file(cstring_view filename, cstring_view mode);
+
+  // Closes the file if one is represented; does nothing otherwise.
+  FMT_API void close();
+
+  // Returns the pointer to a FILE object representing this file.
+  FILE* get() const FMT_NOEXCEPT { return file_; }
+
+  // We place parentheses around fileno to workaround a bug in some versions
+  // of MinGW that define fileno as a macro.
+  FMT_API int(fileno)() const;
+
+  void vprint(string_view format_str, format_args args) {
+    fmt::vprint(file_, format_str, args);  // Formatted output to file_.
+  }
+
+  template <typename... Args>
+  inline void print(string_view format_str, const Args&... args) {
+    vprint(format_str, make_format_args(args...));  // Packs args for vprint.
+  }
+};
+
+// A file. A closed file is represented by a file object with descriptor -1.
+// Methods that are not declared with FMT_NOEXCEPT may throw
+// fmt::system_error in case of failure. Note that some errors such as
+// closing the file multiple times will cause a crash on Windows rather
+// than an exception. You can get standard behavior by overriding the
+// invalid parameter handler with _set_invalid_parameter_handler.
+class file {
+ private:
+  int fd_;  // File descriptor; -1 when closed.
+
+  // Constructs a file object with a given descriptor.
+  explicit file(int fd) : fd_(fd) {}
+
+ public:
+  // Possible values for the oflag argument to the constructor.
+  enum {
+    RDONLY = FMT_POSIX(O_RDONLY),  // Open for reading only.
+    WRONLY = FMT_POSIX(O_WRONLY),  // Open for writing only.
+    RDWR = FMT_POSIX(O_RDWR)       // Open for reading and writing.
+  };
+
+  // Constructs a file object which doesn't represent any file.
+  file() FMT_NOEXCEPT : fd_(-1) {}
+
+  // Opens a file and constructs a file object representing this file.
+  FMT_API file(cstring_view path, int oflag);
+
+ private:
+  file(const file&) = delete;
+  void operator=(const file&) = delete;
+
+ public:
+  file(file&& other) FMT_NOEXCEPT : fd_(other.fd_) { other.fd_ = -1; }
+
+  file& operator=(file&& other) {
+    close();  // Close the descriptor currently held, if any.
+    fd_ = other.fd_;
+    other.fd_ = -1;  // other is left in the closed state.
+    return *this;
+  }
+
+  // Destroys the object closing the file it represents if any.
+  FMT_API ~file() FMT_NOEXCEPT;
+
+  // Returns the file descriptor.
+  int descriptor() const FMT_NOEXCEPT { return fd_; }
+
+  // Closes the file if it is open; does nothing otherwise.
+  FMT_API void close();
+
+  // Returns the file size. The size has signed type for consistency with
+  // stat::st_size.
+  FMT_API long long size() const;
+
+  // Attempts to read count bytes into buffer; returns the number read.
+  FMT_API std::size_t read(void* buffer, std::size_t count);
+
+  // Attempts to write count bytes from buffer; returns the number written.
+  FMT_API std::size_t write(const void* buffer, std::size_t count);
+
+  // Duplicates a file descriptor with the dup function and returns
+  // the duplicate as a file object.
+  FMT_API static file dup(int fd);
+
+  // Makes fd be the copy of this file descriptor, closing fd first if
+  // necessary.
+  FMT_API void dup2(int fd);
+
+  // Same as dup2(int) but, instead of throwing, stores the errno value in ec
+  // when the call fails.
+  FMT_API void dup2(int fd, error_code& ec) FMT_NOEXCEPT;
+
+  // Creates a pipe setting up read_end and write_end file objects for reading
+  // and writing respectively.
+  FMT_API static void pipe(file& read_end, file& write_end);
+
+  // Creates a buffered_file object associated with this file and detaches
+  // this file object from the file.
+  FMT_API buffered_file fdopen(const char* mode);
+};
+
+// Returns the memory page size.
+long getpagesize();
+
+#ifdef FMT_LOCALE
+// A "C" numeric locale, created on construction and freed on destruction.
+class Locale {
+ private:
+#  ifdef _WIN32  // Provide POSIX-named wrappers over the underscore CRT calls.
+  using locale_t = _locale_t;
+
+  enum { LC_NUMERIC_MASK = LC_NUMERIC };
+
+  static locale_t newlocale(int category_mask, const char* locale, locale_t) {
+    return _create_locale(category_mask, locale);  // Third argument is unused.
+  }
+
+  static void freelocale(locale_t locale) { _free_locale(locale); }
+
+  static double strtod_l(const char* nptr, char** endptr, _locale_t locale) {
+    return _strtod_l(nptr, endptr, locale);
+  }
+#  endif
+
+  locale_t locale_;  // The locale handle held by this object.
+
+  Locale(const Locale&) = delete;
+  void operator=(const Locale&) = delete;
+
+ public:
+  using type = locale_t;
+
+  Locale() : locale_(newlocale(LC_NUMERIC_MASK, "C", nullptr)) {
+    if (!locale_) FMT_THROW(system_error(errno, "cannot create locale"));
+  }
+  ~Locale() { freelocale(locale_); }
+
+  type get() const { return locale_; }  // Returns the locale handle.
+
+  // Converts string to floating-point number and advances str past the end
+  // of the parsed input.
+  double strtod(const char*& str) const {
+    char* end = nullptr;
+    double result = strtod_l(str, &end, locale_);
+    str = end;  // Advance the caller's pointer to where strtod_l stopped.
+    return result;
+  }
+};
+#endif  // FMT_LOCALE
+FMT_END_NAMESPACE
+
+#endif  // FMT_POSIX_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/printf.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/printf.h
new file mode 100644
index 000000000..c803aa952
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/printf.h
@@ -0,0 +1,715 @@
+// Formatting library for C++
+//
+// Copyright (c) 2012 - 2016, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+
+#ifndef FMT_PRINTF_H_
+#define FMT_PRINTF_H_
+
+#include <algorithm>  // std::fill_n
+#include <limits>     // std::numeric_limits
+
+#include "ostream.h"
+
+FMT_BEGIN_NAMESPACE
+namespace internal {
+
+// Returns its argument unchanged. A helper function to suppress bogus
+// "conditional expression is constant" warnings.
+template <typename T> inline T const_check(T value) { return value; }
+
+// Tells whether an integer value is representable as int. The primary template
+// serves unsigned types; the <true> specialization below serves signed ones.
+template <bool IsSigned> struct int_checker {
+  template <typename T> static bool fits_in_int(T value) {
+    const unsigned int_max = std::numeric_limits<int>::max();
+    return int_max >= value;  // Both sides unsigned, so no sign warnings.
+  }
+  static bool fits_in_int(bool) { return true; }  // bool always fits.
+};
+
+template <> struct int_checker<true> {
+  template <typename T> static bool fits_in_int(T value) {
+    using int_limits = std::numeric_limits<int>;
+    return int_limits::min() <= value && value <= int_limits::max();
+  }
+  static bool fits_in_int(int) { return true; }  // int trivially fits.
+};
+
+class printf_precision_handler {  // Turns an argument into a precision.
+ public:
+  template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+  int operator()(T value) {
+    if (!int_checker<std::numeric_limits<T>::is_signed>::fits_in_int(value))
+      FMT_THROW(format_error("number is too big"));
+    return (std::max)(static_cast<int>(value), 0);  // Negative becomes 0.
+  }
+
+  template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
+  int operator()(T) {
+    FMT_THROW(format_error("precision is not integer"));
+    return 0;  // NOTE(review): presumably for non-throwing FMT_THROW; confirm.
+  }
+};
+
+// An argument visitor that returns true iff arg is a zero integer.
+class is_zero_int {
+ public:
+  template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+  bool operator()(T value) {
+    return value == 0;
+  }
+
+  template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
+  bool operator()(T) {
+    return false;  // Non-integral arguments never count as a zero integer.
+  }
+};
+
+template <typename T> struct make_unsigned_or_bool : std::make_unsigned<T> {};
+// std::make_unsigned does not support bool, so bool is mapped to itself.
+template <> struct make_unsigned_or_bool<bool> { using type = bool; };
+
+template <typename T, typename Context> class arg_converter {
+ private:
+  using char_type = typename Context::char_type;
+
+  basic_format_arg<Context>& arg_;  // The argument that is replaced in place.
+  char_type type_;                  // The type specifier character.
+
+ public:
+  arg_converter(basic_format_arg<Context>& arg, char_type type)
+      : arg_(arg), type_(type) {}
+
+  void operator()(bool value) {
+    if (type_ != 's') operator()<bool>(value);  // 's' leaves a bool untouched.
+  }
+
+  template <typename U, FMT_ENABLE_IF(std::is_integral<U>::value)>
+  void operator()(U value) {
+    bool is_signed = type_ == 'd' || type_ == 'i';
+    using target_type = conditional_t<std::is_same<T, void>::value, U, T>;
+    if (const_check(sizeof(target_type) <= sizeof(int))) {
+      // Extra casts are used to silence warnings.
+      if (is_signed) {
+        arg_ = internal::make_arg<Context>(
+            static_cast<int>(static_cast<target_type>(value)));
+      } else {
+        using unsigned_type = typename make_unsigned_or_bool<target_type>::type;
+        arg_ = internal::make_arg<Context>(
+            static_cast<unsigned>(static_cast<unsigned_type>(value)));
+      }
+    } else {
+      if (is_signed) {
+        // glibc's printf doesn't sign extend arguments of smaller types:
+        //   std::printf("%lld", -42);  // prints "4294967254"
+        // but we don't have to do the same because it's UB.
+        arg_ = internal::make_arg<Context>(static_cast<long long>(value));
+      } else {
+        arg_ = internal::make_arg<Context>(
+            static_cast<typename make_unsigned_or_bool<U>::type>(value));
+      }
+    }
+  }
+
+  template <typename U, FMT_ENABLE_IF(!std::is_integral<U>::value)>
+  void operator()(U) {}  // No conversion needed for non-integral types.
+};
+
+// Converts an integer argument to T for printf, if T is an integral type.
+// If T is void, the argument is converted to the corresponding signed or
+// unsigned type depending on the type specifier ('d' and 'i' - signed,
+// other - unsigned). Non-integral arguments are left unchanged.
+template <typename T, typename Context, typename Char>
+void convert_arg(basic_format_arg<Context>& arg, Char type) {
+  visit_format_arg(arg_converter<T, Context>(arg, type), arg);
+}
+
+// Converts an integer argument to char for printf.
+template <typename Context> class char_converter {
+ private:
+  basic_format_arg<Context>& arg_;  // The argument that is replaced in place.
+
+ public:
+  explicit char_converter(basic_format_arg<Context>& arg) : arg_(arg) {}
+
+  template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+  void operator()(T value) {
+    arg_ = internal::make_arg<Context>(  // Cast to the context's char type.
+        static_cast<typename Context::char_type>(value));
+  }
+
+  template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
+  void operator()(T) {}  // No conversion needed for non-integral types.
+};
+
+// Checks if an argument is a valid printf width specifier and sets
+// left alignment if it is negative. Returns the width's absolute value.
+template <typename Char> class printf_width_handler {
+ private:
+  using format_specs = basic_format_specs<Char>;
+
+  format_specs& specs_;  // Specs whose alignment may be updated.
+
+ public:
+  explicit printf_width_handler(format_specs& specs) : specs_(specs) {}
+
+  template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+  unsigned operator()(T value) {
+    auto width = static_cast<uint32_or_64_t<T>>(value);
+    if (internal::is_negative(value)) {
+      specs_.align = align::left;
+      width = 0 - width;  // Negate within the unsigned type.
+    }
+    unsigned int_max = std::numeric_limits<int>::max();
+    if (width > int_max) FMT_THROW(format_error("number is too big"));
+    return static_cast<unsigned>(width);
+  }
+
+  template <typename T, FMT_ENABLE_IF(!std::is_integral<T>::value)>
+  unsigned operator()(T) {
+    FMT_THROW(format_error("width is not integer"));
+    return 0;  // NOTE(review): presumably for non-throwing FMT_THROW; confirm.
+  }
+};
+
+template <typename Char, typename Context>  // printf-style formatting into buf.
+void printf(buffer<Char>& buf, basic_string_view<Char> format,
+            basic_format_args<Context> args) {
+  Context(std::back_inserter(buf), format, args).format();  // Appends to buf.
+}
+
+template <typename OutputIt, typename Char, typename Context>
+internal::truncating_iterator<OutputIt> printf(
+    internal::truncating_iterator<OutputIt> it, basic_string_view<Char> format,
+    basic_format_args<Context> args) {
+  return Context(it, format, args).format();  // Returns format()'s iterator.
+}
+}  // namespace internal
+
+using internal::printf;  // For printing into memory_buffer.
+
+template <typename Range> class printf_arg_formatter;
+
+template <typename OutputIt, typename Char> class basic_printf_context;
+
+/**
+  \rst
+  The ``printf`` argument formatter, with one ``operator()`` per value kind.
+  \endrst
+ */
+template <typename Range>
+class printf_arg_formatter : public internal::arg_formatter_base<Range> {
+ public:
+  using iterator = typename Range::iterator;
+
+ private:
+  using char_type = typename Range::value_type;
+  using base = internal::arg_formatter_base<Range>;
+  using context_type = basic_printf_context<iterator, char_type>;
+
+  context_type& context_;  // Used to format arguments of custom types.
+
+  void write_null_pointer(char) {  // Overload for narrow output.
+    this->specs()->type = 0;
+    this->write("(nil)");
+  }
+
+  void write_null_pointer(wchar_t) {  // Overload for wide output.
+    this->specs()->type = 0;
+    this->write(L"(nil)");
+  }
+
+ public:
+  using format_specs = typename base::format_specs;
+
+  /**
+    \rst
+    Constructs an argument formatter object. *iter* is the output iterator,
+    *specs* contains format specifier information for standard argument
+    types and *ctx* is the context used when formatting custom types.
+    \endrst
+   */
+  printf_arg_formatter(iterator iter, format_specs& specs, context_type& ctx)
+      : base(Range(iter), &specs, internal::locale_ref()), context_(ctx) {}
+
+  template <typename T, FMT_ENABLE_IF(std::is_integral<T>::value)>
+  iterator operator()(T value) {
+    // MSVC2013 fails to compile separate overloads for bool and char_type so
+    // use std::is_same instead.
+    if (std::is_same<T, bool>::value) {
+      format_specs& fmt_specs = *this->specs();
+      if (fmt_specs.type != 's') return base::operator()(value ? 1 : 0);
+      fmt_specs.type = 0;
+      this->write(value != 0);  // 's': written through the bool overload.
+    } else if (std::is_same<T, char_type>::value) {
+      format_specs& fmt_specs = *this->specs();
+      if (fmt_specs.type && fmt_specs.type != 'c')
+        return (*this)(static_cast<int>(value));  // Format as an int instead.
+      fmt_specs.sign = sign::none;
+      fmt_specs.alt = false;
+      fmt_specs.align = align::right;
+      return base::operator()(value);
+    } else {
+      return base::operator()(value);
+    }
+    return this->out();
+  }
+
+  template <typename T, FMT_ENABLE_IF(std::is_floating_point<T>::value)>
+  iterator operator()(T value) {
+    return base::operator()(value);
+  }
+
+  /** Formats a null-terminated C string; null is "(nil)" for 'p' else "(null)". */
+  iterator operator()(const char* value) {
+    if (value)
+      base::operator()(value);
+    else if (this->specs()->type == 'p')
+      write_null_pointer(char_type());
+    else
+      this->write("(null)");
+    return this->out();
+  }
+
+  /** Formats a null-terminated wide C string; null is handled as above. */
+  iterator operator()(const wchar_t* value) {
+    if (value)
+      base::operator()(value);
+    else if (this->specs()->type == 'p')
+      write_null_pointer(char_type());
+    else
+      this->write(L"(null)");
+    return this->out();
+  }
+
+  iterator operator()(basic_string_view<char_type> value) {
+    return base::operator()(value);
+  }
+
+  iterator operator()(monostate value) { return base::operator()(value); }
+
+  /** Formats a pointer; a null pointer is written as "(nil)". */
+  iterator operator()(const void* value) {
+    if (value) return base::operator()(value);
+    this->specs()->type = 0;  // write_null_pointer also resets the type.
+    write_null_pointer(char_type());
+    return this->out();
+  }
+
+  /** Formats an argument of a custom (user-defined) type. */
+  iterator operator()(typename basic_format_arg<context_type>::handle handle) {
+    handle.format(context_.parse_context(), context_);
+    return this->out();
+  }
+};
+
+template <typename T> struct printf_formatter {  // Formats T via operator<<.
+  template <typename ParseContext>
+  auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    return ctx.begin();  // Consumes nothing from the format string.
+  }
+
+  template <typename FormatContext>
+  auto format(const T& value, FormatContext& ctx) -> decltype(ctx.out()) {
+    internal::format_value(internal::get_container(ctx.out()), value);
+    return ctx.out();
+  }
+};
+
+/** This template formats data and writes the output to an output iterator. */
+template <typename OutputIt, typename Char> class basic_printf_context {
+ public:
+  /** The character type for the output. */
+  using char_type = Char;
+  using format_arg = basic_format_arg<basic_printf_context>;
+  template <typename T> using formatter_type = printf_formatter<T>;
+
+ private:
+  using format_specs = basic_format_specs<char_type>;
+
+  OutputIt out_;                                  // Current output iterator.
+  basic_format_args<basic_printf_context> args_;  // Arguments to format.
+  basic_parse_context<Char> parse_ctx_;           // Built from the format string.
+
+  static void parse_flags(format_specs& specs, const Char*& it,
+                          const Char* end);  // Consumes flag characters.
+
+  // Returns the argument with the specified 1-based index or, if arg_index
+  // is equal to the maximum unsigned value, the next argument.
+  format_arg get_arg(unsigned arg_index = std::numeric_limits<unsigned>::max());
+
+  // Parses argument index, flags and width and returns the argument index.
+  unsigned parse_header(const Char*& it, const Char* end, format_specs& specs);
+
+ public:
+  /**
+   \rst
+   Constructs a ``printf_context`` object. References to the arguments and
+   the output are stored in the context object so make sure they have
+   appropriate lifetimes.
+   \endrst
+   */
+  basic_printf_context(OutputIt out, basic_string_view<char_type> format_str,
+                       basic_format_args<basic_printf_context> args)
+      : out_(out), args_(args), parse_ctx_(format_str) {}
+
+  OutputIt out() { return out_; }
+  void advance_to(OutputIt it) { out_ = it; }
+
+  format_arg arg(unsigned id) const { return args_.get(id); }
+
+  basic_parse_context<Char>& parse_context() { return parse_ctx_; }
+
+  FMT_CONSTEXPR void on_error(const char* message) {
+    parse_ctx_.on_error(message);  // Delegates to the parse context.
+  }
+
+  /** Formats stored arguments and writes the output to the range. */
+  template <typename ArgFormatter =
+                printf_arg_formatter<internal::buffer_range<Char>>>
+  OutputIt format();
+};
+
+// Consumes the run of flag characters starting at `it` and records each one
+// in `specs`: '-' sets align::left, '+' sets sign::plus, ' ' sets
+// sign::space, '0' sets the fill character to '0' and '#' sets alt.
+// Returns at the first character that is not a flag, leaving `it` on it, or
+// when `it` reaches `end`.
+template <typename OutputIt, typename Char>
+void basic_printf_context<OutputIt, Char>::parse_flags(format_specs& specs,
+                                                       const Char*& it,
+                                                       const Char* end) {
+  while (it != end) {
+    const Char flag = *it;
+    if (flag == '-')
+      specs.align = align::left;
+    else if (flag == '+')
+      specs.sign = sign::plus;
+    else if (flag == '0')
+      specs.fill[0] = '0';
+    else if (flag == ' ')
+      specs.sign = sign::space;
+    else if (flag == '#')
+      specs.alt = true;
+    else
+      return;
+    ++it;
+  }
+}
+
+template <typename OutputIt, typename Char>
+typename basic_printf_context<OutputIt, Char>::format_arg
+basic_printf_context<OutputIt, Char>::get_arg(unsigned arg_index) {
+  // The maximum unsigned value is the sentinel for "take the next argument".
+  const bool take_next = arg_index == std::numeric_limits<unsigned>::max();
+  if (take_next) arg_index = parse_ctx_.next_arg_id();
+  else parse_ctx_.check_arg_id(--arg_index);  // Explicit indices are 1-based.
+  return internal::get_arg(*this, arg_index);
+}
+
+template <typename OutputIt, typename Char>  // Parses [index$][flags][width].
+unsigned basic_printf_context<OutputIt, Char>::parse_header(
+    const Char*& it, const Char* end, format_specs& specs) {
+  unsigned arg_index = std::numeric_limits<unsigned>::max();  // No "N$" seen.
+  char_type c = it != end ? *it : 0;  // Don't read past end (e.g. "...%").
+  if (c >= '0' && c <= '9') {
+    // Parse an argument index (if followed by '$') or a width possibly
+    // preceded with '0' flag(s).
+    internal::error_handler eh;
+    unsigned value = parse_nonnegative_int(it, end, eh);
+    if (it != end && *it == '$') {  // value is an argument index
+      ++it;
+      arg_index = value;
+    } else {
+      if (c == '0') specs.fill[0] = '0';
+      if (value != 0) {
+        // Nonzero value means that we parsed width and don't need to
+        // parse it or flags again, so return now.
+        specs.width = value;
+        return arg_index;
+      }
+    }
+  }
+  parse_flags(specs, it, end);
+  // Parse width: either a literal number or '*', which takes it from the
+  if (it != end) {  // next argument.
+    if (*it >= '0' && *it <= '9') {
+      internal::error_handler eh;
+      specs.width = parse_nonnegative_int(it, end, eh);
+    } else if (*it == '*') {
+      ++it;
+      specs.width = visit_format_arg(
+          internal::printf_width_handler<char_type>(specs), get_arg());
+    }
+  }
+  return arg_index;  // Max unsigned value unless an explicit index was given.
+}
+
+template <typename OutputIt, typename Char>
+template <typename ArgFormatter>
+OutputIt basic_printf_context<OutputIt, Char>::format() {  // renders the whole format string
+  auto out = this->out();
+  const Char* start = parse_ctx_.begin();  // start of the pending literal text
+  const Char* end = parse_ctx_.end();
+  auto it = start;
+  while (it != end) {
+    char_type c = *it++;
+    if (c != '%') continue;  // literal text is copied lazily, in one piece
+    if (it != end && *it == c) {  // "%%" escape: emit a single '%'
+      out = std::copy(start, it, out);
+      start = ++it;
+      continue;
+    }
+    out = std::copy(start, it - 1, out);  // flush the literal text before '%'
+
+    format_specs specs;
+    specs.align = align::right;  // default alignment for every conversion
+
+    // Parse argument index, flags and width.
+    unsigned arg_index = parse_header(it, end, specs);
+
+    // Parse precision.
+    if (it != end && *it == '.') {
+      ++it;
+      c = it != end ? *it : 0;
+      if ('0' <= c && c <= '9') {
+        internal::error_handler eh;
+        specs.precision = static_cast<int>(parse_nonnegative_int(it, end, eh));
+      } else if (c == '*') {
+        ++it;  // '*': the precision is read from the next argument
+        specs.precision =
+            visit_format_arg(internal::printf_precision_handler(), get_arg());
+      } else {
+        specs.precision = 0;  // a bare '.' means precision 0
+      }
+    }
+
+    format_arg arg = get_arg(arg_index);
+    if (specs.alt && visit_format_arg(internal::is_zero_int(), arg))
+      specs.alt = false;  // the '#' flag is dropped when is_zero_int holds
+    if (specs.fill[0] == '0') {
+      if (arg.is_arithmetic())
+        specs.align = align::numeric;
+      else
+        specs.fill[0] = ' ';  // Ignore '0' flag for non-numeric types.
+    }
+
+    // Parse length and convert the argument to the required type.
+    c = it != end ? *it++ : 0;  // c == 0 with `it` unmoved if the spec ends here
+    char_type t = it != end ? *it : 0;  // character after the length modifier
+    using internal::convert_arg;
+    switch (c) {
+    case 'h':
+      if (t == 'h') {
+        ++it;
+        t = it != end ? *it : 0;
+        convert_arg<signed char>(arg, t);
+      } else {
+        convert_arg<short>(arg, t);
+      }
+      break;
+    case 'l':
+      if (t == 'l') {
+        ++it;
+        t = it != end ? *it : 0;
+        convert_arg<long long>(arg, t);
+      } else {
+        convert_arg<long>(arg, t);
+      }
+      break;
+    case 'j':
+      convert_arg<intmax_t>(arg, t);
+      break;
+    case 'z':
+      convert_arg<std::size_t>(arg, t);
+      break;
+    case 't':
+      convert_arg<std::ptrdiff_t>(arg, t);
+      break;
+    case 'L':
+      // printf produces garbage when 'L' is omitted for long double, no
+      // need to do the same.
+      break;
+    default:
+      --it;  // NOTE(review): also steps back when nothing was consumed above (it == end) - confirm
+      convert_arg<void>(arg, c);  // no length modifier: c is the type character
+    }
+
+    // Parse type.
+    if (it == end) FMT_THROW(format_error("invalid format string"));
+    specs.type = static_cast<char>(*it++);
+    if (arg.is_integral()) {
+      // Normalize type.
+      switch (specs.type) {
+      case 'i':
+      case 'u':
+        specs.type = 'd';  // 'i' and 'u' are formatted as 'd'
+        break;
+      case 'c':
+        visit_format_arg(internal::char_converter<basic_printf_context>(arg),
+                         arg);
+        break;
+      }
+    }
+
+    start = it;  // literal text resumes right after this conversion spec
+
+    // Format argument.
+    visit_format_arg(ArgFormatter(out, specs, *this), arg);
+  }
+  return std::copy(start, it, out);  // flush the trailing literal text
+}
+
+template <typename Char>
+using basic_printf_context_t =
+    basic_printf_context<std::back_insert_iterator<internal::buffer<Char>>,
+                         Char>;
+
+using printf_context = basic_printf_context_t<char>;
+using wprintf_context = basic_printf_context_t<wchar_t>;
+
+using printf_args = basic_format_args<printf_context>;
+using wprintf_args = basic_format_args<wprintf_context>;
+
+/**
+  \rst
+  Constructs an `~fmt::format_arg_store` object that contains references to
+  arguments and can be implicitly converted to `~fmt::printf_args`.
+  \endrst
+ */
+template <typename... Args>
+inline format_arg_store<printf_context, Args...> make_printf_args(
+    const Args&... args) {
+  return {args...};
+}
+
+/**
+  \rst
+  Constructs an `~fmt::format_arg_store` object that contains references to
+  arguments and can be implicitly converted to `~fmt::wprintf_args`.
+  \endrst
+ */
+template <typename... Args>
+inline format_arg_store<wprintf_context, Args...> make_wprintf_args(
+    const Args&... args) {
+  return {args...};
+}
+
+template <typename S, typename Char = char_t<S>>
+inline std::basic_string<Char> vsprintf(
+    const S& format, basic_format_args<basic_printf_context_t<Char>> args) {
+  basic_memory_buffer<Char> buffer;
+  printf(buffer, to_string_view(format), args);
+  return to_string(buffer);
+}
+
+/**
+  \rst
+  Formats arguments and returns the result as a string.
+
+  **Example**::
+
+    std::string message = fmt::sprintf("The answer is %d", 42);
+  \endrst
+*/
+template <typename S, typename... Args,
+          typename Char = enable_if_t<internal::is_string<S>::value, char_t<S>>>
+inline std::basic_string<Char> sprintf(const S& format, const Args&... args) {
+  using context = basic_printf_context_t<Char>;
+  return vsprintf(to_string_view(format), {make_format_args<context>(args...)});
+}
+
+template <typename S, typename Char = char_t<S>>
+inline int vfprintf(std::FILE* f, const S& format,
+                    basic_format_args<basic_printf_context_t<Char>> args) {
+  basic_memory_buffer<Char> buffer;  // output is formatted in memory first
+  printf(buffer, to_string_view(format), args);
+  std::size_t size = buffer.size();  // counted in Char units, not bytes
+  return std::fwrite(buffer.data(), sizeof(Char), size, f) < size
+             ? -1                       // short write: report failure
+             : static_cast<int>(size);  // success: number of Char units written
+}
+
+/**
+  \rst
+  Prints formatted data to the file *f*.
+
+  **Example**::
+
+    fmt::fprintf(stderr, "Don't %s!", "panic");
+  \endrst
+ */
+template <typename S, typename... Args,
+          typename Char = enable_if_t<internal::is_string<S>::value, char_t<S>>>
+inline int fprintf(std::FILE* f, const S& format, const Args&... args) {
+  using context = basic_printf_context_t<Char>;
+  return vfprintf(f, to_string_view(format),
+                  {make_format_args<context>(args...)});
+}
+
+template <typename S, typename Char = char_t<S>>
+inline int vprintf(const S& format,
+                   basic_format_args<basic_printf_context_t<Char>> args) {
+  return vfprintf(stdout, to_string_view(format), args);
+}
+
+/**
+  \rst
+  Prints formatted data to ``stdout``.
+
+  **Example**::
+
+    fmt::printf("Elapsed time: %.2f seconds", 1.23);
+  \endrst
+ */
+template <typename S, typename... Args,
+          FMT_ENABLE_IF(internal::is_string<S>::value)>
+inline int printf(const S& format_str, const Args&... args) {
+  using context = basic_printf_context_t<char_t<S>>;
+  return vprintf(to_string_view(format_str),
+                 {make_format_args<context>(args...)});
+}
+
+template <typename S, typename Char = char_t<S>>
+inline int vfprintf(std::basic_ostream<Char>& os, const S& format,
+                    basic_format_args<basic_printf_context_t<Char>> args) {
+  basic_memory_buffer<Char> buffer;
+  printf(buffer, to_string_view(format), args);
+  internal::write(os, buffer);
+  return static_cast<int>(buffer.size());
+}
+
+/** Formats arguments and writes the output to the range. */
+template <typename ArgFormatter, typename Char,
+          typename Context =
+              basic_printf_context<typename ArgFormatter::iterator, Char>>
+typename ArgFormatter::iterator vprintf(internal::buffer<Char>& out,
+                                        basic_string_view<Char> format_str,
+                                        basic_format_args<Context> args) {
+  typename ArgFormatter::iterator iter(out);
+  Context(iter, format_str, args).template format<ArgFormatter>();
+  return iter;
+}
+
+/**
+  \rst
+  Prints formatted data to the stream *os*.
+
+  **Example**::
+
+    fmt::fprintf(cerr, "Don't %s!", "panic");
+  \endrst
+ */
+template <typename S, typename... Args, typename Char = char_t<S>>
+inline int fprintf(std::basic_ostream<Char>& os, const S& format_str,
+                   const Args&... args) {
+  using context = basic_printf_context_t<Char>;
+  return vfprintf(os, to_string_view(format_str),
+                  {make_format_args<context>(args...)});
+}
+FMT_END_NAMESPACE
+
+#endif  // FMT_PRINTF_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/ranges.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/ranges.h
new file mode 100644
index 000000000..cf0d41aaa
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/ranges.h
@@ -0,0 +1,288 @@
+// Formatting library for C++ - experimental range support
+//
+// Copyright (c) 2012 - present, Victor Zverovich
+// All rights reserved.
+//
+// For the license information refer to format.h.
+//
+// Copyright (c) 2018 - present, Remotion (Igor Schulz)
+// All Rights Reserved
+// {fmt} support for ranges, containers and types tuple interface.
+
+#ifndef FMT_RANGES_H_
+#define FMT_RANGES_H_
+
+#include <type_traits>
+#include "format.h"
+
+// output only up to N items from the range.
+#ifndef FMT_RANGE_OUTPUT_LENGTH_LIMIT
+#  define FMT_RANGE_OUTPUT_LENGTH_LIMIT 256
+#endif
+
+FMT_BEGIN_NAMESPACE
+
+template <typename Char> struct formatting_base {
+  template <typename ParseContext>
+  FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    return ctx.begin();
+  }
+};
+
+template <typename Char, typename Enable = void>
+struct formatting_range : formatting_base<Char> {
+  static FMT_CONSTEXPR_DECL const std::size_t range_length_limit =
+      FMT_RANGE_OUTPUT_LENGTH_LIMIT;  // output only up to N items from the
+                                      // range.
+  Char prefix;
+  Char delimiter;
+  Char postfix;
+  formatting_range() : prefix('{'), delimiter(','), postfix('}') {}
+  static FMT_CONSTEXPR_DECL const bool add_delimiter_spaces = true;
+  static FMT_CONSTEXPR_DECL const bool add_prepostfix_space = false;
+};
+
+template <typename Char, typename Enable = void>
+struct formatting_tuple : formatting_base<Char> {
+  Char prefix;
+  Char delimiter;
+  Char postfix;
+  formatting_tuple() : prefix('('), delimiter(','), postfix(')') {}
+  static FMT_CONSTEXPR_DECL const bool add_delimiter_spaces = true;
+  static FMT_CONSTEXPR_DECL const bool add_prepostfix_space = false;
+};
+
+namespace internal {
+
+template <typename RangeT, typename OutputIterator>
+OutputIterator copy(const RangeT& range, OutputIterator out) {
+  auto cur = range.begin();  // begin() and end() are each evaluated exactly once
+  for (auto last = range.end(); cur != last; ++cur) *out++ = *cur;
+  return out;  // iterator just past the last element written
+}
+
+template <typename OutputIterator>
+OutputIterator copy(const char* str, OutputIterator out) {
+  for (; *str != '\0'; ++str) *out++ = *str;  // the NUL terminator is not copied
+  return out;
+}
+
+template <typename OutputIterator>
+OutputIterator copy(char ch, OutputIterator out) {
+  *out++ = ch;
+  return out;
+}
+
+/// Detects string-like T: is_string<T>, or T has find(), length() and data().
+template <typename T> class is_like_std_string {
+  template <typename U>
+  static auto check(U* p)  // viable only if every expression below compiles
+      -> decltype((void)p->find('a'), p->length(), (void)p->data(), int());
+  template <typename> static void check(...);  // fallback overload, returns void
+
+ public:
+  static FMT_CONSTEXPR_DECL const bool value =
+      is_string<T>::value || !std::is_void<decltype(check<T>(nullptr))>::value;
+};
+
+template <typename Char>
+struct is_like_std_string<fmt::basic_string_view<Char>> : std::true_type {};
+
+template <typename... Ts> struct conditional_helper {};
+
+template <typename T, typename _ = void> struct is_range_ : std::false_type {};
+
+#if !FMT_MSC_VER || FMT_MSC_VER > 1800
+template <typename T>
+struct is_range_<
+    T, conditional_t<false,
+                     conditional_helper<decltype(std::declval<T>().begin()),
+                                        decltype(std::declval<T>().end())>,
+                     void>> : std::true_type {};
+#endif
+
+/// tuple_size and tuple_element check.
+template <typename T> class is_tuple_like_ {
+  template <typename U>
+  static auto check(U* p)
+      -> decltype(std::tuple_size<U>::value,
+                  (void)std::declval<typename std::tuple_element<0, U>::type>(),
+                  int());
+  template <typename> static void check(...);
+
+ public:
+  static FMT_CONSTEXPR_DECL const bool value =
+      !std::is_void<decltype(check<T>(nullptr))>::value;
+};
+
+// Check for integer_sequence
+#if defined(__cpp_lib_integer_sequence) || FMT_MSC_VER >= 1900
+template <typename T, T... N>
+using integer_sequence = std::integer_sequence<T, N...>;
+template <std::size_t... N> using index_sequence = std::index_sequence<N...>;
+template <std::size_t N>
+using make_index_sequence = std::make_index_sequence<N>;
+#else
+template <typename T, T... N> struct integer_sequence {
+  using value_type = T;
+
+  static FMT_CONSTEXPR std::size_t size() { return sizeof...(N); }
+};
+
+template <std::size_t... N>
+using index_sequence = integer_sequence<std::size_t, N...>;
+
+template <typename T, std::size_t N, T... Ns>
+struct make_integer_sequence : make_integer_sequence<T, N - 1, N - 1, Ns...> {};
+template <typename T, T... Ns>
+struct make_integer_sequence<T, 0, Ns...> : integer_sequence<T, Ns...> {};
+
+template <std::size_t N>
+using make_index_sequence = make_integer_sequence<std::size_t, N>;
+#endif
+
+template <class Tuple, class F, size_t... Is>
+void for_each(index_sequence<Is...>, Tuple&& tup, F&& f) {  // not noexcept: f may throw
+  using std::get;
+  // Unqualified get<I>() so that ADL also finds get() of tuple-like user types.
+  const int _[] = {0, ((void)f(get<Is>(tup)), 0)...};  // calls f once per index, in order
+  (void)_;  // blocks warnings
+}
+
+template <class T>
+FMT_CONSTEXPR make_index_sequence<std::tuple_size<T>::value> get_indexes(
+    T const&) {
+  return {};
+}
+
+template <class Tuple, class F> void for_each(Tuple&& tup, F&& f) {
+  const auto indexes = get_indexes(tup);
+  for_each(indexes, std::forward<Tuple>(tup), std::forward<F>(f));
+}
+
+template <typename Arg, FMT_ENABLE_IF(!is_like_std_string<
+                                      typename std::decay<Arg>::type>::value)>
+FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const Arg&) {
+  return add_space ? " {}" : "{}";
+}
+
+template <typename Arg, FMT_ENABLE_IF(is_like_std_string<
+                                      typename std::decay<Arg>::type>::value)>
+FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const Arg&) {
+  return add_space ? " \"{}\"" : "\"{}\"";
+}
+
+FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const char*) {
+  return add_space ? " \"{}\"" : "\"{}\"";
+}
+FMT_CONSTEXPR const wchar_t* format_str_quoted(bool add_space, const wchar_t*) {
+  return add_space ? L" \"{}\"" : L"\"{}\"";
+}
+
+FMT_CONSTEXPR const char* format_str_quoted(bool add_space, const char) {
+  return add_space ? " '{}'" : "'{}'";
+}
+FMT_CONSTEXPR const wchar_t* format_str_quoted(bool add_space, const wchar_t) {
+  return add_space ? L" '{}'" : L"'{}'";
+}
+
+}  // namespace internal
+
+template <typename T> struct is_tuple_like {
+  static FMT_CONSTEXPR_DECL const bool value =
+      internal::is_tuple_like_<T>::value && !internal::is_range_<T>::value;
+};
+
+template <typename TupleT, typename Char>
+struct formatter<TupleT, Char, enable_if_t<fmt::is_tuple_like<TupleT>::value>> {
+ private:
+  // Functor standing in for a generic lambda (not available in C++11).
+  template <typename FormatContext> struct format_each {
+    template <typename T> void operator()(const T& v) {
+      if (i > 0) {
+        if (formatting.add_prepostfix_space) {
+          *out++ = ' ';
+        }
+        out = internal::copy(formatting.delimiter, out);
+      }
+      out = format_to(out,
+                      internal::format_str_quoted(
+                          (formatting.add_delimiter_spaces && i > 0), v),
+                      v);
+      ++i;
+    }
+
+    formatting_tuple<Char>& formatting;
+    std::size_t& i;  // number of elements written so far (shared with format())
+    typename std::add_lvalue_reference<decltype(
+        std::declval<FormatContext>().out())>::type out;  // refers to format()'s iterator
+  };
+
+ public:
+  formatting_tuple<Char> formatting;
+
+  template <typename ParseContext>
+  FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    return formatting.parse(ctx);
+  }
+
+  template <typename FormatContext = format_context>
+  auto format(const TupleT& values, FormatContext& ctx) -> decltype(ctx.out()) {
+    auto out = ctx.out();
+    std::size_t i = 0;
+    out = internal::copy(formatting.prefix, out);  // keep the advanced iterator
+
+    internal::for_each(values, format_each<FormatContext>{formatting, i, out});
+    if (formatting.add_prepostfix_space) {
+      *out++ = ' ';
+    }
+    out = internal::copy(formatting.postfix, out);
+
+    return out;  // end of what was written, not the iterator ctx started with
+  }
+};
+
+template <typename T, typename Char> struct is_range {
+  static FMT_CONSTEXPR_DECL const bool value =
+      internal::is_range_<T>::value &&
+      !internal::is_like_std_string<T>::value &&
+      !std::is_convertible<T, std::basic_string<Char>>::value;
+};
+
+template <typename RangeT, typename Char>
+struct formatter<RangeT, Char,
+                 enable_if_t<fmt::is_range<RangeT, Char>::value>> {
+  formatting_range<Char> formatting;  // prefix, delimiter, postfix and length limit
+
+  template <typename ParseContext>
+  FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) {
+    return formatting.parse(ctx);  // formatting_base::parse consumes nothing
+  }
+
+  template <typename FormatContext>
+  typename FormatContext::iterator format(const RangeT& values,
+                                          FormatContext& ctx) {
+    auto out = internal::copy(formatting.prefix, ctx.out());
+    std::size_t i = 0;  // number of elements written so far
+    for (auto it = values.begin(), end = values.end(); it != end; ++it) {
+      if (i > 0) {
+        if (formatting.add_prepostfix_space) *out++ = ' ';
+        out = internal::copy(formatting.delimiter, out);
+      }
+      out = format_to(out,
+                      internal::format_str_quoted(
+                          (formatting.add_delimiter_spaces && i > 0), *it),
+                      *it);
+      if (++i > formatting.range_length_limit) {  // NOTE(review): fires only after limit + 1 items - confirm
+        out = format_to(out, " ... <other elements>");  // also printed when no elements remain
+        break;
+      }
+    }
+    if (formatting.add_prepostfix_space) *out++ = ' ';
+    return internal::copy(formatting.postfix, out);
+  }
+};
+
+FMT_END_NAMESPACE
+
+#endif  // FMT_RANGES_H_
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/safe-duration-cast.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/safe-duration-cast.h
new file mode 100644
index 000000000..aa0361829
--- /dev/null
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/fmt/safe-duration-cast.h
@@ -0,0 +1,293 @@
+/*
+ * For conversion between std::chrono::durations without undefined
+ * behaviour or erroneous results.
+ * This is a stripped down version of duration_cast, for inclusion in fmt.
+ * See https://github.com/pauldreik/safe_duration_cast
+ *
+ * Copyright Paul Dreik 2019
+ *
+ * This file is licensed under the fmt license, see format.h
+ */
+#pragma once  // this header had no include guard: a second inclusion would redefine every template below
+#include <chrono>
+#include <cmath>
+#include <limits>
+#include <type_traits>
+
+#include "format.h"
+
+FMT_BEGIN_NAMESPACE
+
+namespace safe_duration_cast {
+
+template <typename To, typename From,
+          FMT_ENABLE_IF(!std::is_same<From, To>::value &&
+                        std::numeric_limits<From>::is_signed ==
+                            std::numeric_limits<To>::is_signed)>
+FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
+  ec = 0;  // 0 = success; set to 1 below when the value does not fit
+  using F = std::numeric_limits<From>;
+  using T = std::numeric_limits<To>;
+  static_assert(F::is_integer, "From must be integral");
+  static_assert(T::is_integer, "To must be integral");
+
+  // From and To are both signed, or both unsigned (see the enable_if above).
+  if (F::digits <= T::digits) {
+    // From fits in To without any problem.
+  } else {
+    // From does not always fit in To, resort to a dynamic check.
+    if (from < T::min() || from > T::max()) {
+      // outside range.
+      ec = 1;
+      return {};  // value-initialized To
+    }
+  }
+  return static_cast<To>(from);
+}
+
+/**
+ * Converts `from` to To without loss when From and To differ in signedness.
+ * If the value cannot be represented in To, ec is set to 1 and To{} is returned.
+ */
+template <typename To, typename From,
+          FMT_ENABLE_IF(!std::is_same<From, To>::value &&
+                        std::numeric_limits<From>::is_signed !=
+                            std::numeric_limits<To>::is_signed)>
+FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
+  ec = 0;  // 0 = success
+  using F = std::numeric_limits<From>;
+  using T = std::numeric_limits<To>;
+  static_assert(F::is_integer, "From must be integral");
+  static_assert(T::is_integer, "To must be integral");
+
+  if (F::is_signed && !T::is_signed) {
+    // From may be negative, not allowed!
+    if (from < 0) {
+      ec = 1;
+      return {};
+    }
+
+    // from is non-negative here. Can it always fit in To?
+    if (F::digits <= T::digits) {
+      // yes, From always fits in To.
+    } else {
+      // from may not fit in To, we have to do a dynamic check
+      if (from > static_cast<From>(T::max())) {
+        ec = 1;
+        return {};
+      }
+    }
+  }
+
+  if (!F::is_signed && T::is_signed) {
+    // can from be held in To?
+    if (F::digits < T::digits) {
+      // yes, From always fits in To.
+    } else {
+      // from may not fit in To, we have to do a dynamic check
+      if (from > static_cast<From>(T::max())) {
+        // outside range.
+        ec = 1;
+        return {};
+      }
+    }
+  }
+
+  // reaching here means all is ok for lossless conversion.
+  return static_cast<To>(from);
+
+}  // function
+
+template <typename To, typename From,
+          FMT_ENABLE_IF(std::is_same<From, To>::value)>
+FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) {
+  ec = 0;
+  return from;
+}  // function
+
+// clang-format off
+/**
+ * Converts `from` to To if possible; otherwise ec is set to 1 and To{} is returned.
+ *
+ * input                            |    output
+ * ---------------------------------|---------------
+ * NaN                              | NaN
+ * Inf                              | Inf
+ * normal, fits in output           | converted (possibly lossy)
+ * normal, does not fit in output   | ec is set
+ * subnormal                        | best effort
+ * -Inf                             | -Inf
+ */
+// clang-format on
+template <typename To, typename From,
+          FMT_ENABLE_IF(!std::is_same<From, To>::value)>
+FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) {
+  ec = 0;  // 0 = success
+  using T = std::numeric_limits<To>;
+  static_assert(std::is_floating_point<From>::value, "From must be floating");
+  static_assert(std::is_floating_point<To>::value, "To must be floating");
+
+  // Finite values convert only if they lie within To's finite range.
+  if (std::isfinite(from)) {  // NOTE(review): std::isfinite is not constexpr before C++23
+    if (from >= T::lowest() && from <= T::max()) {
+      return static_cast<To>(from);
+    }
+    // not within range.
+    ec = 1;
+    return {};
+  }
+
+  // nan and inf will be preserved
+  return static_cast<To>(from);
+}  // function
+
+template <typename To, typename From,
+          FMT_ENABLE_IF(std::is_same<From, To>::value)>
+FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) {
+  ec = 0;
+  static_assert(std::is_floating_point<From>::value, "From must be floating");
+  return from;
+}
+
+/**
+ * Safe duration_cast between integral durations: on overflow sets ec = 1 and returns To{}.
+ */
+template <typename To, typename FromRep, typename FromPeriod,
+          FMT_ENABLE_IF(std::is_integral<FromRep>::value),
+          FMT_ENABLE_IF(std::is_integral<typename To::rep>::value)>
+To safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
+                      int& ec) {
+  using From = std::chrono::duration<FromRep, FromPeriod>;
+  ec = 0;
+  // the basic idea is that we need to convert from count() in the from type
+  // to count() in the To type, by multiplying it with this:
+  using Factor = std::ratio_divide<typename From::period, typename To::period>;
+
+  static_assert(Factor::num > 0, "num must be positive");
+  static_assert(Factor::den > 0, "den must be positive");
+
+  // the conversion is like this: multiply from.count() with Factor::num
+  // /Factor::den and convert it to To::rep, all this without
+  // overflow/underflow. let's start by finding a suitable type that can hold
+  // both To, From and Factor::num
+  using IntermediateRep =
+      typename std::common_type<typename From::rep, typename To::rep,
+                                decltype(Factor::num)>::type;
+
+  // safe conversion to IntermediateRep
+  IntermediateRep count =
+      lossless_integral_conversion<IntermediateRep>(from.count(), ec);
+  if (ec) {
+    return {};
+  }
+  // multiply with Factor::num without overflow or underflow
+  if (Factor::num != 1) {
+    constexpr auto max1 =
+        std::numeric_limits<IntermediateRep>::max() / Factor::num;
+    if (count > max1) {  // count * num would exceed the maximum
+      ec = 1;
+      return {};
+    }
+    constexpr auto min1 =
+        std::numeric_limits<IntermediateRep>::min() / Factor::num;
+    if (count < min1) {  // count * num would fall below the minimum
+      ec = 1;
+      return {};
+    }
+    count *= Factor::num;
+  }
+
+  // Dividing by den cannot overflow: den > 0 is asserted above.
+  if (Factor::den != 1) {
+    count /= Factor::den;  // integer division: the quotient is truncated
+  }
+  // convert to the to type, safely
+  using ToRep = typename To::rep;
+  const ToRep tocount = lossless_integral_conversion<ToRep>(count, ec);
+  if (ec) {
+    return {};
+  }
+  return To{tocount};
+}
+
+/**
+ * Safe duration_cast between floating point durations: on overflow sets ec = 1 and returns To{}.
+ */
+template <typename To, typename FromRep, typename FromPeriod,
+          FMT_ENABLE_IF(std::is_floating_point<FromRep>::value),
+          FMT_ENABLE_IF(std::is_floating_point<typename To::rep>::value)>
+To safe_duration_cast(std::chrono::duration<FromRep, FromPeriod> from,
+                      int& ec) {
+  using From = std::chrono::duration<FromRep, FromPeriod>;
+  ec = 0;
+  if (std::isnan(from.count())) {
+    // nan in, gives nan out. easy.
+    return To{std::numeric_limits<typename To::rep>::quiet_NaN()};
+  }
+  // maybe we should also check if from is denormal, and decide what to do about
+  // it.
+
+  // +-inf should be preserved.
+  if (std::isinf(from.count())) {
+    return To{from.count()};
+  }
+
+  // the basic idea is that we need to convert from count() in the from type
+  // to count() in the To type, by multiplying it with this:
+  using Factor = std::ratio_divide<typename From::period, typename To::period>;
+
+  static_assert(Factor::num > 0, "num must be positive");
+  static_assert(Factor::den > 0, "den must be positive");
+
+  // the conversion is like this: multiply from.count() with Factor::num
+  // /Factor::den and convert it to To::rep, all this without
+  // overflow/underflow. let's start by finding a suitable type that can hold
+  // both To, From and Factor::num
+  using IntermediateRep =
+      typename std::common_type<typename From::rep, typename To::rep,
+                                decltype(Factor::num)>::type;
+
+  // Route the From::rep -> IntermediateRep conversion through the checked
+  // helper, even though it is not expected to narrow in this context.
+  IntermediateRep count =
+      safe_float_conversion<IntermediateRep>(from.count(), ec);
+  if (ec) {
+    return {};
+  }
+
+  // multiply with Factor::num without overflow or underflow
+  if (Factor::num != 1) {
+    constexpr auto max1 = std::numeric_limits<IntermediateRep>::max() /
+                          static_cast<IntermediateRep>(Factor::num);
+    if (count > max1) {  // count * num would exceed the largest finite value
+      ec = 1;
+      return {};
+    }
+    constexpr auto min1 = std::numeric_limits<IntermediateRep>::lowest() /
+                          static_cast<IntermediateRep>(Factor::num);
+    if (count < min1) {  // count * num would fall below the lowest finite value
+      ec = 1;
+      return {};
+    }
+    count *= static_cast<IntermediateRep>(Factor::num);
+  }
+
+  // Dividing by den >= 1 (asserted positive above) cannot overflow.
+  if (Factor::den != 1) {
+    using common_t = typename std::common_type<IntermediateRep, intmax_t>::type;
+    count /= static_cast<common_t>(Factor::den);
+  }
+
+  // convert to the to type, safely
+  using ToRep = typename To::rep;
+
+  const ToRep tocount = safe_float_conversion<ToRep>(count, ec);
+  if (ec) {
+    return {};
+  }
+  return To{tocount};
+}
+
+}  // namespace safe_duration_cast
+
+FMT_END_NAMESPACE
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thread/fast_mutex.h b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/thread/fast_mutex.h
similarity index 100%
rename from vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thread/fast_mutex.h
rename to vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thirdparty/thread/fast_mutex.h
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thread.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thread.hpp
index 6b7bfef1f..8c35b2d8e 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thread.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/thread.hpp
@@ -1,11 +1,16 @@
 #ifndef VTKMDIY_THREAD_H
 #define VTKMDIY_THREAD_H
 
-#ifdef VTKM_DIY_NO_THREADS
+#include <map>
+#include <memory>
+
+#ifdef VTKMDIY_NO_THREADS
 #include "no-thread.hpp"
 #else
 
-#include "thread/fast_mutex.h"
+#if !defined(_MSC_VER)
+#include "thirdparty/thread/fast_mutex.h"
+#endif
 
 #include <thread>
 #include <mutex>
@@ -17,15 +22,71 @@ namespace diy
     using std::recursive_mutex;
     namespace this_thread = std::this_thread;
 
+#if defined(_MSC_VER)
+    // fast_mutex implementation has issues on MSVC. Just use std::mutex
+    using fast_mutex = std::mutex;
+#else
     // TODO: replace with our own implementation using std::atomic_flag
     using fast_mutex = tthread::fast_mutex;
+#endif
 
     template<class Mutex>
     using lock_guard = std::unique_lock<Mutex>;
+
+    template<class T, class U>
+    struct concurrent_map;
 }
 
-#endif
+#endif // VTKMDIY_NO_THREADS
 
 #include "critical-resource.hpp"
 
+#if !defined(VTKMDIY_NO_THREADS)
+template<class T, class U>
+struct diy::concurrent_map
+{
+    using Map       = std::map<T,U>;
+    using SharedPtr = std::shared_ptr<lock_guard<fast_mutex>>;
+    // An iterator_ may co-own a lock on mutex_ (lock_ptr); the lock is released when the last copy holding it is destroyed.
+    template<class MapIterator>
+    struct iterator_
+    {
+        MapIterator     it;
+        SharedPtr       lock_ptr;   // empty for end() iterators
+
+                        iterator_(const MapIterator& it_, const SharedPtr& lock_ptr_ = SharedPtr()):
+                            it(it_), lock_ptr(lock_ptr_)                        {}
+
+        iterator_&      operator++()        { ++it; return *this; }
+        iterator_       operator++(int)     { iterator_ retval = *this; ++(*this); return retval; }
+
+        bool            operator==(const iterator_& other) const     { return it == other.it;}    // lock ownership is not compared
+        bool            operator!=(const iterator_& other) const     { return !(*this == other); }
+
+        decltype(*it)               operator*() const   { return *it; }
+        decltype(it.operator->())   operator->() const  { return it.operator->(); }
+    };
+
+    using iterator       = iterator_<typename Map::iterator>;
+    using const_iterator = iterator_<typename Map::const_iterator>;
+    // NOTE(review): the members below lock mutex_; calling one while this thread still holds a begin()/find() iterator locks it twice - confirm fast_mutex allows that.
+    U&              operator[](const T& x)  { lock_guard<fast_mutex> l(mutex_); return map_[x]; }   // the returned reference is used after the lock is released
+    // begin() and find() return iterators that keep mutex_ locked while they (or copies) are alive; end() takes no lock.
+    iterator        begin()                 { auto p = std::make_shared<lock_guard<fast_mutex>>(mutex_); return iterator(map_.begin(), p); }
+    iterator        end()                   { return iterator(map_.end()); }
+
+    const_iterator  begin() const           { auto p = std::make_shared<lock_guard<fast_mutex>>(mutex_); return const_iterator(map_.begin(), p); }
+    const_iterator  end() const             { return const_iterator(map_.end()); }
+
+    iterator        find(const T& x)        { auto p = std::make_shared<lock_guard<fast_mutex>>(mutex_); return iterator(map_.find(x), p); }
+    const_iterator  find(const T& x) const  { auto p = std::make_shared<lock_guard<fast_mutex>>(mutex_); return const_iterator(map_.find(x), p); }
+
+    void            clear()                 { lock_guard<fast_mutex> l(mutex_); map_.clear(); }
+    bool            empty() const           { lock_guard<fast_mutex> l(mutex_); return map_.empty(); }  // const: read-only, mutex_ is mutable
+
+    Map                 map_;
+    mutable fast_mutex  mutex_;     // mutable so that const members can lock it
+};
+#endif // !defined(VTKMDIY_NO_THREADS)
+
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/types.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/types.hpp
index be915b846..632283936 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/types.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/types.hpp
@@ -3,7 +3,7 @@
 
 #include <iostream>
 #include "constants.h"
-#include "point.hpp"
+#include "dynamic-point.hpp"
 
 namespace diy
 {
@@ -19,12 +19,21 @@ namespace diy
     struct Bounds
     {
         using Coordinate = Coordinate_;
-        using Point      = diy::Point<Coordinate, DIY_MAX_DIM>;
+        using Point      = diy::DynamicPoint<Coordinate>;
 
         Point min, max;
 
-        Bounds() = default;
+        Bounds(int dim): min(dim), max(dim)                                 {}
         Bounds(const Point& _min, const Point& _max) : min(_min), max(_max) {}
+
+        private:
+            // The default constructor is private to deliberately break the old, deprecated usage;
+            // replace any external call to the default constructor with a call to Bounds(0).
+            Bounds():
+                Bounds(0)                                                   {}
+            // NOTE(review): presumably befriended so Serialization can reach the private default constructor -- confirm.
+            template<class T> friend struct diy::Serialization;
+
     };
     using DiscreteBounds   = Bounds<int>;
     using ContinuousBounds = Bounds<float>;
@@ -32,30 +41,43 @@ namespace diy
     //! Helper to create a 1-dimensional discrete domain with the specified extents
     inline
     diy::DiscreteBounds
-    interval(int from, int to)            { DiscreteBounds domain; domain.min[0] = from; domain.max[0] = to; return domain; }
+    interval(int from, int to)            { DiscreteBounds domain(1); domain.min[0] = from; domain.max[0] = to; return domain; }  // Bounds is now built with an explicit int argument; 1 matches the single [0] coordinate set here
 
-    struct Direction: public Point<int,DIY_MAX_DIM>
+    struct Direction: public DynamicPoint<int>
     {
-              Direction()                 { for (size_t i = 0; i < DIY_MAX_DIM; ++i) (*this)[i] = 0; }
-              Direction(std::initializer_list<int> lst):
-                Direction()               { size_t i = 0; for(int x : lst) (*this)[i++] = x; }
-              Direction(int dir)
+        using Parent = DynamicPoint<int>;
+
+        using Parent::dimension;
+        using Parent::operator[];
+
+        // inherit Parent's constructors
+        using Parent::Parent;
+
+        // DM: This breaks the old behavior: the default constructor used to zero
+        //     DIY_MAX_DIM coordinates and now just forwards to Parent(0). Ideally, we'd
+        //     explicitly deprecate this, but we need the default constructor in Serialization.
+        //     I believe I've fixed all uses of this in DIY proper. Hopefully, no existing codes break.
+              Direction(): Parent(0)                              {}
+
+              Direction(int dim, int dir):   // dir is a bitmask of VTKMDIY_{X,Y,Z,T}{0,1} flags; flags for axes >= dim are ignored
+                  Parent(dim)                // NOTE(review): the explicit zeroing loop was dropped; presumably Parent(dim) zero-initializes -- confirm in DynamicPoint
       {
-          for (size_t i = 0; i < DIY_MAX_DIM; ++i) (*this)[i] = 0;
-          if (dir & DIY_X0) (*this)[0] -= 1;
-          if (dir & DIY_X1) (*this)[0] += 1;
-          if (dir & DIY_Y0) (*this)[1] -= 1;
-          if (dir & DIY_Y1) (*this)[1] += 1;
-          if (dir & DIY_Z0) (*this)[2] -= 1;
-          if (dir & DIY_Z1) (*this)[2] += 1;
-          if (dir & DIY_T0) (*this)[3] -= 1;
-          if (dir & DIY_T1) (*this)[3] += 1;
+          if (dim > 0 && dir & VTKMDIY_X0) (*this)[0] -= 1;
+          if (dim > 0 && dir & VTKMDIY_X1) (*this)[0] += 1;
+          if (dim > 1 && dir & VTKMDIY_Y0) (*this)[1] -= 1;
+          if (dim > 1 && dir & VTKMDIY_Y1) (*this)[1] += 1;
+          if (dim > 2 && dir & VTKMDIY_Z0) (*this)[2] -= 1;
+          if (dim > 2 && dir & VTKMDIY_Z1) (*this)[2] += 1;
+          if (dim > 3 && dir & VTKMDIY_T0) (*this)[3] -= 1;
+          if (dim > 3 && dir & VTKMDIY_T1) (*this)[3] += 1;
       }
 
+      // Builds a Direction from a bitmask; note the (dir, dim) argument order, the reverse of the (dim, dir) constructor it forwards to.
+      static Direction from_bits(int dir, int dim = VTKMDIY_MAX_DIM)    { return Direction(dim, dir); }
       bool
       operator==(const diy::Direction& y) const
       {
-        for (size_t i = 0; i < DIY_MAX_DIM; ++i)
+        for (size_t i = 0; i < dimension(); ++i)   // only this->dimension() entries are compared; y's dimension is not checked
             if ((*this)[i] != y[i]) return false;
         return true;
       }
@@ -64,7 +86,7 @@ namespace diy
       bool
       operator<(const diy::Direction& y) const
       {
-        for (size_t i = 0; i < DIY_MAX_DIM; ++i)
+        for (size_t i = 0; i < dimension(); ++i)
         {
             if ((*this)[i] < y[i]) return true;
             if ((*this)[i] > y[i]) return false;
@@ -89,6 +111,36 @@ namespace diy
     bool
     operator==(const diy::BlockID& x, const diy::BlockID& y)
     { return x.gid == y.gid; }
+
+    // Serialization
+    // Binary (de)serialization of Bounds<C>: a box is stored as its two
+    // corner points, min first and max second.
+    template<class C>
+    struct Serialization<Bounds<C>>
+    {
+        // Append both corners of `bounds` to `buffer`,
+        // min before max.
+        static void save(BinaryBuffer& buffer, const Bounds<C>& bounds)
+        { diy::save(buffer, bounds.min); diy::save(buffer, bounds.max); }
+
+        // Read the corners back into `bounds` in the same order
+        // that save() wrote them.
+        static void load(BinaryBuffer& buffer, Bounds<C>& bounds)
+        { diy::load(buffer, bounds.min); diy::load(buffer, bounds.max); }
+    };
+    // A Direction is (de)serialized through its base class,
+    // Direction::Parent.
+    template<>
+    struct Serialization<Direction>
+    {
+        // Save `direction` by handing its Parent view to diy::save.
+        static void save(BinaryBuffer& buffer, const Direction& direction)
+        { const Direction::Parent& as_point = direction; diy::save(buffer, as_point); }
+
+        // Load into `direction` through a reference to its Parent subobject.
+        static void load(BinaryBuffer& buffer, Direction& direction)
+        { Direction::Parent& as_point = direction; diy::load(buffer, as_point); }
+    };
 }
 
 #endif
diff --git a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/version.hpp b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/version.hpp
index d3a1a3c6b..7bbe4df16 100644
--- a/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/version.hpp
+++ b/vtkm/thirdparty/diy/vtkmdiy/include/vtkmdiy/version.hpp
@@ -3,6 +3,6 @@
 
 #define VTKMDIY_VERSION_MAJOR 3
 #define VTKMDIY_VERSION_MINOR 5
-#define VTKMDIY_VERSION_PATCH dev1
+#define VTKMDIY_VERSION_PATCH dev1 // keeps the VTKMDIY_ prefix shared with VTKMDIY_VERSION_MAJOR/MINOR
 
 #endif