Merge topic 'diy-improvements'

954111f6 exclude -Wunused-result from diy/storage. 1737bbe9 exclude shadow warnings from DIY. ffc833fd Update diy to revision `ca5e7cf23`. 3408e8e5 Revert "diy: pass operator instance to mpi_op<>::get()" cac71555 Use `diy::reduce` in MultiBlock reductions. e349dd0d Use default copy constructor. Acked-by: Kitware Robot <kwrobot@kitware.com> Acked-by: Robert Maynard <robert.maynard@kitware.com> Merge-request: !1037
2024-09-16 17:22:55 +00:00 · 2018-01-03 16:06:16 +00:00 · 2018-01-03 16:06:16 +00:00 · 4275b9fee9
commit 4275b9fee9
parent a81919999a 954111f60e
12 changed files with 341 additions and 199 deletions
--- a/CTestCustom.cmake.in
+++ b/CTestCustom.cmake.in
@ -20,4 +20,7 @@

 list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
  ".*warning: ignoring loop annotation.*"
-) 
+  ".*diy.include.diy.*WShadow.*" # exclude `diy` shadow warnings.
+  ".*diy.include.diy.*note: shadowed.*" # exclude `diy` shadow warnings.
+  ".*diy.include.diy.storage.hpp.*Wunused-result.*" # this is a TODO in DIY.
+)
--- a/diy/include/diy/master.hpp
+++ b/diy/include/diy/master.hpp
@ -1062,8 +1062,6 @@ void
 diy::Master::
 flush()
 {
-
-  auto scoped = prof.scoped("comm");
 #ifdef DEBUG
  time_type start = get_time();
  unsigned wait = 1;
--- a/diy/include/diy/mpi/collectives.hpp
+++ b/diy/include/diy/mpi/collectives.hpp
@ -152,13 +152,13 @@ namespace mpi
      }
    }

-    static void reduce(const communicator& comm, const T& in, T& out, int root, const Op& op)
+    static void reduce(const communicator& comm, const T& in, T& out, int root, const Op&)
    {
      MPI_Reduce(Datatype::address(const_cast<T&>(in)),
                 Datatype::address(out),
                 Datatype::count(in),
                 Datatype::datatype(),
-                 detail::mpi_op<Op>::get(op),
+                 detail::mpi_op<Op>::get(),
                 root, comm);
    }

@ -168,38 +168,38 @@ namespace mpi
                 Datatype::address(const_cast<T&>(in)),
                 Datatype::count(in),
                 Datatype::datatype(),
-                 detail::mpi_op<Op>::get(op),
+                 detail::mpi_op<Op>::get(),
                 root, comm);
    }

-    static void all_reduce(const communicator& comm, const T& in, T& out, const Op& op)
+    static void all_reduce(const communicator& comm, const T& in, T& out, const Op&)
    {
      MPI_Allreduce(Datatype::address(const_cast<T&>(in)),
                    Datatype::address(out),
                    Datatype::count(in),
                    Datatype::datatype(),
-                    detail::mpi_op<Op>::get(op),
+                    detail::mpi_op<Op>::get(),
                    comm);
    }

-    static void all_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op& op)
+    static void all_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op&)
    {
      out.resize(in.size());
      MPI_Allreduce(Datatype::address(const_cast<T&>(in[0])),
                    Datatype::address(out[0]),
                    in.size(),
                    Datatype::datatype(),
-                    detail::mpi_op<Op>::get(op),
+                    detail::mpi_op<Op>::get(),
                    comm);
    }

-    static void scan(const communicator& comm, const T& in, T& out, const Op& op)
+    static void scan(const communicator& comm, const T& in, T& out, const Op&)
    {
      MPI_Scan(Datatype::address(const_cast<T&>(in)),
               Datatype::address(out),
               Datatype::count(in),
               Datatype::datatype(),
-               detail::mpi_op<Op>::get(op),
+               detail::mpi_op<Op>::get(),
               comm);
    }

--- a/diy/include/diy/mpi/operations.hpp
+++ b/diy/include/diy/mpi/operations.hpp
@ -14,13 +14,13 @@ namespace mpi

 namespace detail
 {
-  template<class T> struct mpi_op                           { static MPI_Op  get(const T&); };
-  template<class U> struct mpi_op< maximum<U> >             { static MPI_Op  get(const maximum<U>&) { return MPI_MAX; }  };
-  template<class U> struct mpi_op< minimum<U> >             { static MPI_Op  get(const minimum<U>&) { return MPI_MIN; }  };
-  template<class U> struct mpi_op< std::plus<U> >           { static MPI_Op  get(const std::plus<U>&) { return MPI_SUM; }  };
-  template<class U> struct mpi_op< std::multiplies<U> >     { static MPI_Op  get(const std::multiplies<U>&) { return MPI_PROD; }  };
-  template<class U> struct mpi_op< std::logical_and<U> >    { static MPI_Op  get(const std::logical_and<U>&) { return MPI_LAND; }  };
-  template<class U> struct mpi_op< std::logical_or<U> >     { static MPI_Op  get(const std::logical_or<U>&) { return MPI_LOR; }  };
+  template<class T> struct mpi_op                           { static MPI_Op  get(); };
+  template<class U> struct mpi_op< maximum<U> >             { static MPI_Op  get() { return MPI_MAX; }  };
+  template<class U> struct mpi_op< minimum<U> >             { static MPI_Op  get() { return MPI_MIN; }  };
+  template<class U> struct mpi_op< std::plus<U> >           { static MPI_Op  get() { return MPI_SUM; }  };
+  template<class U> struct mpi_op< std::multiplies<U> >     { static MPI_Op  get() { return MPI_PROD; }  };
+  template<class U> struct mpi_op< std::logical_and<U> >    { static MPI_Op  get() { return MPI_LAND; }  };
+  template<class U> struct mpi_op< std::logical_or<U> >     { static MPI_Op  get() { return MPI_LOR; }  };
 }
 }
 }
--- a/diy/include/diy/stats.hpp
+++ b/diy/include/diy/stats.hpp
@ -6,7 +6,7 @@
 #include <vector>

 #include "log.hpp"      // need this for format
-#define DIY_PROFILE 1
+
 namespace diy
 {
 namespace stats
@ -71,11 +71,7 @@ struct Profiler
        {
            const Event& e = events[i];
            auto time = std::chrono::duration_cast<std::chrono::microseconds>(e.stamp - start).count();
-            fmt::print(out, "{} {} {}\n",
-                            time / 1000000.,
-                            (e.begin ? '<' : '>'),
-                            e.name);
-            /*
+
            fmt::print(out, "{:02d}:{:02d}:{:02d}.{:06d} {}{}\n",
                            time/1000000/60/60,
                            time/1000000/60 % 60,
@ -83,7 +79,6 @@ struct Profiler
                            time % 1000000,
                            (e.begin ? '<' : '>'),
                            e.name);
-                            */
        }
    }

--- a/vtkm/Bounds.h
+++ b/vtkm/Bounds.h
@ -45,6 +45,9 @@ struct Bounds
  VTKM_EXEC_CONT
  Bounds() {}

+  VTKM_EXEC_CONT
+  Bounds(const Bounds&) = default;
+
  VTKM_EXEC_CONT
  Bounds(const vtkm::Range& xRange, const vtkm::Range& yRange, const vtkm::Range& zRange)
    : X(xRange)
@ -89,13 +92,7 @@ struct Bounds
  }

  VTKM_EXEC_CONT
-  const vtkm::Bounds& operator=(const vtkm::Bounds& src)
-  {
-    this->X = src.X;
-    this->Y = src.Y;
-    this->Z = src.Z;
-    return *this;
-  }
+  vtkm::Bounds& operator=(const vtkm::Bounds& src) = default;

  /// \b Determine if the bounds are valid (i.e. has at least one valid point).
  ///
--- a/vtkm/Range.h
+++ b/vtkm/Range.h
@ -49,6 +49,9 @@ struct Range
  {
  }

+  VTKM_EXEC_CONT
+  Range(const Range&) = default;
+
  template <typename T1, typename T2>
  VTKM_EXEC_CONT Range(const T1& min, const T2& max)
    : Min(static_cast<vtkm::Float64>(min))
@ -57,12 +60,7 @@ struct Range
  }

  VTKM_EXEC_CONT
-  const vtkm::Range& operator=(const vtkm::Range& src)
-  {
-    this->Min = src.Min;
-    this->Max = src.Max;
-    return *this;
-  }
+  vtkm::Range& operator=(const vtkm::Range& src) = default;

  /// \b Determine if the range is valid (i.e. has at least one valid point).
  ///
--- a/vtkm/cont/AssignerMultiBlock.cxx
+++ b/vtkm/cont/AssignerMultiBlock.cxx
@ -0,0 +1,78 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//
+//  Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
+//  Copyright 2015 UT-Battelle, LLC.
+//  Copyright 2015 Los Alamos National Security.
+//
+//  Under the terms of Contract DE-NA0003525 with NTESS,
+//  the U.S. Government retains certain rights in this software.
+//
+//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+//  Laboratory (LANL), the U.S. Government retains certain rights in
+//  this software.
+//============================================================================
+#include <vtkm/cont/AssignerMultiBlock.h>
+
+#if defined(VTKM_ENABLE_MPI)
+
+#include <diy/mpi.hpp>
+#include <vtkm/cont/EnvironmentTracker.h>
+
+#include <algorithm> // std::lower_bound
+#include <numeric>   // std::iota
+
+namespace vtkm
+{
+namespace cont
+{
+
+VTKM_CONT
+AssignerMultiBlock::AssignerMultiBlock(const vtkm::cont::MultiBlock& mb)
+  : diy::Assigner(vtkm::cont::EnvironmentTracker::GetCommunicator().size(), 1)
+  , IScanBlockCounts()
+{
+  auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
+  const auto nblocks = mb.GetNumberOfBlocks();
+
+  vtkm::Id iscan;
+  diy::mpi::scan(comm, nblocks, iscan, std::plus<vtkm::Id>());
+  diy::mpi::all_gather(comm, iscan, this->IScanBlockCounts);
+
+  this->set_nblocks(static_cast<int>(this->IScanBlockCounts.back()));
+}
+
+VTKM_CONT
+void AssignerMultiBlock::local_gids(int rank, std::vector<int>& gids) const
+{
+  if (rank == 0)
+  {
+    assert(this->IScanBlockCounts.size() > 0);
+    gids.resize(this->IScanBlockCounts[rank]);
+    std::iota(gids.begin(), gids.end(), 0);
+  }
+  else if (rank > 0 && rank < static_cast<int>(this->IScanBlockCounts.size()))
+  {
+    gids.resize(this->IScanBlockCounts[rank] - this->IScanBlockCounts[rank - 1]);
+    std::iota(gids.begin(), gids.end(), this->IScanBlockCounts[rank - 1]);
+  }
+}
+
+VTKM_CONT
+int AssignerMultiBlock::rank(int gid) const
+{
+  return static_cast<int>(
+    std::lower_bound(this->IScanBlockCounts.begin(), this->IScanBlockCounts.end(), gid + 1) -
+    this->IScanBlockCounts.begin());
+}
+
+
+} // vtkm::cont
+} // vtkm
+
+#endif // defined(VTKM_ENABLE_MPI)
--- a/vtkm/cont/AssignerMultiBlock.h
+++ b/vtkm/cont/AssignerMultiBlock.h
@ -0,0 +1,70 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//
+//  Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
+//  Copyright 2015 UT-Battelle, LLC.
+//  Copyright 2015 Los Alamos National Security.
+//
+//  Under the terms of Contract DE-NA0003525 with NTESS,
+//  the U.S. Government retains certain rights in this software.
+//
+//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+//  Laboratory (LANL), the U.S. Government retains certain rights in
+//  this software.
+//============================================================================
+#ifndef vtk_m_cont_AssignerMultiBlock_h
+#define vtk_m_cont_AssignerMultiBlock_h
+
+#include <vtkm/internal/Configure.h>
+#if defined(VTKM_ENABLE_MPI)
+
+#include <diy/assigner.hpp>
+#include <vtkm/cont/MultiBlock.h>
+
+namespace vtkm
+{
+namespace cont
+{
+
+/// \brief Assigner for `MultiBlock` blocks.
+///
+/// `AssignerMultiBlock` is a `diy::Assigner` implementation that uses
+/// `MultiBlock`'s block distribution to build global-id/rank associations
+/// needed for several `diy` operations.
+/// It uses a contiguous assignment strategy to map blocks to global ids i.e.
+/// blocks on rank 0 come first, then rank 1, etc. Any rank may have 0 blocks.
+///
+/// AssignerMultiBlock uses collectives in the constructor hence it is
+/// essential it gets created on all ranks irrespective of whether the rank has
+/// any blocks.
+///
+class VTKM_CONT_EXPORT AssignerMultiBlock : public diy::Assigner
+{
+public:
+  /// Initialize the assigner using a multiblock dataset.
+  /// This may initialize collective operations to populate the assigner with
+  /// information about blocks on all ranks.
+  VTKM_CONT
+  AssignerMultiBlock(const vtkm::cont::MultiBlock& mb);
+
+  ///@{
+  /// diy::Assigner API implementation.
+  VTKM_CONT
+  void local_gids(int rank, std::vector<int>& gids) const override;
+
+  VTKM_CONT
+  int rank(int gid) const override;
+  //@}
+private:
+  std::vector<vtkm::Id> IScanBlockCounts;
+};
+}
+}
+
+#endif // defined(VTKM_ENABLE_MPI)
+#endif
--- a/vtkm/cont/CMakeLists.txt
+++ b/vtkm/cont/CMakeLists.txt
@ -45,6 +45,7 @@ set(headers
  ArrayHandleConcatenate.h
  ArrayRangeCompute.h
  ArrayRangeCompute.hxx
+  AssignerMultiBlock.h
  CellLocatorTwoLevelUniformGrid.h
  CellSet.h
  CellSetExplicit.h
@ -58,6 +59,7 @@ set(headers
  DataSetBuilderRectilinear.h
  DataSetBuilderUniform.h
  DataSetFieldAdd.h
+  DecomposerMultiBlock.h
  DeviceAdapter.h
  DeviceAdapterAlgorithm.h
  DeviceAdapterListTag.h
@ -94,6 +96,7 @@ set(header_impls

 set(sources
  ArrayHandle.cxx
+  AssignerMultiBlock.cxx
  CellSet.cxx
  CellSetExplicit.cxx
  CellSetStructured.cxx
--- a/vtkm/cont/DecomposerMultiBlock.h
+++ b/vtkm/cont/DecomposerMultiBlock.h
@ -0,0 +1,57 @@
+//============================================================================
+//  Copyright (c) Kitware, Inc.
+//  All rights reserved.
+//  See LICENSE.txt for details.
+//  This software is distributed WITHOUT ANY WARRANTY; without even
+//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+//  PURPOSE.  See the above copyright notice for more information.
+//
+//  Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
+//  Copyright 2015 UT-Battelle, LLC.
+//  Copyright 2015 Los Alamos National Security.
+//
+//  Under the terms of Contract DE-NA0003525 with NTESS,
+//  the U.S. Government retains certain rights in this software.
+//
+//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+//  Laboratory (LANL), the U.S. Government retains certain rights in
+//  this software.
+//============================================================================
+#ifndef vtk_m_cont_DecomposerMultiBlock_h
+#define vtk_m_cont_DecomposerMultiBlock_h
+
+#include <vtkm/internal/Configure.h>
+#if defined(VTKM_ENABLE_MPI)
+#include <vtkm/cont/AssignerMultiBlock.h>
+
+namespace vtkm
+{
+namespace cont
+{
+
+/// \brief DIY Decomposer that uses `MultiBlock` existing decomposition.
+///
+/// To create partners for various reduce operations, DIY requires a decomposer.
+/// This class provides an implementation that can use the multiblock's
+/// decomposition.
+///
+class VTKM_CONT_EXPORT DecomposerMultiBlock
+{
+public:
+  VTKM_CONT DecomposerMultiBlock(const diy::Assigner& assigner)
+    : divisions{ assigner.nblocks() }
+  {
+  }
+
+  using DivisionVector = std::vector<int>;
+
+  /// this public member is needed to satisfy decomposer concept for
+  /// partners in DIY.
+  DivisionVector divisions;
+};
+}
+}
+
+#endif // defined(VTKM_ENABLE_MPI)
+
+#endif
--- a/vtkm/cont/MultiBlock.cxx
+++ b/vtkm/cont/MultiBlock.cxx
@ -21,7 +21,9 @@
 #include <vtkm/StaticAssert.h>
 #include <vtkm/cont/ArrayCopy.h>
 #include <vtkm/cont/ArrayHandle.h>
+#include <vtkm/cont/AssignerMultiBlock.h>
 #include <vtkm/cont/DataSet.h>
+#include <vtkm/cont/DecomposerMultiBlock.h>
 #include <vtkm/cont/DeviceAdapterAlgorithm.h>
 #include <vtkm/cont/DynamicArrayHandle.h>
 #include <vtkm/cont/EnvironmentTracker.h>
@ -30,7 +32,11 @@
 #include <vtkm/cont/MultiBlock.h>

 #if defined(VTKM_ENABLE_MPI)
+#include <diy/decomposition.hpp>
 #include <diy/master.hpp>
+#include <diy/partners/all-reduce.hpp>
+#include <diy/partners/swap.hpp>
+#include <diy/reduce.hpp>

 namespace vtkm
 {
@ -48,111 +54,21 @@ VTKM_CONT std::vector<typename PortalType::ValueType> CopyArrayPortalToVector(
  std::copy(iterators.GetBegin(), iterators.GetEnd(), result.begin());
  return result;
 }
+
+template <typename T>
+const vtkm::cont::DataSet& GetBlock(const vtkm::cont::MultiBlock& mb, const T&);
+
+template <>
+const vtkm::cont::DataSet& GetBlock(const vtkm::cont::MultiBlock& mb,
+                                    const diy::Master::ProxyWithLink& cp)
+{
+  const int lid = cp.master()->lid(cp.gid());
+  return mb.GetBlock(lid);
 }
 }
 }
-
-namespace std
-{
-
-namespace detail
-{
-
-template <typename T, size_t ElementSize = sizeof(T)>
-struct MPIPlus
-{
-  MPIPlus()
-  {
-    this->OpPtr = std::shared_ptr<MPI_Op>(new MPI_Op(MPI_NO_OP), [](MPI_Op* ptr) {
-      MPI_Op_free(ptr);
-      delete ptr;
-    });
-
-    MPI_Op_create(
-      [](void* a, void* b, int* len, MPI_Datatype*) {
-        T* ba = reinterpret_cast<T*>(a);
-        T* bb = reinterpret_cast<T*>(b);
-        for (int cc = 0; cc < (*len) / ElementSize; ++cc)
-        {
-          bb[cc] = ba[cc] + bb[cc];
-        }
-      },
-      1,
-      this->OpPtr.get());
-  }
-  ~MPIPlus() {}
-  operator MPI_Op() const { return *this->OpPtr.get(); }
-private:
-  std::shared_ptr<MPI_Op> OpPtr;
-};
-
-} // std::detail
-
-template <>
-struct plus<vtkm::Bounds>
-{
-  MPI_Op get_mpi_op() const { return this->Op; }
-  vtkm::Bounds operator()(const vtkm::Bounds& lhs, const vtkm::Bounds& rhs) const
-  {
-    return lhs + rhs;
-  }
-
-private:
-  std::detail::MPIPlus<vtkm::Bounds> Op;
-};
-
-template <>
-struct plus<vtkm::Range>
-{
-  MPI_Op get_mpi_op() const { return this->Op; }
-  vtkm::Range operator()(const vtkm::Range& lhs, const vtkm::Range& rhs) const { return lhs + rhs; }
-
-private:
-  std::detail::MPIPlus<vtkm::Range> Op;
-};
 }

-namespace diy
-{
-namespace mpi
-{
-namespace detail
-{
-template <>
-struct mpi_datatype<vtkm::Bounds>
-{
-  static MPI_Datatype datatype() { return get_mpi_datatype<vtkm::Float64>(); }
-  static const void* address(const vtkm::Bounds& x) { return &x; }
-  static void* address(vtkm::Bounds& x) { return &x; }
-  static int count(const vtkm::Bounds&) { return 6; }
-};
-
-template <>
-struct mpi_op<std::plus<vtkm::Bounds>>
-{
-  static MPI_Op get(const std::plus<vtkm::Bounds>& op) { return op.get_mpi_op(); }
-};
-
-template <>
-struct mpi_datatype<vtkm::Range>
-{
-  static MPI_Datatype datatype() { return get_mpi_datatype<vtkm::Float64>(); }
-  static const void* address(const vtkm::Range& x) { return &x; }
-  static void* address(vtkm::Range& x) { return &x; }
-  static int count(const vtkm::Range&) { return 2; }
-};
-
-template <>
-struct mpi_op<std::plus<vtkm::Range>>
-{
-  static MPI_Op get(const std::plus<vtkm::Range>& op) { return op.get_mpi_op(); }
-};
-
-} // diy::mpi::detail
-} // diy::mpi
-} // diy
-
-
 #endif

 namespace vtkm
@ -311,26 +227,56 @@ VTKM_CONT vtkm::Bounds MultiBlock::GetBounds(vtkm::Id coordinate_system_index,

 #if defined(VTKM_ENABLE_MPI)
  auto world = vtkm::cont::EnvironmentTracker::GetCommunicator();
-  //const auto global_num_blocks = this->GetGlobalNumberOfBlocks();
+  diy::Master master(world,
+                     1,
+                     -1,
+                     []() -> void* { return new vtkm::Bounds(); },
+                     [](void* ptr) { delete static_cast<vtkm::Bounds*>(ptr); });

-  const auto num_blocks = this->GetNumberOfBlocks();
+  vtkm::cont::AssignerMultiBlock assigner(*this);

-  diy::Master master(world, 1, -1);
-  for (vtkm::Id cc = 0; cc < num_blocks; ++cc)
-  {
-    int gid = cc * world.size() + world.rank();
-    master.add(gid, const_cast<vtkm::cont::DataSet*>(&this->Blocks[cc]), new diy::Link());
-  }
+  // populate master with blocks from `this`.
+  diy::decompose(world.rank(), assigner, master);

-  master.foreach ([&](const vtkm::cont::DataSet* block, const diy::Master::ProxyWithLink& cp) {
-    auto coords = block->GetCoordinateSystem(coordinate_system_index);
-    const vtkm::Bounds bounds = coords.GetBounds(TypeList(), StorageList());
-    cp.all_reduce(bounds, std::plus<vtkm::Bounds>());
+  auto self = (*this);
+  master.foreach ([&](vtkm::Bounds* data, const diy::Master::ProxyWithLink& cp) {
+    const vtkm::cont::DataSet& block = vtkm::cont::detail::GetBlock(self, cp);
+    try
+    {
+      vtkm::cont::CoordinateSystem coords = block.GetCoordinateSystem(coordinate_system_index);
+      *data = coords.GetBounds(TypeList(), StorageList());
+    }
+    catch (const vtkm::cont::Error&)
+    {
+    }
  });

-  master.process_collectives();
-  auto bounds = master.proxy(0).get<vtkm::Bounds>();
-  return bounds;
+  vtkm::cont::DecomposerMultiBlock decomposer(assigner);
+  diy::RegularSwapPartners partners(decomposer, /*k=*/2);
+
+  auto callback =
+    [](vtkm::Bounds* data, const diy::ReduceProxy& srp, const diy::RegularSwapPartners&) {
+      // 1. dequeue.
+      std::vector<int> incoming;
+      srp.incoming(incoming);
+      vtkm::Bounds message;
+      for (const int gid : incoming)
+      {
+        srp.dequeue(gid, message);
+        data->Include(message);
+      }
+      // 2. enqueue
+      for (int cc = 0; cc < srp.out_link().size(); ++cc)
+      {
+        srp.enqueue(srp.out_link().target(cc), *data);
+      }
+    };
+  diy::reduce(master, assigner, partners, callback);
+  if (master.size())
+  {
+    return (*master.block<vtkm::Bounds>(0));
+  }
+  return vtkm::Bounds();

 #else
  const vtkm::Id index = coordinate_system_index;
@ -443,65 +389,62 @@ VTKM_CONT vtkm::cont::ArrayHandle<vtkm::Range>
 MultiBlock::GetGlobalRange(const std::string& field_name, TypeList, StorageList) const
 {
 #if defined(VTKM_ENABLE_MPI)
-  auto world = vtkm::cont::EnvironmentTracker::GetCommunicator();
-  const auto num_blocks = this->GetNumberOfBlocks();
+  using BlockMetaData = std::vector<vtkm::Range>;

-  diy::Master master(world);
-  for (vtkm::Id cc = 0; cc < num_blocks; ++cc)
-  {
-    int gid = cc * world.size() + world.rank();
-    master.add(gid, const_cast<vtkm::cont::DataSet*>(&this->Blocks[cc]), new diy::Link());
-  }
+  auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
+  diy::Master master(comm,
+                     1,
+                     -1,
+                     []() -> void* { return new BlockMetaData(); },
+                     [](void* ptr) { delete static_cast<BlockMetaData*>(ptr); });

-  // collect info about number of components in the field.
-  master.foreach ([&](const vtkm::cont::DataSet* dataset, const diy::Master::ProxyWithLink& cp) {
-    if (dataset->HasField(field_name))
+  vtkm::cont::AssignerMultiBlock assigner(*this);
+
+  diy::decompose(comm.rank(), assigner, master);
+
+  auto self = (*this);
+  master.foreach ([&](BlockMetaData* data, const diy::Master::ProxyWithLink& cp) {
+    const vtkm::cont::DataSet& block = vtkm::cont::detail::GetBlock(self, cp);
+    if (block.HasField(field_name))
    {
-      auto field = dataset->GetField(field_name);
+      auto field = block.GetField(field_name);
      const vtkm::cont::ArrayHandle<vtkm::Range> range = field.GetRange(TypeList(), StorageList());
-      vtkm::Id components = range.GetPortalConstControl().GetNumberOfValues();
-      cp.all_reduce(components, diy::mpi::maximum<vtkm::Id>());
+      *data = vtkm::cont::detail::CopyArrayPortalToVector(range.GetPortalConstControl());
    }
  });
-  master.process_collectives();

-  const vtkm::Id components = master.size() ? master.proxy(0).read<vtkm::Id>() : 0;
+  vtkm::cont::DecomposerMultiBlock decomposer(assigner);
+  diy::RegularSwapPartners partners(decomposer, /*k=*/2);
+  auto callback =
+    [](BlockMetaData* data, const diy::ReduceProxy& srp, const diy::RegularSwapPartners&) {
+      std::vector<int> incoming;
+      srp.incoming(incoming);

-  // clear all collectives.
-  master.foreach ([&](const vtkm::cont::DataSet*, const diy::Master::ProxyWithLink& cp) {
-    cp.collectives()->clear();
-  });
-
-  master.foreach ([&](const vtkm::cont::DataSet* dataset, const diy::Master::ProxyWithLink& cp) {
-    if (dataset->HasField(field_name))
-    {
-      auto field = dataset->GetField(field_name);
-      const vtkm::cont::ArrayHandle<vtkm::Range> range = field.GetRange(TypeList(), StorageList());
-      const auto v_range =
-        vtkm::cont::detail::CopyArrayPortalToVector(range.GetPortalConstControl());
-      for (const vtkm::Range& r : v_range)
+      // 1. dequeue
+      BlockMetaData message;
+      for (const int gid : incoming)
      {
-        cp.all_reduce(r, std::plus<vtkm::Range>());
+        srp.dequeue(gid, message);
+        data->resize(std::max(data->size(), message.size()));
+        for (size_t cc = 0; cc < data->size(); ++cc)
+        {
+          (*data)[cc].Include(message[cc]);
+        }
      }
-      // if current block has less that the max number of components, just add invalid ranges for the rest.
-      for (vtkm::Id cc = static_cast<vtkm::Id>(v_range.size()); cc < components; ++cc)
+      // 2. enqueue
+      for (int cc = 0; cc < srp.out_link().size(); ++cc)
      {
-        cp.all_reduce(vtkm::Range(), std::plus<vtkm::Range>());
+        srp.enqueue(srp.out_link().target(cc), *data);
      }
-    }
-  });
-  master.process_collectives();
-  std::vector<vtkm::Range> ranges(components);
-  // FIXME: is master.size() == 0 i.e. there are no blocks on the current rank,
-  // this method won't return valid range.
-  if (master.size() > 0)
+    };
+
+  diy::reduce(master, assigner, partners, callback);
+
+  BlockMetaData ranges;
+  if (master.size())
  {
-    for (vtkm::Id cc = 0; cc < components; ++cc)
-    {
-      ranges[cc] = master.proxy(0).get<vtkm::Range>();
-    }
+    ranges = *(master.block<BlockMetaData>(0));
  }
-
  vtkm::cont::ArrayHandle<vtkm::Range> tmprange = vtkm::cont::make_ArrayHandle(ranges);
  vtkm::cont::ArrayHandle<vtkm::Range> range;
  vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandle(ranges), range);