Use diy::reduce in MultiBlock reductions.

MultiBlock now uses `diy::reduce` for reductions rather than using proxy
collectives. To support using `diy::reduce` operations on a
vtkm::cont::MultiBlock, added AssignerMultiBlock and
DecomposerMultiBlock classes. This are helper classes that provide DIY
concepts on top of a existing MultiBlock.
This commit is contained in:
Utkarsh Ayachit 2017-12-20 20:18:22 -05:00
parent e349dd0d1c
commit cac71555e2
5 changed files with 311 additions and 160 deletions

@ -0,0 +1,78 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2015 UT-Battelle, LLC.
// Copyright 2015 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/AssignerMultiBlock.h>
#if defined(VTKM_ENABLE_MPI)
#include <diy/mpi.hpp>
#include <vtkm/cont/EnvironmentTracker.h>
#include <algorithm> // std::lower_bound
#include <numeric> // std::iota
namespace vtkm
{
namespace cont
{
VTKM_CONT
AssignerMultiBlock::AssignerMultiBlock(const vtkm::cont::MultiBlock& mb)
: diy::Assigner(vtkm::cont::EnvironmentTracker::GetCommunicator().size(), 1)
, IScanBlockCounts()
{
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
const auto nblocks = mb.GetNumberOfBlocks();
vtkm::Id iscan;
diy::mpi::scan(comm, nblocks, iscan, std::plus<vtkm::Id>());
diy::mpi::all_gather(comm, iscan, this->IScanBlockCounts);
this->set_nblocks(static_cast<int>(this->IScanBlockCounts.back()));
}
VTKM_CONT
void AssignerMultiBlock::local_gids(int rank, std::vector<int>& gids) const
{
if (rank == 0)
{
assert(this->IScanBlockCounts.size() > 0);
gids.resize(this->IScanBlockCounts[rank]);
std::iota(gids.begin(), gids.end(), 0);
}
else if (rank > 0 && rank < static_cast<int>(this->IScanBlockCounts.size()))
{
gids.resize(this->IScanBlockCounts[rank] - this->IScanBlockCounts[rank - 1]);
std::iota(gids.begin(), gids.end(), this->IScanBlockCounts[rank - 1]);
}
}
VTKM_CONT
int AssignerMultiBlock::rank(int gid) const
{
return static_cast<int>(
std::lower_bound(this->IScanBlockCounts.begin(), this->IScanBlockCounts.end(), gid + 1) -
this->IScanBlockCounts.begin());
}
} // vtkm::cont
} // vtkm
#endif // defined(VTKM_ENABLE_MPI)

@ -0,0 +1,70 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2015 UT-Battelle, LLC.
// Copyright 2015 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_AssignerMultiBlock_h
#define vtk_m_cont_AssignerMultiBlock_h
#include <vtkm/internal/Configure.h>
#if defined(VTKM_ENABLE_MPI)
#include <diy/assigner.hpp>
#include <vtkm/cont/MultiBlock.h>
namespace vtkm
{
namespace cont
{
/// \brief Assigner for `MultiBlock` blocks.
///
/// `AssignerMultiBlock` is a `diy::Assigner` implementation that uses
/// `MultiBlock`'s block distribution to build global-id/rank associations
/// needed for several `diy` operations.
/// It uses a contiguous assignment strategy to map blocks to global ids i.e.
/// blocks on rank 0 come first, then rank 1, etc. Any rank may have 0 blocks.
///
/// AssignerMultiBlock uses collectives in the constructor hence it is
/// essential it gets created on all ranks irrespective of whether the rank has
/// any blocks.
///
class VTKM_CONT_EXPORT AssignerMultiBlock : public diy::Assigner
{
public:
/// Initialize the assigner using a multiblock dataset.
/// This may initialize collective operations to populate the assigner with
/// information about blocks on all ranks.
VTKM_CONT
AssignerMultiBlock(const vtkm::cont::MultiBlock& mb);
///@{
/// diy::Assigner API implementation.
VTKM_CONT
void local_gids(int rank, std::vector<int>& gids) const override;
VTKM_CONT
int rank(int gid) const override;
//@}
private:
std::vector<vtkm::Id> IScanBlockCounts;
};
}
}
#endif // defined(VTKM_ENABLE_MPI)
#endif

@ -45,6 +45,7 @@ set(headers
ArrayHandleConcatenate.h
ArrayRangeCompute.h
ArrayRangeCompute.hxx
AssignerMultiBlock.h
CellLocatorTwoLevelUniformGrid.h
CellSet.h
CellSetExplicit.h
@ -58,6 +59,7 @@ set(headers
DataSetBuilderRectilinear.h
DataSetBuilderUniform.h
DataSetFieldAdd.h
DecomposerMultiBlock.h
DeviceAdapter.h
DeviceAdapterAlgorithm.h
DeviceAdapterListTag.h
@ -94,6 +96,7 @@ set(header_impls
set(sources
ArrayHandle.cxx
AssignerMultiBlock.cxx
CellSet.cxx
CellSetExplicit.cxx
CellSetStructured.cxx

@ -0,0 +1,57 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2015 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2015 UT-Battelle, LLC.
// Copyright 2015 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_DecomposerMultiBlock_h
#define vtk_m_cont_DecomposerMultiBlock_h
#include <vtkm/internal/Configure.h>
#if defined(VTKM_ENABLE_MPI)
#include <vtkm/cont/AssignerMultiBlock.h>
namespace vtkm
{
namespace cont
{
/// \brief DIY Decomposer that uses `MultiBlock` existing decomposition.
///
/// To create partners for various reduce operations, DIY requires a decomposer.
/// This class provides an implementation that can use the multiblock's
/// decomposition.
///
class VTKM_CONT_EXPORT DecomposerMultiBlock
{
public:
VTKM_CONT DecomposerMultiBlock(const diy::Assigner& assigner)
: divisions{ assigner.nblocks() }
{
}
using DivisionVector = std::vector<int>;
/// this public member is needed to satisfy decomposer concept for
/// partners in DIY.
DivisionVector divisions;
};
}
}
#endif // defined(VTKM_ENABLE_MPI)
#endif

@ -21,7 +21,9 @@
#include <vtkm/StaticAssert.h>
#include <vtkm/cont/ArrayCopy.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/AssignerMultiBlock.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DecomposerMultiBlock.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/DynamicArrayHandle.h>
#include <vtkm/cont/EnvironmentTracker.h>
@ -30,7 +32,11 @@
#include <vtkm/cont/MultiBlock.h>
#if defined(VTKM_ENABLE_MPI)
#include <diy/decomposition.hpp>
#include <diy/master.hpp>
#include <diy/partners/all-reduce.hpp>
#include <diy/partners/swap.hpp>
#include <diy/reduce.hpp>
namespace vtkm
{
@ -48,111 +54,21 @@ VTKM_CONT std::vector<typename PortalType::ValueType> CopyArrayPortalToVector(
std::copy(iterators.GetBegin(), iterators.GetEnd(), result.begin());
return result;
}
template <typename T>
const vtkm::cont::DataSet& GetBlock(const vtkm::cont::MultiBlock& mb, const T&);
template <>
const vtkm::cont::DataSet& GetBlock(const vtkm::cont::MultiBlock& mb,
const diy::Master::ProxyWithLink& cp)
{
const int lid = cp.master()->lid(cp.gid());
return mb.GetBlock(lid);
}
}
}
namespace std
{
namespace detail
{
template <typename T, size_t ElementSize = sizeof(T)>
struct MPIPlus
{
MPIPlus()
{
this->OpPtr = std::shared_ptr<MPI_Op>(new MPI_Op(MPI_NO_OP), [](MPI_Op* ptr) {
MPI_Op_free(ptr);
delete ptr;
});
MPI_Op_create(
[](void* a, void* b, int* len, MPI_Datatype*) {
T* ba = reinterpret_cast<T*>(a);
T* bb = reinterpret_cast<T*>(b);
for (int cc = 0; cc < (*len) / ElementSize; ++cc)
{
bb[cc] = ba[cc] + bb[cc];
}
},
1,
this->OpPtr.get());
}
~MPIPlus() {}
operator MPI_Op() const { return *this->OpPtr.get(); }
private:
std::shared_ptr<MPI_Op> OpPtr;
};
} // std::detail
template <>
struct plus<vtkm::Bounds>
{
MPI_Op get_mpi_op() const { return this->Op; }
vtkm::Bounds operator()(const vtkm::Bounds& lhs, const vtkm::Bounds& rhs) const
{
return lhs + rhs;
}
private:
std::detail::MPIPlus<vtkm::Bounds> Op;
};
template <>
struct plus<vtkm::Range>
{
MPI_Op get_mpi_op() const { return this->Op; }
vtkm::Range operator()(const vtkm::Range& lhs, const vtkm::Range& rhs) const { return lhs + rhs; }
private:
std::detail::MPIPlus<vtkm::Range> Op;
};
}
namespace diy
{
namespace mpi
{
namespace detail
{
template <>
struct mpi_datatype<vtkm::Bounds>
{
static MPI_Datatype datatype() { return get_mpi_datatype<vtkm::Float64>(); }
static const void* address(const vtkm::Bounds& x) { return &x; }
static void* address(vtkm::Bounds& x) { return &x; }
static int count(const vtkm::Bounds&) { return 6; }
};
template <>
struct mpi_op<std::plus<vtkm::Bounds>>
{
static MPI_Op get(const std::plus<vtkm::Bounds>& op) { return op.get_mpi_op(); }
};
template <>
struct mpi_datatype<vtkm::Range>
{
static MPI_Datatype datatype() { return get_mpi_datatype<vtkm::Float64>(); }
static const void* address(const vtkm::Range& x) { return &x; }
static void* address(vtkm::Range& x) { return &x; }
static int count(const vtkm::Range&) { return 2; }
};
template <>
struct mpi_op<std::plus<vtkm::Range>>
{
static MPI_Op get(const std::plus<vtkm::Range>& op) { return op.get_mpi_op(); }
};
} // diy::mpi::detail
} // diy::mpi
} // diy
#endif
namespace vtkm
@ -311,26 +227,56 @@ VTKM_CONT vtkm::Bounds MultiBlock::GetBounds(vtkm::Id coordinate_system_index,
#if defined(VTKM_ENABLE_MPI)
auto world = vtkm::cont::EnvironmentTracker::GetCommunicator();
//const auto global_num_blocks = this->GetGlobalNumberOfBlocks();
diy::Master master(world,
1,
-1,
[]() -> void* { return new vtkm::Bounds(); },
[](void* ptr) { delete static_cast<vtkm::Bounds*>(ptr); });
const auto num_blocks = this->GetNumberOfBlocks();
vtkm::cont::AssignerMultiBlock assigner(*this);
diy::Master master(world, 1, -1);
for (vtkm::Id cc = 0; cc < num_blocks; ++cc)
{
int gid = cc * world.size() + world.rank();
master.add(gid, const_cast<vtkm::cont::DataSet*>(&this->Blocks[cc]), new diy::Link());
}
// populate master with blocks from `this`.
diy::decompose(world.rank(), assigner, master);
master.foreach ([&](const vtkm::cont::DataSet* block, const diy::Master::ProxyWithLink& cp) {
auto coords = block->GetCoordinateSystem(coordinate_system_index);
const vtkm::Bounds bounds = coords.GetBounds(TypeList(), StorageList());
cp.all_reduce(bounds, std::plus<vtkm::Bounds>());
auto self = (*this);
master.foreach ([&](vtkm::Bounds* data, const diy::Master::ProxyWithLink& cp) {
const vtkm::cont::DataSet& block = vtkm::cont::detail::GetBlock(self, cp);
try
{
vtkm::cont::CoordinateSystem coords = block.GetCoordinateSystem(coordinate_system_index);
*data = coords.GetBounds(TypeList(), StorageList());
}
catch (const vtkm::cont::Error&)
{
}
});
master.process_collectives();
auto bounds = master.proxy(0).get<vtkm::Bounds>();
return bounds;
vtkm::cont::DecomposerMultiBlock decomposer(assigner);
diy::RegularSwapPartners partners(decomposer, /*k=*/2);
auto callback =
[](vtkm::Bounds* data, const diy::ReduceProxy& srp, const diy::RegularSwapPartners&) {
// 1. dequeue.
std::vector<int> incoming;
srp.incoming(incoming);
vtkm::Bounds message;
for (const int gid : incoming)
{
srp.dequeue(gid, message);
data->Include(message);
}
// 2. enqueue
for (int cc = 0; cc < srp.out_link().size(); ++cc)
{
srp.enqueue(srp.out_link().target(cc), *data);
}
};
diy::reduce(master, assigner, partners, callback);
if (master.size())
{
return (*master.block<vtkm::Bounds>(0));
}
return vtkm::Bounds();
#else
const vtkm::Id index = coordinate_system_index;
@ -443,65 +389,62 @@ VTKM_CONT vtkm::cont::ArrayHandle<vtkm::Range>
MultiBlock::GetGlobalRange(const std::string& field_name, TypeList, StorageList) const
{
#if defined(VTKM_ENABLE_MPI)
auto world = vtkm::cont::EnvironmentTracker::GetCommunicator();
const auto num_blocks = this->GetNumberOfBlocks();
using BlockMetaData = std::vector<vtkm::Range>;
diy::Master master(world);
for (vtkm::Id cc = 0; cc < num_blocks; ++cc)
{
int gid = cc * world.size() + world.rank();
master.add(gid, const_cast<vtkm::cont::DataSet*>(&this->Blocks[cc]), new diy::Link());
}
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
diy::Master master(comm,
1,
-1,
[]() -> void* { return new BlockMetaData(); },
[](void* ptr) { delete static_cast<BlockMetaData*>(ptr); });
// collect info about number of components in the field.
master.foreach ([&](const vtkm::cont::DataSet* dataset, const diy::Master::ProxyWithLink& cp) {
if (dataset->HasField(field_name))
vtkm::cont::AssignerMultiBlock assigner(*this);
diy::decompose(comm.rank(), assigner, master);
auto self = (*this);
master.foreach ([&](BlockMetaData* data, const diy::Master::ProxyWithLink& cp) {
const vtkm::cont::DataSet& block = vtkm::cont::detail::GetBlock(self, cp);
if (block.HasField(field_name))
{
auto field = dataset->GetField(field_name);
auto field = block.GetField(field_name);
const vtkm::cont::ArrayHandle<vtkm::Range> range = field.GetRange(TypeList(), StorageList());
vtkm::Id components = range.GetPortalConstControl().GetNumberOfValues();
cp.all_reduce(components, diy::mpi::maximum<vtkm::Id>());
*data = vtkm::cont::detail::CopyArrayPortalToVector(range.GetPortalConstControl());
}
});
master.process_collectives();
const vtkm::Id components = master.size() ? master.proxy(0).read<vtkm::Id>() : 0;
vtkm::cont::DecomposerMultiBlock decomposer(assigner);
diy::RegularSwapPartners partners(decomposer, /*k=*/2);
auto callback =
[](BlockMetaData* data, const diy::ReduceProxy& srp, const diy::RegularSwapPartners&) {
std::vector<int> incoming;
srp.incoming(incoming);
// clear all collectives.
master.foreach ([&](const vtkm::cont::DataSet*, const diy::Master::ProxyWithLink& cp) {
cp.collectives()->clear();
});
master.foreach ([&](const vtkm::cont::DataSet* dataset, const diy::Master::ProxyWithLink& cp) {
if (dataset->HasField(field_name))
{
auto field = dataset->GetField(field_name);
const vtkm::cont::ArrayHandle<vtkm::Range> range = field.GetRange(TypeList(), StorageList());
const auto v_range =
vtkm::cont::detail::CopyArrayPortalToVector(range.GetPortalConstControl());
for (const vtkm::Range& r : v_range)
// 1. dequeue
BlockMetaData message;
for (const int gid : incoming)
{
cp.all_reduce(r, std::plus<vtkm::Range>());
srp.dequeue(gid, message);
data->resize(std::max(data->size(), message.size()));
for (size_t cc = 0; cc < data->size(); ++cc)
{
(*data)[cc].Include(message[cc]);
}
}
// if current block has less that the max number of components, just add invalid ranges for the rest.
for (vtkm::Id cc = static_cast<vtkm::Id>(v_range.size()); cc < components; ++cc)
// 2. enqueue
for (int cc = 0; cc < srp.out_link().size(); ++cc)
{
cp.all_reduce(vtkm::Range(), std::plus<vtkm::Range>());
srp.enqueue(srp.out_link().target(cc), *data);
}
}
});
master.process_collectives();
std::vector<vtkm::Range> ranges(components);
// FIXME: is master.size() == 0 i.e. there are no blocks on the current rank,
// this method won't return valid range.
if (master.size() > 0)
};
diy::reduce(master, assigner, partners, callback);
BlockMetaData ranges;
if (master.size())
{
for (vtkm::Id cc = 0; cc < components; ++cc)
{
ranges[cc] = master.proxy(0).get<vtkm::Range>();
}
ranges = *(master.block<BlockMetaData>(0));
}
vtkm::cont::ArrayHandle<vtkm::Range> tmprange = vtkm::cont::make_ArrayHandle(ranges);
vtkm::cont::ArrayHandle<vtkm::Range> range;
vtkm::cont::ArrayCopy(vtkm::cont::make_ArrayHandle(ranges), range);