Merge topic 'diy-mpi-nompi'

934f085e0 Build diy as a library
f0a37ac6a Merge branch 'upstream-diy' into diy-mpi-nompi
7687aabf8 diy 2020-06-05 (b62915aa)
6ca2b9f87 Point to new version of Diy

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Merge-request: !2123
This commit is contained in:
Sujin Philip 2020-06-09 01:25:43 +00:00 committed by Kitware Robot
commit b7d0c94f1b
118 changed files with 16201 additions and 6802 deletions

@ -133,6 +133,7 @@ stages:
# The artifacts of the build.
- build/bin/
- build/include/
- build/vtkm/thirdparty/diy/vtkmdiy/
- build/lib/
# CTest and CMake install files.

@ -18,6 +18,7 @@
- build/bin/
- build/include/
- build/lib/
- build/vtkm/thirdparty/diy/vtkmdiy/include
# CTest and CMake install files.
# XXX(globbing): Can be simplified with support from

@ -116,3 +116,7 @@ endif()
# This includes a host of functions used by VTK-m CMake.
include(VTKmWrappers)
include(VTKmRenderingContexts)
# Setup diy magic of choosing the appropriate mpi/no_mpi library to link against
include(VTKmDIYUtils)
vtkm_diy_init_target()

59
CMake/VTKmDIYUtils.cmake Normal file

@ -0,0 +1,59 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Translate an MPI on/off flag into the name of the matching DIY library.
#   flag   - name of the variable whose truthiness selects the flavour
#   target - name of the variable that receives "vtkmdiympi" when the flag is
#            true and "vtkmdiympi_nompi" otherwise
# Kept as a macro so that the result is written straight into the caller's
# scope without needing PARENT_SCOPE.
macro(_vtkm_diy_target flag target)
  if (${flag})
    set(${target} "vtkmdiympi")
  else()
    set(${target} "vtkmdiympi_nompi")
  endif()
endmacro()
# Initialise the MPI-selection state kept on the `vtkm_diy` target.
#
# Two custom properties are written:
#   vtkm_diy_use_mpi_stack - a list used as a stack of on/off flags; it starts
#                            with a single entry derived from VTKm_ENABLE_MPI
#   vtkm_diy_target        - name of the DIY library matching that entry
#
# The `vtkm_diy` target must already exist when this is called.
function(vtkm_diy_init_target)
  # Normalise to ON/OFF. An unset or empty VTKm_ENABLE_MPI would otherwise
  # expand to nothing and hand set_target_properties an unbalanced
  # property/value list.
  if (VTKm_ENABLE_MPI)
    set(vtkm_diy_default_flag ON)
  else()
    set(vtkm_diy_default_flag OFF)
  endif()
  _vtkm_diy_target(vtkm_diy_default_flag vtkm_diy_default_target)
  set_target_properties(vtkm_diy PROPERTIES
    vtkm_diy_use_mpi_stack "${vtkm_diy_default_flag}"
    vtkm_diy_target "${vtkm_diy_default_target}")
endfunction()
#-----------------------------------------------------------------------------
# Push a new MPI on/off flag onto the selection stack of `vtkm_diy` and make
# the matching DIY library the active `vtkm_diy_target`.
#
# Usage: vtkm_diy_use_mpi_push([flag])
#   flag - optional boolean; defaults to the value of VTKm_ENABLE_MPI
#
# Every push is expected to be paired with a vtkm_diy_use_mpi_pop().
function(vtkm_diy_use_mpi_push)
  set(topval "${VTKm_ENABLE_MPI}")
  if (NOT ARGC EQUAL 0)
    set(topval "${ARGV0}")
  endif()
  # Always store an explicit ON/OFF. Appending an empty value is a no-op for
  # list(APPEND), which would leave the stack one entry short and make the
  # paired pop remove somebody else's entry.
  if (topval)
    set(topval ON)
  else()
    set(topval OFF)
  endif()
  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
  if ("${stack}" STREQUAL "stack-NOTFOUND")
    # Property was never initialised; start from an empty stack instead of
    # carrying the NOTFOUND marker around as if it were an entry.
    set(stack "")
  endif()
  list (APPEND stack ${topval})
  _vtkm_diy_target(topval target)
  set_target_properties(vtkm_diy PROPERTIES
    vtkm_diy_use_mpi_stack "${stack}"
    vtkm_diy_target "${target}")
endfunction()
# Replace the flag on top of the selection stack of `vtkm_diy` and make the
# matching DIY library the active `vtkm_diy_target`.
#
#   value - boolean; true selects "vtkmdiympi", false "vtkmdiympi_nompi"
#
# The stack depth is left unchanged, so this is meant to be called between a
# vtkm_diy_use_mpi_push() and its vtkm_diy_use_mpi_pop().
function(vtkm_diy_use_mpi value)
  # Store an explicit ON/OFF: an empty `value` would turn the APPEND below
  # into a no-op and silently shrink the stack by one entry.
  if (value)
    set(flag ON)
  else()
    set(flag OFF)
  endif()
  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
  list (REMOVE_AT stack -1)
  list (APPEND stack ${flag})
  _vtkm_diy_target(flag target)
  set_target_properties(vtkm_diy PROPERTIES
    vtkm_diy_use_mpi_stack "${stack}"
    vtkm_diy_target "${target}")
endfunction()
# Drop the top entry of the selection stack of `vtkm_diy` and re-activate the
# DIY library that corresponds to the entry underneath it.
#
# Counterpart of vtkm_diy_use_mpi_push().
function(vtkm_diy_use_mpi_pop)
  get_target_property(stack vtkm_diy vtkm_diy_use_mpi_stack)
  list (REMOVE_AT stack -1)
  # The active target must follow the entry that is now on top, i.e. the
  # state that was in effect before the matching push - not the entry that
  # was just removed.
  list (LENGTH stack depth)
  if (depth GREATER 0)
    list (GET stack -1 value)
  else()
    # Nothing left to restore; fall back to the project-wide default.
    set(value "${VTKm_ENABLE_MPI}")
  endif()
  _vtkm_diy_target(value target)
  set_target_properties(vtkm_diy PROPERTIES
    vtkm_diy_use_mpi_stack "${stack}"
    vtkm_diy_target "${target}")
endfunction()

@ -111,6 +111,7 @@ function(do_verify root_dir prefix)
set(file_exceptions
cont/ColorTablePrivate.hxx
thirdparty/diy/vtkmdiy/cmake/mpi_types.h
)
#by default every header in a testing directory doesn't need to be installed

@ -10,6 +10,69 @@
include(VTKmWrappers)
# Build one CTest test-driver executable from a list of test sources.
#
# Positional arguments:
#   prog_name           - base name of the executable; MPI tests get "_mpi" or
#                         "_nompi" appended
#   sources             - list of test source files
#   libraries           - libraries linked PRIVATE in addition to vtkm_cont
#   defines             - PRIVATE compile definitions for the executable
#   is_mpi_test         - when true, the generated driver includes
#                         vtkm/thirdparty/diy/environment.h and constructs a
#                         vtkmdiy::mpi::environment before running a test
#   use_mpi             - only consulted for MPI tests: selects the MPI (true)
#                         or no-MPI (false) DIY flavour and the name suffix
#   enable_all_backends - when true and the vtkm::cuda target exists, every
#                         source is passed on as DEVICE_SOURCES
#   use_job_pool        - when true, compilation is assigned to the vtkm_pool
#                         job pool
function(vtkm_create_test_executable
  prog_name
  sources
  libraries
  defines
  is_mpi_test
  use_mpi
  enable_all_backends
  use_job_pool)

  # The DIY selection is scoped to this function; see the pop at the end.
  vtkm_diy_use_mpi_push()

  set(prog ${prog_name})

  # Start from a clean slate so that an `extraArgs` variable visible from a
  # caller's scope cannot leak into create_test_sourcelist for non-MPI tests.
  set(extraArgs)

  # for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
  if (is_mpi_test)
    set(extraArgs EXTRA_INCLUDE "vtkm/thirdparty/diy/environment.h")
    set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "vtkmdiy::mpi::environment env(ac, av);")

    if (use_mpi)
      vtkm_diy_use_mpi(ON)
      set(prog "${prog}_mpi")
    else()
      vtkm_diy_use_mpi(OFF)
      set(prog "${prog}_nompi")
    endif()
  else()
    set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "")
  endif()

  #the creation of the test source list needs to occur before the labeling as
  #cuda. This is so that we get the correctly named entry points generated
  create_test_sourcelist(test_sources ${prog}.cxx ${sources} ${extraArgs})

  add_executable(${prog} ${prog}.cxx ${sources})
  vtkm_add_drop_unused_function_flags(${prog})
  target_compile_definitions(${prog} PRIVATE ${defines})

  #if all backends are enabled, we can use cuda compiler to handle all possible backends.
  set(device_sources)
  if(TARGET vtkm::cuda AND enable_all_backends)
    set(device_sources ${sources})
  endif()
  vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})

  if(NOT VTKm_USE_DEFAULT_SYMBOL_VISIBILITY)
    set_property(TARGET ${prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
    set_property(TARGET ${prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
  endif()
  set_property(TARGET ${prog} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
  set_property(TARGET ${prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
  set_property(TARGET ${prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})

  target_link_libraries(${prog} PRIVATE vtkm_cont ${libraries})

  if(use_job_pool)
    vtkm_setup_job_pool()
    set_property(TARGET ${prog} PROPERTY JOB_POOL_COMPILE vtkm_pool)
  endif()

  vtkm_diy_use_mpi_pop()
endfunction()
#-----------------------------------------------------------------------------
# Declare unit tests, which should be in the same directory as a kit
# (package, module, whatever you call it). Usage:
@ -36,7 +99,9 @@ include(VTKmWrappers)
# test executable
#
# [MPI] : when specified, the tests should be run in parallel if
# MPI is enabled.
# MPI is enabled. The tests should also be able to build and run
# when MPI is not available, i.e., they should not make explicit
# use of MPI and instead completely rely on DIY.
# [ALL_BACKENDS] : when specified, the tests would test against all enabled
# backends. Otherwise we expect the tests to manage the
# backends at runtime.
@ -56,9 +121,6 @@ function(vtkm_unit_tests)
)
vtkm_parse_test_options(VTKm_UT_SOURCES "${options}" ${VTKm_UT_SOURCES})
set(test_prog)
set(per_device_command_line_arguments "NONE")
set(per_device_suffix "")
set(per_device_timeout 180)
@ -93,6 +155,7 @@ function(vtkm_unit_tests)
endif()
endif()
set(test_prog)
if(VTKm_UT_NAME)
set(test_prog "${VTKm_UT_NAME}")
else()
@ -110,43 +173,38 @@ function(vtkm_unit_tests)
list(APPEND VTKm_UT_TEST_ARGS "--baseline-dir=${VTKm_SOURCE_DIR}/data/baseline")
if(VTKm_UT_MPI)
# for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
set(test_prog "${test_prog}_mpi")
set(extraArgs EXTRA_INCLUDE "vtkm/cont/testing/Testing.h"
FUNCTION "vtkm::cont::testing::Environment env")
if (VTKm_ENABLE_MPI)
vtkm_create_test_executable(
${test_prog}
"${VTKm_UT_SOURCES}"
"${VTKm_UT_LIBRARIES}"
"${VTKm_UT_DEFINES}"
ON # is_mpi_test
ON # use_mpi
${enable_all_backends}
${VTKm_UT_USE_VTKM_JOB_POOL})
endif()
if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
vtkm_create_test_executable(
${test_prog}
"${VTKm_UT_SOURCES}"
"${VTKm_UT_LIBRARIES}"
"${VTKm_UT_DEFINES}"
ON # is_mpi_test
OFF # use_mpi
${enable_all_backends}
${VTKm_UT_USE_VTKM_JOB_POOL})
endif()
else()
set(extraArgs)
endif()
#the creation of the test source list needs to occur before the labeling as
#cuda. This is so that we get the correctly named entry points generated
create_test_sourcelist(test_sources ${test_prog}.cxx ${VTKm_UT_SOURCES} ${extraArgs})
add_executable(${test_prog} ${test_prog}.cxx ${VTKm_UT_SOURCES})
vtkm_add_drop_unused_function_flags(${test_prog})
target_compile_definitions(${test_prog} PRIVATE ${VTKm_UT_DEFINES})
#if all backends are enabled, we can use cuda compiler to handle all possible backends.
set(device_sources )
if(TARGET vtkm::cuda AND enable_all_backends)
set(device_sources ${VTKm_UT_SOURCES})
endif()
vtkm_add_target_information(${test_prog} DEVICE_SOURCES ${device_sources})
if(VTKm_HIDE_PRIVATE_SYMBOLS)
set_property(TARGET ${test_prog} PROPERTY CUDA_VISIBILITY_PRESET "hidden")
set_property(TARGET ${test_prog} PROPERTY CXX_VISIBILITY_PRESET "hidden")
endif()
set_property(TARGET ${test_prog} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
set_property(TARGET ${test_prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
set_property(TARGET ${test_prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
target_link_libraries(${test_prog} PRIVATE vtkm_cont ${VTKm_UT_LIBRARIES})
if(VTKm_UT_USE_VTKM_JOB_POOL)
vtkm_setup_job_pool()
set_property(TARGET ${test_prog} PROPERTY JOB_POOL_COMPILE vtkm_pool)
vtkm_create_test_executable(
${test_prog}
"${VTKm_UT_SOURCES}"
"${VTKm_UT_LIBRARIES}"
"${VTKm_UT_DEFINES}"
OFF # is_mpi_test
OFF # use_mpi
${enable_all_backends}
${VTKm_UT_USE_VTKM_JOB_POOL})
endif()
list(LENGTH per_device_command_line_arguments number_of_devices)
@ -170,25 +228,42 @@ function(vtkm_unit_tests)
foreach (test ${VTKm_UT_SOURCES})
get_filename_component(tname ${test} NAME_WE)
if(VTKm_UT_MPI AND VTKm_ENABLE_MPI)
add_test(NAME ${tname}${upper_backend}
COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS}
$<TARGET_FILE:${test_prog}> ${tname} ${device_command_line_argument}
${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS} ${MPIEXEC_POSTFLAGS}
)
else()
if(VTKm_UT_MPI)
if (VTKm_ENABLE_MPI)
add_test(NAME ${tname}${upper_backend}_mpi
COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} 3 ${MPIEXEC_PREFLAGS}
$<TARGET_FILE:${test_prog}_mpi> ${tname} ${device_command_line_argument}
${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS} ${MPIEXEC_POSTFLAGS}
)
set_tests_properties("${tname}${upper_backend}_mpi" PROPERTIES
LABELS "${upper_backend};${VTKm_UT_LABEL}"
TIMEOUT ${timeout}
RUN_SERIAL ${run_serial}
FAIL_REGULAR_EXPRESSION "runtime error")
endif() # VTKm_ENABLE_MPI
if ((NOT VTKm_ENABLE_MPI) OR VTKm_ENABLE_DIY_NOMPI)
add_test(NAME ${tname}${upper_backend}_nompi
COMMAND ${test_prog}_nompi ${tname} ${device_command_line_argument}
${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS}
)
set_tests_properties("${tname}${upper_backend}_nompi" PROPERTIES
LABELS "${upper_backend};${VTKm_UT_LABEL}"
TIMEOUT ${timeout}
RUN_SERIAL ${run_serial}
FAIL_REGULAR_EXPRESSION "runtime error")
endif() # VTKm_ENABLE_DIY_NOMPI
else() # VTKm_UT_MPI
add_test(NAME ${tname}${upper_backend}
COMMAND ${test_prog} ${tname} ${device_command_line_argument}
${vtkm_default_test_log_level} ${VTKm_UT_TEST_ARGS}
)
endif()
set_tests_properties("${tname}${upper_backend}" PROPERTIES
LABELS "${upper_backend};${VTKm_UT_LABEL}"
TIMEOUT ${timeout}
RUN_SERIAL ${run_serial}
FAIL_REGULAR_EXPRESSION "runtime error"
)
set_tests_properties("${tname}${upper_backend}" PROPERTIES
LABELS "${upper_backend};${VTKm_UT_LABEL}"
TIMEOUT ${timeout}
RUN_SERIAL ${run_serial}
FAIL_REGULAR_EXPRESSION "runtime error")
endif() # VTKm_UT_MPI
endforeach()
endforeach()

@ -277,6 +277,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
${VTKm_SOURCE_DIR}/CMake/VTKmCPUVectorization.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmDetectCUDAVersion.cu
${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmDIYUtils.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake

@ -18,7 +18,7 @@ if(VTKm_ENABLE_EXAMPLES)
add_subdirectory(contour_tree_augmented)
add_subdirectory(cosmotools)
add_subdirectory(demo)
add_subdirectory(game_of_life)
#add_subdirectory(game_of_life)
add_subdirectory(hello_worklet)
add_subdirectory(histogram)
add_subdirectory(lagrangian)

@ -78,7 +78,7 @@ endif()
####################################
if (VTKm_ENABLE_MPI)
add_executable(ContourTree_Augmented_MPI ContourTreeApp.cxx)
target_link_libraries(ContourTree_Augmented_MPI vtkm_filter vtkm_io)
target_link_libraries(ContourTree_Augmented_MPI vtkm_filter vtkm_io MPI::MPI_CXX)
vtkm_add_target_information(ContourTree_Augmented_MPI
MODIFY_CUDA_FLAGS
DEVICE_SOURCES ContourTreeApp.cxx)

@ -168,7 +168,7 @@ int main(int argc, char* argv[])
auto comm = MPI_COMM_WORLD;
// Tell VTK-m which communicator it should use.
vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator(comm));
vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator());
// get the rank and size
int rank, size;

@ -14,7 +14,7 @@ project(Histogram CXX)
find_package(VTKm REQUIRED QUIET)
if (VTKm_ENABLE_MPI)
add_executable(Histogram Histogram.cxx HistogramMPI.h HistogramMPI.hxx)
target_link_libraries(Histogram PRIVATE vtkm_filter)
target_link_libraries(Histogram PRIVATE vtkm_filter MPI::MPI_CXX)
vtkm_add_target_information(Histogram
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES Histogram.cxx)

@ -57,14 +57,16 @@ int main(int argc, char* argv[])
vtkm::cont::Initialize(argc, argv, opts);
// setup MPI environment.
MPI_Init(&argc, &argv);
vtkmdiy::mpi::environment env(argc, argv); // will finalize on destruction
vtkmdiy::mpi::communicator world; // the default is MPI_COMM_WORLD
// tell VTK-m the communicator to use.
vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator(MPI_COMM_WORLD));
vtkm::cont::EnvironmentTracker::SetCommunicator(world);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(vtkmdiy::mpi::mpi_cast(world.handle()), &rank);
MPI_Comm_size(vtkmdiy::mpi::mpi_cast(world.handle()), &size);
if (argc != 2)
{
@ -72,7 +74,6 @@ int main(int argc, char* argv[])
{
std::cout << "Usage: " << std::endl << "$ " << argv[0] << " <num-bins>" << std::endl;
}
MPI_Finalize();
return EXIT_FAILURE;
}
@ -105,11 +106,9 @@ int main(int argc, char* argv[])
if (count != numVals * size)
{
std::cout << "ERROR: bins mismatched!" << std::endl;
MPI_Finalize();
return EXIT_FAILURE;
}
}
MPI_Finalize();
return EXIT_SUCCESS;
}

@ -18,6 +18,9 @@
#include <vtkm/cont/FieldRangeGlobalCompute.h>
#include <vtkm/thirdparty/diy/diy.h>
#include <vtkm/thirdparty/diy/mpi-cast.h>
#include <mpi.h>
namespace example
{
@ -81,7 +84,7 @@ public:
sizeof(vtkm::Id) == 4 ? MPI_INT : MPI_LONG,
MPI_SUM,
0,
comm);
vtkmdiy::mpi::mpi_cast(comm.handle()));
if (comm.rank() == 0)
{

@ -30,7 +30,7 @@ int main(int argc, char* argv[])
auto config = vtkm::cont::Initialize(argc, argv, opts);
vtkmdiy::mpi::environment env(argc, argv);
auto comm = vtkmdiy::mpi::communicator(MPI_COMM_WORLD);
vtkmdiy::mpi::communicator comm;
vtkm::cont::EnvironmentTracker::SetCommunicator(comm);
if (argc != 3)

@ -27,7 +27,7 @@ namespace internal
static vtkmdiy::ContinuousBounds convert(const vtkm::Bounds& bds)
{
vtkmdiy::ContinuousBounds result;
vtkmdiy::ContinuousBounds result(3);
result.min[0] = static_cast<float>(bds.X.Min);
result.min[1] = static_cast<float>(bds.Y.Min);
result.min[2] = static_cast<float>(bds.Z.Min);
@ -136,7 +136,7 @@ public:
{
auto target = rp.out_link().target(cc);
// let's get the bounding box for the target block.
vtkmdiy::ContinuousBounds bds;
vtkmdiy::ContinuousBounds bds(3);
this->Decomposer.fill_bounds(bds, target.gid);
auto extractedDS = this->Extract(*block, bds);

@ -11,34 +11,36 @@
#include <vtkm/thirdparty/diy/diy.h>
#include <memory>
namespace vtkm
{
namespace cont
{
namespace internal
{
static vtkmdiy::mpi::communicator GlobalCommuncator(MPI_COMM_NULL);
static std::unique_ptr<vtkmdiy::mpi::communicator> GlobalCommuncator;
}
void EnvironmentTracker::SetCommunicator(const vtkmdiy::mpi::communicator& comm)
{
vtkm::cont::internal::GlobalCommuncator = comm;
if (!internal::GlobalCommuncator)
{
internal::GlobalCommuncator.reset(new vtkmdiy::mpi::communicator(comm));
}
else
{
*internal::GlobalCommuncator = comm;
}
}
const vtkmdiy::mpi::communicator& EnvironmentTracker::GetCommunicator()
{
#ifndef VTKM_DIY_NO_MPI
int flag;
MPI_Initialized(&flag);
if (!flag)
if (!internal::GlobalCommuncator)
{
int argc = 0;
char** argv = nullptr;
MPI_Init(&argc, &argv);
internal::GlobalCommuncator = vtkmdiy::mpi::communicator(MPI_COMM_WORLD);
internal::GlobalCommuncator.reset(new vtkmdiy::mpi::communicator());
}
#endif
return vtkm::cont::internal::GlobalCommuncator;
return *internal::GlobalCommuncator;
}
} // namespace vtkm::cont
} // namespace vtkm

@ -24,7 +24,7 @@
#include <vtkm/cont/DynamicCellSet.h>
#include <vtkm/cont/VariantArrayHandle.h>
#include <vtkm/thirdparty/diy/serialization.h>
#include <vtkm/thirdparty/diy/diy.h>
namespace opt = vtkm::cont::internal::option;
@ -265,31 +265,6 @@ private:
}
};
struct Environment
{
VTKM_CONT Environment(int* argc, char*** argv)
{
#if defined(VTKM_ENABLE_MPI)
int provided_threading;
MPI_Init_thread(argc, argv, MPI_THREAD_FUNNELED, &provided_threading);
// set the global communicator to use in VTKm.
vtkmdiy::mpi::communicator comm(MPI_COMM_WORLD);
vtkm::cont::EnvironmentTracker::SetCommunicator(comm);
#else
(void)argc;
(void)argv;
#endif
}
VTKM_CONT ~Environment()
{
#if defined(VTKM_ENABLE_MPI)
MPI_Finalize();
#endif
}
};
//============================================================================
class TestEqualResult
{

@ -202,8 +202,7 @@ public:
{
if (this->NumberOfDimensions() == 2)
{
// may need to change back when porting ot later verison of VTKM/vtkmdiy
vtkmdiy::DiscreteBounds domain; //(2);
vtkmdiy::DiscreteBounds domain(2);
domain.min[0] = domain.min[1] = 0;
domain.max[0] = static_cast<int>(this->GlobalSize[0]);
domain.max[1] = static_cast<int>(this->GlobalSize[1]);
@ -211,8 +210,7 @@ public:
}
else
{
// may need to change back when porting to later version of VTMK/vtkmdiy
vtkmdiy::DiscreteBounds domain; //(3);
vtkmdiy::DiscreteBounds domain(3);
domain.min[0] = domain.min[1] = domain.min[2] = 0;
domain.max[0] = static_cast<int>(this->GlobalSize[0]);
domain.max[1] = static_cast<int>(this->GlobalSize[1]);

@ -13,6 +13,8 @@
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/thirdparty/diy/environment.h>
//
// Make a simple 2D, 1000 point dataset populated with stat distributions
//
@ -328,5 +330,8 @@ void TestHistogram()
int UnitTestHistogramFilter(int argc, char* argv[])
{
// Setup MPI environment: This test is not intended to be run in parallel
// but filter does make some DIY/MPI calls
vtkmdiy::mpi::environment env(argc, argv);
return vtkm::cont::testing::Testing::Run(TestHistogram, argc, argv);
}

@ -13,6 +13,8 @@
#include <vtkm/cont/PartitionedDataSet.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/thirdparty/diy/environment.h>
#include <algorithm>
#include <numeric>
#include <random>
@ -127,5 +129,8 @@ static void TestPartitionedDataSetHistogram()
int UnitTestPartitionedDataSetHistogramFilter(int argc, char* argv[])
{
// Setup MPI environment: This test is not intended to be run in parallel
// but filter does make some DIY/MPI calls
vtkmdiy::mpi::environment env(argc, argv);
return vtkm::cont::testing::Testing::Run(TestPartitionedDataSetHistogram, argc, argv);
}

@ -7,36 +7,118 @@
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
add_library(vtkm_diy INTERFACE)
vtkm_get_kit_name(kit_name kit_dir)
# diy needs C++11
target_compile_features(vtkm_diy INTERFACE cxx_std_11)
include(CMakeDependentOption)
if (NOT DEFINED VTKm_ENABLE_DIY_NOMPI)
cmake_dependent_option(
VTKm_ENABLE_DIY_NOMPI "Also build DIY without mpi" OFF "VTKm_ENABLE_MPI" OFF)
endif()
if (VTKm_ENABLE_DIY_NOMPI AND
(CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC") AND
BUILD_SHARED_LIBS)
message(WARNING "VTKm_ENABLE_DIY_NOMPI support with MSVC shared builds is experimental and may not work.")
endif()
## Configure diy
set(build_examples OFF)
set(build_tests OFF)
set(threads OFF)
set(log OFF)
set(profile OFF)
set(caliper OFF)
set(build_diy_mpi_lib ON)
set(mpi OFF)
set(build_diy_nompi_lib OFF)
if (VTKm_ENABLE_MPI)
set(mpi ON)
endif()
if (VTKm_ENABLE_DIY_NOMPI)
set(build_diy_nompi_lib ON)
endif()
mark_as_advanced(FORCE caliper log profile wrapped_mpi)
set(diy_prefix "vtkmdiy")
set(diy_install_include_dir ${VTKm_INSTALL_INCLUDE_DIR}/${kit_dir}/vtkmdiy/include)
set(diy_install_lib_dir ${VTKm_INSTALL_LIB_DIR})
set(diy_export_name ${VTKm_EXPORT_NAME})
if (VTKm_INSTALL_ONLY_LIBRARIES)
set(diy_install_only_libraries)
endif()
set(diy_dont_install_export)
# placeholder to support external DIY
set(VTKM_USE_EXTERNAL_DIY OFF)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Configure.h.in
${VTKm_BINARY_INCLUDE_DIR}/${kit_dir}/Configure.h)
# Redirect the build outputs of `target` into the VTK-m output directories:
# archives and libraries go to VTKm_LIBRARY_OUTPUT_PATH, runtime artifacts to
# VTKm_EXECUTABLE_OUTPUT_PATH.
function(vtkm_diy_set_target_output_directory target)
  foreach (artifact_kind IN ITEMS ARCHIVE LIBRARY)
    set_property(TARGET ${target}
      PROPERTY ${artifact_kind}_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
  endforeach()
  set_property(TARGET ${target}
    PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
endfunction()
#-----------------------------------------------------------------------------
add_subdirectory(vtkmdiy)
# move diy libraries
if (TARGET vtkmdiympi)
vtkm_diy_set_target_output_directory(vtkmdiympi)
endif()
if (TARGET vtkmdiympi_nompi)
vtkm_diy_set_target_output_directory(vtkmdiympi_nompi)
endif()
include(VTKmDIYUtils)
add_library(vtkm_diy INTERFACE)
vtkm_diy_init_target()
target_include_directories(vtkm_diy INTERFACE
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
$<INSTALL_INTERFACE:${VTKm_INSTALL_INCLUDE_DIR}/vtkm/thirdparty/diy>)
target_link_libraries(vtkm_diy INTERFACE vtkmdiy)
if(VTKm_ENABLE_MPI)
target_link_libraries(vtkm_diy INTERFACE MPI::MPI_CXX)
# special logic for when both versions of the diy library are built
if (VTKm_ENABLE_DIY_NOMPI)
# only link vtkmdiympi/vtkmdiympi_nompi when building executable
set(is_exe "$<STREQUAL:$<TARGET_PROPERTY:TYPE>,EXECUTABLE>")
target_link_libraries(vtkm_diy INTERFACE
"$<LINK_ONLY:$<${is_exe}:$<TARGET_PROPERTY:vtkm_diy,vtkm_diy_target>>>")
# ignore undefined symbols
set(is_shared_lib "$<STREQUAL:$<TARGET_PROPERTY:TYPE>,SHARED_LIBRARY>")
if (APPLE)
target_link_libraries(vtkm_diy INTERFACE "$<${is_shared_lib}:-undefined dynamic_lookup>")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
target_link_libraries(vtkm_diy INTERFACE
"$<${is_shared_lib}:-INCREMENTAL:NO>"
"$<${is_shared_lib}:-FORCE:UNRESOLVED>")
endif()
else()
if (TARGET vtkmdiympi)
target_link_libraries(vtkm_diy INTERFACE vtkmdiympi)
else()
target_link_libraries(vtkm_diy INTERFACE vtkmdiympi_nompi)
endif()
endif()
install(TARGETS vtkm_diy
EXPORT ${VTKm_EXPORT_NAME})
#-----------------------------------------------------------------------------
install(TARGETS vtkm_diy EXPORT ${VTKm_EXPORT_NAME})
## Install headers
if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
install(DIRECTORY vtkmdiy
DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${kit_dir}/)
## Install headers
if (NOT VTKm_INSTALL_ONLY_LIBRARIES)
install(FILES
${VTKm_BINARY_INCLUDE_DIR}/${kit_dir}/Configure.h
${CMAKE_CURRENT_SOURCE_DIR}/diy.h
${CMAKE_CURRENT_SOURCE_DIR}/environment.h
${CMAKE_CURRENT_SOURCE_DIR}/mpi-cast.h
${CMAKE_CURRENT_SOURCE_DIR}/post-include.h
${CMAKE_CURRENT_SOURCE_DIR}/pre-include.h
${CMAKE_CURRENT_SOURCE_DIR}/serialization.h
DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${kit_dir}/)
endif()

@ -20,16 +20,6 @@
/* Use the diy library configured for VTK-m. */
#cmakedefine01 VTKM_USE_EXTERNAL_DIY
/* Whether to use MPI support in DIY */
#if !defined(VTKM_ENABLE_MPI)
# define VTKM_DIY_NO_MPI
#endif
/* initially, we disable DIY threads.
* once we've sorted out how DIY threads and vtkm work together
* we will make this configurable.*/
#define VTKM_DIY_NO_THREADS
/* Need to provide a way for Serialization
* specializations to be injected into the correct
* namespace. This solves the issue while allowing
@ -41,5 +31,4 @@
# define mangled_diy_namespace vtkmdiy
#endif
#endif

@ -10,21 +10,9 @@
#ifndef vtk_m_thirdparty_diy_diy_h
#define vtk_m_thirdparty_diy_diy_h
#include <vtkm/thirdparty/diy/Configure.h>
#if VTKM_USE_EXTERNAL_DIY
#define VTKM_DIY_INCLUDE(header) <diy/header>
#else
#define VTKM_DIY_INCLUDE(header) <vtkmdiy/include/vtkmdiy/header>
#define diy vtkmdiy // mangle namespace diy (see below comments)
#endif
#if defined(VTKM_CLANG) || defined(VTKM_GCC)
#pragma GCC visibility push(default)
#endif
#include "pre-include.h"
// clang-format off
VTKM_THIRDPARTY_PRE_INCLUDE
#include VTKM_DIY_INCLUDE(assigner.hpp)
#include VTKM_DIY_INCLUDE(decomposition.hpp)
#include VTKM_DIY_INCLUDE(master.hpp)
@ -36,29 +24,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
#include VTKM_DIY_INCLUDE(reduce-operations.hpp)
#include VTKM_DIY_INCLUDE(resolve.hpp)
#include VTKM_DIY_INCLUDE(serialization.hpp)
#undef VTKM_DIY_INCLUDE
VTKM_THIRDPARTY_POST_INCLUDE
// clang-format on
#include "post-include.h"
#if defined(VTKM_CLANG) || defined(VTKM_GCC)
#pragma GCC visibility pop
#endif
// When using an external DIY
// We need to alias the diy namespace to
// vtkmdiy so that VTK-m uses it properly
#if VTKM_USE_EXTERNAL_DIY
namespace vtkmdiy = ::diy;
#else
// The aliasing approach fails for when we
// want to us an internal version since
// the diy namespace already points to the
// external version. Instead we use macro
// replacement to make sure all diy classes
// are placed in vtkmdiy placed
#undef diy // mangle namespace diy
#endif
#endif
#endif // vtk_m_thirdparty_diy_diy_h

19
vtkm/thirdparty/diy/environment.h vendored Normal file

@ -0,0 +1,19 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_thirdparty_diy_environment_h
#define vtk_m_thirdparty_diy_environment_h
#include "pre-include.h"
// clang-format off
#include VTKM_DIY_INCLUDE(mpi/environment.hpp)
// clang-format on
#include "post-include.h"
#endif // vtk_m_thirdparty_diy_environment_h

19
vtkm/thirdparty/diy/mpi-cast.h vendored Normal file

@ -0,0 +1,19 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_thirdparty_diy_cast_h
#define vtk_m_thirdparty_diy_cast_h
#include "pre-include.h"
// clang-format off
#include VTKM_DIY_INCLUDE(mpi/mpi_cast.hpp)
// clang-format on
#include "post-include.h"
#endif // vtk_m_thirdparty_diy_cast_h

24
vtkm/thirdparty/diy/post-include.h vendored Normal file

@ -0,0 +1,24 @@
#undef VTKM_DIY_INCLUDE
VTKM_THIRDPARTY_POST_INCLUDE
// clang-format on
#if defined(VTKM_CLANG) || defined(VTKM_GCC)
#pragma GCC visibility pop
#endif
// When using an external DIY
// We need to alias the diy namespace to
// vtkmdiy so that VTK-m uses it properly
#if VTKM_USE_EXTERNAL_DIY
namespace vtkmdiy = ::diy;
#else
// The aliasing approach fails for when we
// want to use an internal version since
// the diy namespace already points to the
// external version. Instead we use macro
// replacement to make sure all diy classes
// are placed in vtkmdiy
#undef diy // mangle namespace diy
#endif

15
vtkm/thirdparty/diy/pre-include.h vendored Normal file

@ -0,0 +1,15 @@
#include <vtkm/thirdparty/diy/Configure.h>
#if VTKM_USE_EXTERNAL_DIY
#define VTKM_DIY_INCLUDE(header) <diy/header>
#else
#define VTKM_DIY_INCLUDE(header) <vtkmdiy/header>
#define diy vtkmdiy // mangle namespace diy
#endif
#if defined(VTKM_CLANG) || defined(VTKM_GCC)
#pragma GCC visibility push(default)
#endif
// clang-format off
VTKM_THIRDPARTY_PRE_INCLUDE

@ -10,45 +10,10 @@
#ifndef vtk_m_thirdparty_diy_serialization_h
#define vtk_m_thirdparty_diy_serialization_h
#include <vtkm/thirdparty/diy/Configure.h>
#if VTKM_USE_EXTERNAL_DIY
#define VTKM_DIY_INCLUDE(header) <diy/header>
#else
#define VTKM_DIY_INCLUDE(header) <vtkmdiy/include/vtkmdiy/header>
#define diy vtkmdiy // mangle namespace diy (see below comments)
#endif
#if defined(VTKM_CLANG) || defined(VTKM_GCC)
#pragma GCC visibility push(default)
#endif
#include "pre-include.h"
// clang-format off
VTKM_THIRDPARTY_PRE_INCLUDE
#include VTKM_DIY_INCLUDE(serialization.hpp)
#undef VTKM_DIY_INCLUDE
VTKM_THIRDPARTY_POST_INCLUDE
// clang-format on
#include "post-include.h"
#if defined(VTKM_CLANG) || defined(VTKM_GCC)
#pragma GCC visibility pop
#endif
// When using an external DIY
// We need to alias the diy namespace to
// vtkmdiy so that VTK-m uses it properly
#if VTKM_USE_EXTERNAL_DIY
namespace vtkmdiy = ::diy;
#else
// The aliasing approach fails for when we
// want to us an internal version since
// the diy namespace already points to the
// external version. Instead we use macro
// replacement to make sure all diy classes
// are placed in vtkmdiy placed
#undef diy // mangle namespace diy
#endif
#endif
#endif // vtk_m_thirdparty_diy_serialization_h

@ -8,9 +8,11 @@ readonly name="diy"
readonly ownership="Diy Upstream <kwrobot@kitware.com>"
readonly subtree="vtkm/thirdparty/$name/vtkm$name"
readonly repo="https://gitlab.kitware.com/third-party/diy2.git"
readonly tag="for/vtk-m"
readonly tag="for/vtk-m-20200608-master"
readonly paths="
cmake
include
CMakeLists.txt
LEGAL.txt
LICENSE.txt
README.md

@ -0,0 +1,257 @@
#=============================================================================
# Adds the following DIY library targets:
# 1. diy: The main diy interface library and the only target for
# header-only mode.
# 2. diympi: Generated when `build_diy_mpi_lib` and `mpi` are turned on.
# Isolates the MPI dependent part of diy into a library.
# 3. diympi_nompi: Generated when `build_diy_mpi_lib` is on and either `mpi`
# is off or `build_diy_nompi_lib` is on.
#
# Both mpi and non-mpi libraries can be generated by turning on `build_diy_mpi_lib`
# and `build_diy_nompi_lib`. In this case, one of these targets must be explicitly
# specified when linking against diy.
#=============================================================================
# cmake_minimum_required() must come before project(): it establishes the
# policy settings that project() (and everything after it) runs under.
cmake_minimum_required (VERSION 3.9)
project (DIY)

# Make the helper modules shipped in cmake/ visible to include().
list (APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
include (CMakeDependentOption)
# Declares a boolean option unless a variable of that name is already defined,
# in which case the existing value is left untouched. All arguments after the
# name are forwarded to option() unchanged.
# This can be replaced when CMake 3.13 is our cmake_minimum_required
macro (diy_option name)
  if (DEFINED "${name}")
    # Already defined: keep the existing value.
  else ()
    option ("${name}" ${ARGN})
  endif ()
endmacro ()
# Declares a dependent option unless a variable of that name is already
# defined. All arguments after the name are forwarded to
# cmake_dependent_option() unchanged.
macro (diy_dependent_option name)
  if (DEFINED "${name}")
    # Already defined: keep the existing value.
  else ()
    cmake_dependent_option ("${name}" ${ARGN})
  endif ()
endmacro ()
# User-facing build options. diy_option / diy_dependent_option only declare an
# option when no variable of that name is already defined.
diy_option (threads "Build DIY with threading" ON)
diy_option (log "Build DIY with logging" OFF)
diy_option (profile "Build DIY with profiling" OFF)
diy_option (caliper "Build DIY with caliper" OFF)
diy_option (mpi "Build DIY with mpi" ON)
diy_option (wrapped_mpi "MPI compiler wrapper requires no further MPI libraries" OFF)
diy_option (build_diy_mpi_lib "Build diy::mpi as a library" OFF)
# Offered (default ON) only while build_diy_mpi_lib is on; otherwise set to OFF.
diy_dependent_option (BUILD_SHARED_LIBS "Create shared libraries if on" ON "build_diy_mpi_lib" OFF)
# Offered (default OFF) only while both mpi and build_diy_mpi_lib are on;
# otherwise set to OFF. Must follow the declarations of those two options.
diy_dependent_option (build_diy_nompi_lib "Also build the nompi version of diy::mpi" OFF "mpi;build_diy_mpi_lib" OFF)
diy_option (build_examples "Build DIY examples" ON)
diy_option (build_tests "Build DIY tests" ON)
# Default to Release when no build type was requested.
if (NOT CMAKE_BUILD_TYPE)
  set (CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
  set_property (CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif ()

# Accumulators for the usage requirements attached to the diy targets below.
set (diy_definitions "")
set (diy_include_directories "")
set (diy_include_thirdparty_directories "")
set (diy_libraries "")

# Debugging
# The expansion is quoted so both sides are always compared as literal strings:
# an unquoted value would be dereferenced again by if() whenever it happens to
# name a variable, and would break the expression if it were empty.
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR
    "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
  list (APPEND diy_definitions "-DDEBUG")
endif ()
# Logging: define VTKMDIY_USE_SPDLOG and add the directory that contains
# spdlog/spdlog.h to the build-tree include directories.
if (log)
  find_path (SPDLOG_INCLUDE_DIR spdlog/spdlog.h)
  list (APPEND diy_definitions "-DVTKMDIY_USE_SPDLOG")
  list (APPEND diy_include_thirdparty_directories $<BUILD_INTERFACE:${SPDLOG_INCLUDE_DIR}>)
endif ()

# Profiling: only a compile definition is needed.
if (profile)
  list (APPEND diy_definitions "-DVTKMDIY_PROFILE")
endif ()

# Caliper: define VTKMDIY_USE_CALIPER, add its include directory and link the
# caliper and caliper-mpi libraries.
if (caliper)
  find_package (caliper)
  list (APPEND diy_definitions "-DVTKMDIY_USE_CALIPER")
  list (APPEND diy_include_thirdparty_directories $<BUILD_INTERFACE:${caliper_INCLUDE_DIR}>)
  list (APPEND diy_libraries caliper caliper-mpi)
endif ()

# Threads: link the platform thread library, or define VTKMDIY_NO_THREADS when
# threading is turned off.
if (threads)
  find_package (Threads)
  list (APPEND diy_libraries ${CMAKE_THREAD_LIBS_INIT})
else ()
  list (APPEND diy_definitions "-DVTKMDIY_NO_THREADS")
endif ()
# MPI
# MPI is searched for only when mpi is on and wrapped_mpi is off; per its
# option text, a wrapping compiler requires no further MPI libraries.
if (mpi AND NOT wrapped_mpi)
find_package(MPI REQUIRED)
endif()
# configuration variables for diy build and install
# if diy is a sub-project, the following variables allow the parent project to
# easily customize the library
# Each variable receives its default only when it is not already defined.
# diy_prefix: used below as the name of the core target, as the prefix of the
# MPI library target names, and as the directory name under include/.
if (NOT DEFINED diy_prefix)
set(diy_prefix "diy")
endif()
# diy_install_include_dir: install destination for the headers.
if (NOT DEFINED diy_install_include_dir)
set(diy_install_include_dir "include")
endif()
# diy_install_lib_dir: install destination for the targets.
if (NOT DEFINED diy_install_lib_dir)
set(diy_install_lib_dir "lib")
endif()
# diy_export_name: name of the export set that the targets are installed into.
if (NOT DEFINED diy_export_name)
set(diy_export_name "diy_targets")
endif()
# Place static and shared libraries in <build dir>/lib.
set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")
# for diy_developer_flags
include(DIYCompilerFlags)
# Adds the library target that holds the compiled part of diy::mpi.
#
#   use_mpi - when true, the target is `${diy_prefix}mpi`, compiled with
#             VTKMDIY_HAS_MPI=1 and linked against MPI::MPI_CXX if that target
#             exists; when false, the target is `${diy_prefix}mpi_nompi`,
#             compiled with VTKMDIY_HAS_MPI=0.
#
# Reads diy_prefix, diy_definitions, diy_include_directories and
# diy_include_thirdparty_directories from the calling scope. Every usage
# requirement set here is PRIVATE.
function(add_diy_mpi_library use_mpi)
  # Target name and the value given to VTKMDIY_HAS_MPI.
  set (lib_name ${diy_prefix}mpi_nompi)
  set (has_mpi_val 0)
  if (use_mpi)
    set (lib_name ${diy_prefix}mpi)
    set (has_mpi_val 1)
  endif ()

  # One source file per diy::mpi component.
  set (sources "")
  foreach (component
      collectives
      communicator
      datatypes
      environment
      io
      operations
      point-to-point
      request
      status
      window)
    list (APPEND sources "include/${diy_prefix}/mpi/${component}.cpp")
  endforeach ()

  add_library (${lib_name} ${sources})
  target_compile_features (${lib_name} PRIVATE cxx_std_11)
  target_compile_definitions (${lib_name} PRIVATE
    -DVTKMDIY_HAS_MPI=${has_mpi_val}
    -Ddiy=${diy_prefix} # mangle diy namespace
    ${diy_definitions})
  target_include_directories (${lib_name} SYSTEM PRIVATE
    ${PROJECT_SOURCE_DIR}/include # for types.hpp
    ${diy_include_directories} # for mpitypes.hpp
    ${diy_include_thirdparty_directories})
  target_link_libraries (${lib_name} PRIVATE diy_developer_flags)
  if (use_mpi AND TARGET MPI::MPI_CXX)
    target_link_libraries (${lib_name} PRIVATE MPI::MPI_CXX)
  endif ()
endfunction ()
# create the targets
set (diy_targets)
if (build_diy_mpi_lib)
  include (DIYConfigureMPI)

  # To be interchangeable, these libraries should only have PRIVATE properties.
  # Properties that should be public should also be part of the core diy target.
  list (APPEND diy_definitions -DVTKMDIY_MPI_AS_LIB)
  # macro required for proper export macros for static vs shared builds
  if (NOT BUILD_SHARED_LIBS)
    list (APPEND diy_definitions -DVTKMDIY_MPI_STATIC_BUILD)
  endif ()

  # Location of the generated mpitypes.hpp, in the build tree and after install.
  list (APPEND diy_include_directories
    "$<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include/${diy_prefix}/mpi>"
    "$<INSTALL_INTERFACE:${diy_install_include_dir}/${diy_prefix}/mpi>")

  # MPI-backed library.
  if (mpi)
    add_diy_mpi_library (ON)
    list (APPEND diy_targets ${diy_prefix}mpi)
  endif ()

  # MPI-free library: built when MPI is off, or when explicitly requested
  # alongside the MPI-backed one.
  if (build_diy_nompi_lib OR (NOT mpi))
    add_diy_mpi_library (OFF)
    list (APPEND diy_targets ${diy_prefix}mpi_nompi)
  endif ()
endif () # build_diy_mpi_lib
# The core interface target; it carries the public usage requirements of diy.
add_library (${diy_prefix} INTERFACE)
target_compile_features (${diy_prefix} INTERFACE cxx_std_11)
target_compile_definitions (${diy_prefix} INTERFACE ${diy_definitions})
target_include_directories (${diy_prefix} SYSTEM INTERFACE
  "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
  "$<INSTALL_INTERFACE:${diy_install_include_dir}>"
  ${diy_include_thirdparty_directories}
  ${diy_include_directories})
target_link_libraries (${diy_prefix} INTERFACE ${diy_libraries})

if (build_diy_mpi_lib)
  # Library mode. When only one flavour of the MPI library is built, the core
  # target links it. When both are built, nothing is linked here and one of
  # them has to be named explicitly by whoever links against diy.
  if (NOT build_diy_nompi_lib)
    if (mpi)
      target_link_libraries (${diy_prefix} INTERFACE ${diy_prefix}mpi)
    else ()
      target_link_libraries (${diy_prefix} INTERFACE ${diy_prefix}mpi_nompi)
    endif ()
  endif ()
elseif (mpi)
  # Header-only mode with MPI.
  target_compile_definitions (${diy_prefix} INTERFACE -DVTKMDIY_HAS_MPI=1)
  if (TARGET MPI::MPI_CXX)
    target_link_libraries (${diy_prefix} INTERFACE MPI::MPI_CXX)
  endif ()
else ()
  # Header-only mode without MPI.
  target_compile_definitions (${diy_prefix} INTERFACE -DVTKMDIY_HAS_MPI=0)
endif ()

list (APPEND diy_targets ${diy_prefix} diy_developer_flags)

# libraries used by examples and tests
# The MPI-backed library is preferred when it was built; otherwise the
# MPI-free one is used, if present.
set (libraries ${diy_prefix})
if (${diy_prefix}mpi IN_LIST diy_targets)
  list (APPEND libraries ${diy_prefix}mpi)
elseif (${diy_prefix}mpi_nompi IN_LIST diy_targets)
  list (APPEND libraries ${diy_prefix}mpi_nompi)
endif ()
list (APPEND libraries diy_developer_flags)
# enable testing and CDash dashboard submission
enable_testing ()
include (CTest)

# Optional sub-directories.
if (build_examples)
  add_subdirectory (examples)
endif ()
if (build_tests)
  add_subdirectory (tests)
endif ()

# configure find_package script
include (CMakePackageConfigHelpers)
configure_package_config_file (
  "${PROJECT_SOURCE_DIR}/cmake/diy-config.cmake.in"
  "${PROJECT_BINARY_DIR}/diy-config.cmake"
  INSTALL_DESTINATION ".")

# install targets
# Headers are installed unless the parent project defines
# diy_install_only_libraries (building for binary distribution).
if (NOT DEFINED diy_install_only_libraries)
  install (
    DIRECTORY ${PROJECT_SOURCE_DIR}/include/${diy_prefix}
    DESTINATION ${diy_install_include_dir})
  if (build_diy_mpi_lib)
    # mpitypes.hpp is generated into the build tree, so it is installed separately.
    install (
      FILES ${PROJECT_BINARY_DIR}/include/${diy_prefix}/mpi/mpitypes.hpp
      DESTINATION ${diy_install_include_dir}/${diy_prefix}/mpi)
  endif ()
endif ()

install (
  TARGETS ${diy_targets}
  EXPORT ${diy_export_name}
  DESTINATION ${diy_install_lib_dir})
export (
  EXPORT ${diy_export_name}
  NAMESPACE DIY::
  FILE "${PROJECT_BINARY_DIR}/diy-targets.cmake")

# Only generate these files when diy is the main project
if (CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR)
  install (
    EXPORT ${diy_export_name}
    NAMESPACE DIY::
    DESTINATION "."
    FILE diy-targets.cmake)
  install (
    FILES "${PROJECT_BINARY_DIR}/diy-config.cmake"
    DESTINATION ".")
endif ()

@ -0,0 +1,76 @@
# Classify the C++ compiler into DIY_COMPILER_IS_* variables.
# The MSVC test comes first because it also matches compilers that only
# simulate MSVC; the remaining tests compare the compiler id against distinct
# strings, so at most one of them can match.
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC" OR
    CMAKE_CXX_SIMULATE_ID STREQUAL "MSVC")
  set (DIY_COMPILER_IS_MSVC 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  set (DIY_COMPILER_IS_GNU 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  set (DIY_COMPILER_IS_CLANG 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
  # AppleClang is flagged both as Clang and as AppleClang.
  set (DIY_COMPILER_IS_CLANG 1)
  set (DIY_COMPILER_IS_APPLECLANG 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
  set (DIY_COMPILER_IS_ICC 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "PGI")
  set (DIY_COMPILER_IS_PGI 1)
endif ()
#-----------------------------------------------------------------------------
# diy_developer_flags: an INTERFACE target that carries the warning-related
# compile options (and, for MSVC, definitions) for the detected compiler.
# The options apply to C++ sources only.
add_library(diy_developer_flags INTERFACE)

# Start from an empty list so that a `cxx_flags` variable inherited from an
# enclosing scope can never end up in the options below (the MSVC branch does
# not assign it for every CMake version).
set(cxx_flags "")

if(DIY_COMPILER_IS_MSVC)
  target_compile_definitions(diy_developer_flags INTERFACE
    "_SCL_SECURE_NO_WARNINGS" "_CRT_SECURE_NO_WARNINGS")

  # NOTE(review): presumably -W3 is only needed from CMake 3.15 on because
  # older versions already add it to the default MSVC flags -- confirm.
  if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.15)
    set(cxx_flags "-W3")
  endif()

  # Suppressions that are currently not enabled: -wd4702 -wd4505, and -wd4127
  # for VS2013 (MSVC_VERSION < 1900), where the C4127 warning has a bug in the
  # implementation and generates false positives for lots of template code.

  target_compile_options(diy_developer_flags INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>")

elseif(DIY_COMPILER_IS_ICC)
  # disable some false positive warnings
  set(cxx_flags -wd186 -wd3280)
  list(APPEND cxx_flags -diag-disable=11074 -diag-disable=11076)
  # Suppressions that are currently not enabled: -wd1478 -wd13379

  target_compile_options(diy_developer_flags INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>")

elseif(DIY_COMPILER_IS_GNU OR DIY_COMPILER_IS_CLANG)
  set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common)

  # Only add float-conversion warnings for gcc as the integer warnings in GCC
  # include the implicit casting of all types smaller than int to ints.
  if (DIY_COMPILER_IS_GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.99)
    list(APPEND cxx_flags -Wfloat-conversion)
  elseif (DIY_COMPILER_IS_CLANG)
    list(APPEND cxx_flags -Wconversion)
  endif()

  # TODO: remove after resolving these warnings
  # temporarily disable the following warnings as we will need a well thought out plan for fixing these
  list(APPEND cxx_flags -Wno-sign-conversion -Wno-sign-compare -Wno-cast-align)

  # Add in the -Wodr warning for GCC versions 5.2+
  if (DIY_COMPILER_IS_CLANG OR (DIY_COMPILER_IS_GNU AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1))
    list(APPEND cxx_flags -Wodr)
  endif()

  # GCC 5, 6 don't properly handle strict-overflow suppression through pragmas.
  # Instead of suppressing around the location of the strict-overflow you
  # have to suppress around the entry point, or in vtk-m case the worklet
  # invocation site. This is incredibly tedious and has been fixed in gcc 7
  if(DIY_COMPILER_IS_GNU AND
     (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 4.99) AND
     (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.99) )
    list(APPEND cxx_flags -Wno-strict-overflow)
  endif()

  # Quoted so the whole flag list stays inside a single generator expression.
  target_compile_options(diy_developer_flags INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>")
endif()

@ -0,0 +1,51 @@
# Measures the size of the MPI handle types (from mpi.h when `mpi` is on, from
# diy's no-mpi.hpp otherwise) and generates mpitypes.hpp from its template.
# Reads `mpi`, `diy_prefix` and PROJECT_SOURCE_DIR from the including scope.
# NOTE(review): CMAKE_REQUIRED_INCLUDES, CMAKE_EXTRA_INCLUDE_FILES,
# CMAKE_REQUIRED_LIBRARIES and CMAKE_CXX_STANDARD are modified below and not
# restored, so the changes stay visible to whatever includes this file --
# confirm this is intended.

# Keep the policy change local to this file (undone by the POP at the end).
cmake_policy(PUSH)
# CMP0075, where available: the include-file check macros honor
# CMAKE_REQUIRED_LIBRARIES.
if (POLICY CMP0075)
cmake_policy(SET CMP0075 NEW)
endif()
include (CheckTypeSize)
# Headers and libraries that declare the MPI types for the checks below.
if (mpi)
find_package(MPI REQUIRED)
list(APPEND CMAKE_REQUIRED_INCLUDES ${MPI_CXX_INCLUDE_PATH})
list(APPEND CMAKE_EXTRA_INCLUDE_FILES "mpi.h")
list(APPEND CMAKE_REQUIRED_LIBRARIES ${MPI_CXX_LIBRARIES})
else()
list(APPEND CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/include)
list(APPEND CMAKE_EXTRA_INCLUDE_FILES "${diy_prefix}/mpi/no-mpi.hpp")
endif()
# mpi_types.h (in cmake/) is listed after the header above, so the MPI types
# are declared before the Wrapped_MPI_* structs that use them.
list(APPEND CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/cmake)
list(APPEND CMAKE_EXTRA_INCLUDE_FILES "mpi_types.h")
# Drop the cached results unless a cached `previous_mpi` exists and has the
# same truth value as `mpi`, so that the sizes are measured again after
# switching between the MPI and no-MPI configurations.
# NOTE(review): the `DEFINED CACHE{var}` form was added in CMake 3.14, while
# the project declares 3.9 as its minimum -- confirm behavior on older versions.
if (NOT (DEFINED CACHE{previous_mpi} AND ((previous_mpi AND mpi) OR (NOT(previous_mpi OR mpi)))))
unset(TYPESIZE_MPI_Comm CACHE)
unset(HAVE_TYPESIZE_MPI_Comm CACHE)
unset(TYPESIZE_MPI_Datatype CACHE)
unset(HAVE_TYPESIZE_MPI_Datatype CACHE)
unset(TYPESIZE_MPI_Status CACHE)
unset(HAVE_TYPESIZE_MPI_Status CACHE)
unset(TYPESIZE_MPI_Request CACHE)
unset(HAVE_TYPESIZE_MPI_Request CACHE)
unset(TYPESIZE_MPI_Op CACHE)
unset(HAVE_TYPESIZE_MPI_Op CACHE)
unset(TYPESIZE_MPI_File CACHE)
unset(HAVE_TYPESIZE_MPI_File CACHE)
unset(TYPESIZE_MPI_Win CACHE)
unset(HAVE_TYPESIZE_MPI_Win CACHE)
# Remember which configuration the results about to be computed belong to.
set (previous_mpi ${mpi} CACHE INTERNAL "" FORCE)
endif()
set(CMAKE_CXX_STANDARD 11)
# One check per handle type; each result is stored in TYPESIZE_MPI_<Type>.
check_type_size("Wrapped_MPI_Comm" TYPESIZE_MPI_Comm LANGUAGE CXX)
check_type_size("Wrapped_MPI_Datatype" TYPESIZE_MPI_Datatype LANGUAGE CXX)
check_type_size("Wrapped_MPI_Status" TYPESIZE_MPI_Status LANGUAGE CXX)
check_type_size("Wrapped_MPI_Request" TYPESIZE_MPI_Request LANGUAGE CXX)
check_type_size("Wrapped_MPI_Op" TYPESIZE_MPI_Op LANGUAGE CXX)
check_type_size("Wrapped_MPI_File" TYPESIZE_MPI_File LANGUAGE CXX)
check_type_size("Wrapped_MPI_Win" TYPESIZE_MPI_Win LANGUAGE CXX)
# Only @VAR@ references are substituted (@ONLY).
# NOTE(review): presumably the template consumes the TYPESIZE_MPI_* results -- confirm.
configure_file("include/${diy_prefix}/mpi/mpitypes.hpp.in" "include/${diy_prefix}/mpi/mpitypes.hpp" @ONLY)
cmake_policy(POP)

@ -0,0 +1,57 @@
# Package configuration file for diy. It imports the exported targets and
# re-locates the optional dependencies that diy was built with. When a
# dependency is missing, <Package>_FOUND is set to 0 and the reason is appended
# to <Package>_NOT_FOUND_MESSAGE.
if (CMAKE_VERSION VERSION_LESS "3.9")
  message(FATAL_ERROR "Diy requires CMake 3.9+")
endif()

@PACKAGE_INIT@

# Option values diy was configured with.
set(threads "@threads@")
set(log "@log@")
set(caliper "@caliper@")
set(mpi "@mpi@")

include("${CMAKE_CURRENT_LIST_DIR}/diy-targets.cmake")

# Forward the QUIET request of the enclosing find_package() call.
set(_diy_find_quietly)
if (${CMAKE_FIND_PACKAGE_NAME}_FIND_QUIETLY)
  set(_diy_find_quietly QUIET)
endif()

if (threads)
  find_package(Threads ${_diy_find_quietly})
  if (NOT Threads_FOUND)
    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "Threads not found")
    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
  endif()
endif()

if (log)
  find_path(SPDLOG_INCLUDE_DIR "spdlog/spdlog.h")
  if (SPDLOG_INCLUDE_DIR STREQUAL "SPDLOG_INCLUDE_DIR-NOTFOUND")
    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "SPDLOG not found")
    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
  else()
    # The directory is appended to the property directly. Wrapping it in
    # $<INSTALL_INTERFACE:...> would evaluate to nothing in the consuming
    # project, and target_include_directories() does not accept imported
    # targets in every CMake version this file supports.
    set_property(TARGET DIY::@diy_prefix@ APPEND PROPERTY
      INTERFACE_INCLUDE_DIRECTORIES "${SPDLOG_INCLUDE_DIR}")
  endif()
endif()

if (caliper)
  find_package(caliper ${_diy_find_quietly})
  if (NOT caliper_FOUND)
    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "Caliper not found")
    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
  elseif (caliper_INCLUDE_DIR)
    # Same reasoning as for spdlog above; skipped when caliper reports no
    # include directory.
    set_property(TARGET DIY::@diy_prefix@ APPEND PROPERTY
      INTERFACE_INCLUDE_DIRECTORIES "${caliper_INCLUDE_DIR}")
  endif()
endif()

if (mpi)
  find_package(MPI ${_diy_find_quietly})
  if (NOT MPI_FOUND)
    list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "MPI not found")
    set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0)
  endif()
endif()

# No dependency check failed: report the package as found.
if (NOT DEFINED "${CMAKE_FIND_PACKAGE_NAME}_FOUND")
  set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 1)
endif ()

@ -0,0 +1,9 @@
// Some MPI implementations define their handle types as macros, which makes
// `check_type_size` fail when it is given the type directly. Each handle type
// is therefore wrapped in a struct with a single member, and the size check is
// run on the struct instead.
struct Wrapped_MPI_Comm
{
  MPI_Comm obj;
};

struct Wrapped_MPI_Datatype
{
  MPI_Datatype obj;
};

struct Wrapped_MPI_Status
{
  MPI_Status obj;
};

struct Wrapped_MPI_Request
{
  MPI_Request obj;
};

struct Wrapped_MPI_Op
{
  MPI_Op obj;
};

struct Wrapped_MPI_File
{
  MPI_File obj;
};

struct Wrapped_MPI_Win
{
  MPI_Win obj;
};

@ -86,7 +86,7 @@ namespace diy
typedef diy::RegularContinuousLink RCLink;
for (size_t i = 0; i < master.size(); ++i)
for (int i = 0; i < static_cast<int>(master.size()); ++i)
{
RCLink* link = static_cast<RCLink*>(master.link(i));
*link = RCLink(dim, domain, domain);
@ -96,7 +96,7 @@ namespace diy
diy::BlockID self = { master.gid(i), master.communicator().rank() };
for (int j = 0; j < dim; ++j)
{
diy::Direction dir, wrap_dir;
diy::Direction dir(dim,0), wrap_dir(dim,0);
// left
dir[j] = -1; wrap_dir[j] = -1;
@ -122,7 +122,7 @@ namespace diy
// update master.expected to match the links
int expected = 0;
for (size_t i = 0; i < master.size(); ++i)
for (int i = 0; i < static_cast<int>(master.size()); ++i)
expected += master.link(i)->size_unique();
master.set_expected(expected);
}
@ -146,7 +146,7 @@ namespace diy
typedef diy::RegularContinuousLink RCLink;
for (size_t i = 0; i < master.size(); ++i)
for (int i = 0; i < static_cast<int>(master.size()); ++i)
{
RCLink* link = static_cast<RCLink*>(master.link(i));
*link = RCLink(dim, domain, domain);
@ -156,7 +156,7 @@ namespace diy
diy::BlockID self = { master.gid(i), master.communicator().rank() };
for (int j = 0; j < dim; ++j)
{
diy::Direction dir, wrap_dir;
diy::Direction dir(dim,0), wrap_dir(dim,0);
// left
dir[j] = -1; wrap_dir[j] = -1;
@ -182,7 +182,7 @@ namespace diy
// update master.expected to match the links
int expected = 0;
for (size_t i = 0; i < master.size(); ++i)
for (int i = 0; i < static_cast<int>(master.size()); ++i)
expected += master.link(i)->size_unique();
master.set_expected(expected);
}

@ -22,6 +22,7 @@ namespace diy
int nblocks__ //!< total (global) number of blocks
):
size_(size__), nblocks_(nblocks__) {}
virtual ~Assigner() {}
//! returns the total number of process ranks
int size() const { return size_; }
@ -36,6 +37,7 @@ namespace diy
virtual std::vector<int>
ranks(const std::vector<int>& gids) const;
private:
int size_; // total number of ranks
int nblocks_; // total number of blocks
@ -107,7 +109,7 @@ namespace diy
Assigner(size__, nblocks__),
comm_(comm),
div_(nblocks__ / size__ + ((nblocks__ % size__) == 0 ? 0 : 1)), // NB: same size window everywhere means the last rank may allocate extra space
rank_map_(comm_, div_) { rank_map_.lock_all(MPI_MODE_NOCHECK); }
rank_map_(comm_, div_) { rank_map_.lock_all(mpi::nocheck); }
~DynamicAssigner() { rank_map_.unlock_all(); }
inline
@ -187,7 +189,7 @@ set_nblocks(int nblocks__)
rank_map_.unlock_all();
rank_map_ = mpi::window<int>(comm_, div_);
rank_map_.lock_all(MPI_MODE_NOCHECK);
rank_map_.lock_all(mpi::nocheck);
}
std::tuple<bool,int>

@ -1,26 +1,39 @@
#ifndef VTKMDIY_CONSTANTS_H
#define VTKMDIY_CONSTANTS_H
// Default DIY_MAX_DIM to 4, unless provided by the user
// Default VTKMDIY_MAX_DIM to 4, unless provided by the user
// (used for static min/max size in various Bounds)
#ifndef DIY_MAX_DIM
#define DIY_MAX_DIM 4
#ifndef VTKMDIY_MAX_DIM
#define VTKMDIY_MAX_DIM 4
#endif
enum
{
DIY_X0 = 0x01, /* minimum-side x (left) neighbor */
DIY_X1 = 0x02, /* maximum-side x (right) neighbor */
DIY_Y0 = 0x04, /* minimum-side y (bottom) neighbor */
DIY_Y1 = 0x08, /* maximum-side y (top) neighbor */
DIY_Z0 = 0x10, /* minimum-side z (back) neighbor */
DIY_Z1 = 0x20, /* maximum-side z (front)neighbor */
DIY_T0 = 0x40, /* minimum-side t (earlier) neighbor */
DIY_T1 = 0x80 /* maximum-side t (later) neighbor */
VTKMDIY_X0 = 0x01, /* minimum-side x (left) neighbor */
VTKMDIY_X1 = 0x02, /* maximum-side x (right) neighbor */
VTKMDIY_Y0 = 0x04, /* minimum-side y (bottom) neighbor */
VTKMDIY_Y1 = 0x08, /* maximum-side y (top) neighbor */
VTKMDIY_Z0 = 0x10, /* minimum-side z (back) neighbor */
VTKMDIY_Z1 = 0x20, /* maximum-side z (front)neighbor */
VTKMDIY_T0 = 0x40, /* minimum-side t (earlier) neighbor */
VTKMDIY_T1 = 0x80 /* maximum-side t (later) neighbor */
};
#ifndef DIY_UNUSED
#define DIY_UNUSED(expr) do { (void)(expr); } while (0)
#define VTKMDIY_UNUSED(expr) do { (void)(expr); } while (0)
// From https://stackoverflow.com/a/21265197/44738
#if defined(__cplusplus) && (__cplusplus >= 201402L)
# define DEPRECATED(msg) [[deprecated(#msg)]]
#else
# if defined(__GNUC__) || defined(__clang__)
# define DEPRECATED(msg) __attribute__((deprecated(#msg)))
# elif defined(_MSC_VER)
# define DEPRECATED(msg) __declspec(deprecated(#msg))
# else
# pragma message("WARNING: You need to implement DEPRECATED for this compiler")
# define DEPRECATED(msg)
# endif
#endif
#endif

@ -17,6 +17,9 @@ namespace diy
const T& operator*() const { return x_; }
const T* operator->() const { return &x_; }
void lock() { lock_.lock(); }
void unlock() { lock_.unlock(); }
private:
T& x_;
lock_guard<Mutex> lock_;
@ -33,6 +36,8 @@ namespace diy
critical_resource() {}
critical_resource(const T& x):
x_(x) {}
critical_resource(T&& x):
x_(std::move(x)) {}
accessor access() { return accessor(x_, m_); }
const_accessor const_access() const { return const_accessor(x_, m_); }

@ -63,8 +63,8 @@ namespace detail
static Coordinate from(int i, int n, Coordinate min, Coordinate max, bool) { return min + (max - min)/n * i; }
static Coordinate to (int i, int n, Coordinate min, Coordinate max, bool) { return min + (max - min)/n * (i+1); }
static int lower(Coordinate x, int n, Coordinate min, Coordinate max, bool) { Coordinate width = (max - min)/n; Coordinate res = std::floor((x - min)/width); if (min + res*width == x) return (res - 1); else return res; }
static int upper(Coordinate x, int n, Coordinate min, Coordinate max, bool) { Coordinate width = (max - min)/n; Coordinate res = std::ceil ((x - min)/width); if (min + res*width == x) return (res + 1); else return res; }
static int lower(Coordinate x, int n, Coordinate min, Coordinate max, bool) { Coordinate width = (max - min)/n; auto res = static_cast<int>(std::floor((x - min)/width)); if (min + res*width == x) return (res - 1); else return res; }
static int upper(Coordinate x, int n, Coordinate min, Coordinate max, bool) { Coordinate width = (max - min)/n; auto res = static_cast<int>(std::ceil ((x - min)/width)); if (min + res*width == x) return (res + 1); else return res; }
};
}
@ -74,9 +74,9 @@ namespace detail
template<class Bounds_>
struct RegularDecomposer
{
typedef Bounds_ Bounds;
typedef typename BoundsValue<Bounds>::type Coordinate;
typedef typename RegularLinkSelector<Bounds>::type Link;
using Bounds = Bounds_;
using Coordinate = typename BoundsValue<Bounds>::type;
using Link = RegularLink<Bounds>;
using Creator = std::function<void(int, Bounds, Bounds, Bounds, Link)>;
using Updater = std::function<void(int, int, Bounds, Bounds, Bounds, Link)>;
@ -123,6 +123,7 @@ namespace detail
template<class Point>
int lowest_gid(const Point& p) const;
DivisionsVector gid_to_coords(int gid) const { DivisionsVector coords; gid_to_coords(gid, coords); return coords; }
void gid_to_coords(int gid, DivisionsVector& coords) const { gid_to_coords(gid, coords, divisions); }
int coords_to_gid(const DivisionsVector& coords) const { return coords_to_gid(coords, divisions); }
void fill_divisions(std::vector<int>& divisions) const;
@ -131,8 +132,8 @@ namespace detail
void fill_bounds(Bounds& bounds, int gid, bool add_ghosts = false) const;
static bool all(const std::vector<int>& v, int x);
static void gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divisions);
static int coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divisions);
static void gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divs);
static int coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divs);
static void factor(std::vector<unsigned>& factors, int n);
@ -303,7 +304,7 @@ decompose(int rank, const StaticAssigner& assigner, const Creator& create)
DivisionsVector coords;
gid_to_coords(gid, coords);
Bounds core, bounds;
Bounds core(dim), bounds(dim);
fill_bounds(core, coords);
fill_bounds(bounds, coords, true);
@ -325,7 +326,7 @@ decompose(int rank, const StaticAssigner& assigner, const Creator& create)
if (all(offsets, 0)) continue; // skip ourselves
DivisionsVector nhbr_coords(dim);
Direction dir, wrap_dir;
Direction dir(dim,0), wrap_dir(dim,0);
bool inbounds = true;
for (int k = 0; k < dim; ++k)
{
@ -364,8 +365,12 @@ decompose(int rank, const StaticAssigner& assigner, const Creator& create)
BlockID bid; bid.gid = nhbr_gid; bid.proc = assigner.rank(nhbr_gid);
link.add_neighbor(bid);
Bounds nhbr_bounds;
fill_bounds(nhbr_bounds, nhbr_coords);
Bounds nhbr_core(dim);
fill_bounds(nhbr_core, nhbr_coords);
link.add_core(nhbr_core);
Bounds nhbr_bounds(dim);
fill_bounds(nhbr_bounds, nhbr_coords, true);
link.add_bounds(nhbr_bounds);
link.add_direction(dir);
@ -405,25 +410,25 @@ all(const std::vector<int>& v, int x)
template<class Bounds>
void
diy::RegularDecomposer<Bounds>::
gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divisions)
gid_to_coords(int gid, DivisionsVector& coords, const DivisionsVector& divs)
{
int dim = static_cast<int>(divisions.size());
for (int i = 0; i < dim; ++i)
coords.clear();
for (int i = 0; i < static_cast<int>(divs.size()); ++i)
{
coords.push_back(gid % divisions[i]);
gid /= divisions[i];
coords.push_back(gid % divs[i]);
gid /= divs[i];
}
}
template<class Bounds>
int
diy::RegularDecomposer<Bounds>::
coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divisions)
coords_to_gid(const DivisionsVector& coords, const DivisionsVector& divs)
{
int gid = 0;
for (int i = static_cast<int>(coords.size()) - 1; i >= 0; --i)
{
gid *= divisions[i];
gid *= divs[i];
gid += coords[i];
}
return gid;
@ -445,12 +450,6 @@ fill_bounds(Bounds& bounds, //!< (output) bounds
bounds.max[i] = detail::BoundsHelper<Bounds>::to (coords[i], divisions[i], domain.min[i], domain.max[i], share_face[i]);
}
for (int i = dim; i < DIY_MAX_DIM; ++i) // set the unused dimension to 0
{
bounds.min[i] = 0;
bounds.max[i] = 0;
}
if (!add_ghosts)
return;
@ -554,8 +553,7 @@ fill_divisions(std::vector<int>& divisions_) const
}
// iterate over factorization of number of blocks (factors are sorted smallest to largest)
// NB: using int instead of size_t because must be negative in order to break out of loop
for (int i = factors.size() - 1; i >= 0; --i)
for (auto f = factors.rbegin(); f != factors.rend(); ++f)
{
// fill in missing divs by dividing dimension w/ largest block size
// except when this would be illegal (resulting in bounds.max < bounds.min;
@ -567,19 +565,19 @@ fill_divisions(std::vector<int>& divisions_) const
// split the dimension with the largest block size (first element in vector)
Coordinate min =
detail::BoundsHelper<Bounds>::from(0,
missing_divs[0].nb * factors[i],
missing_divs[0].nb * (*f),
domain.min[missing_divs[0].dim],
domain.max[missing_divs[0].dim],
share_face[missing_divs[0].dim]);
Coordinate max =
detail::BoundsHelper<Bounds>::to(0,
missing_divs[0].nb * factors[i],
missing_divs[0].nb * (*f),
domain.min[missing_divs[0].dim],
domain.max[missing_divs[0].dim],
share_face[missing_divs[0].dim]);
if (max >= min)
{
missing_divs[0].nb *= factors[i];
missing_divs[0].nb *= (*f);
missing_divs[0].b_size = max - min;
}
else

@ -74,7 +74,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
dim = partners.dim(srp.round() - 1);
if (srp.round() == partners.rounds())
update_links(b, srp, dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
update_links(b, srp, dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
else if (partners.swap_round(srp.round()) && partners.sub_round(srp.round()) < 0) // link round
{
dequeue_exchange(b, srp, dim); // from the swap round
@ -92,7 +92,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
int prev_dim = dim - 1;
if (prev_dim < 0)
prev_dim += dim_;
update_links(b, srp, prev_dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
update_links(b, srp, prev_dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
}
compute_local_samples(b, srp, dim);
@ -134,7 +134,7 @@ divide_gid(int gid, bool lower, int round, int rounds) const
template<class Block, class Point>
void
diy::detail::KDTreeSamplingPartition<Block,Point>::
update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
update_links(Block*, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
{
auto log = get_logger();
int gid = srp.gid();
@ -150,7 +150,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
std::vector<float> splits(link->size());
for (int i = 0; i < link->size(); ++i)
{
float split; diy::Direction dir;
float split; diy::Direction dir(dim_,0);
int in_gid = link->target(i).gid;
while(srp.incoming(in_gid))
@ -194,7 +194,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
if (wrap)
new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
else
new_link.add_wrap(diy::Direction());
new_link.add_wrap(diy::Direction(dim_,0));
}
} else // non-aligned side
{
@ -215,7 +215,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
if (wrap)
new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
else
new_link.add_wrap(diy::Direction());
new_link.add_wrap(diy::Direction(dim_,0));
}
}
}
@ -230,16 +230,16 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
update_neighbor_bounds(nbr_bounds, find_split(new_link.bounds(), nbr_bounds), dim, !lower);
new_link.add_bounds(nbr_bounds);
new_link.add_wrap(diy::Direction()); // dual block cannot be wrapped
new_link.add_wrap(diy::Direction(dim_,0)); // dual block cannot be wrapped
if (lower)
{
diy::Direction right;
diy::Direction right(dim_,0);
right[dim] = 1;
new_link.add_direction(right);
} else
{
diy::Direction left;
diy::Direction left(dim_,0);
left[dim] = -1;
new_link.add_direction(left);
}
@ -253,7 +253,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
template<class Block, class Point>
void
diy::detail::KDTreeSamplingPartition<Block,Point>::
split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const
split_to_neighbors(Block*, const diy::ReduceProxy& srp, int) const
{
int lid = srp.master()->lid(srp.gid());
RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
@ -290,7 +290,7 @@ compute_local_samples(Block* b, const diy::ReduceProxy& srp, int dim) const
template<class Block, class Point>
void
diy::detail::KDTreeSamplingPartition<Block,Point>::
add_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
add_samples(Block*, const diy::ReduceProxy& srp, Samples& samples) const
{
// dequeue and combine the samples
for (int i = 0; i < srp.in_link().size(); ++i)
@ -307,7 +307,7 @@ add_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
template<class Block, class Point>
void
diy::detail::KDTreeSamplingPartition<Block,Point>::
receive_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
receive_samples(Block*, const diy::ReduceProxy& srp, Samples& samples) const
{
srp.dequeue(srp.in_link().target(0).gid, samples);
}
@ -315,7 +315,7 @@ receive_samples(Block* b, const diy::ReduceProxy& srp, Samples& samples) const
template<class Block, class Point>
void
diy::detail::KDTreeSamplingPartition<Block,Point>::
forward_samples(Block* b, const diy::ReduceProxy& srp, const Samples& samples) const
forward_samples(Block*, const diy::ReduceProxy& srp, const Samples& samples) const
{
for (int i = 0; i < srp.out_link().size(); ++i)
srp.enqueue(srp.out_link().target(i), samples);
@ -435,7 +435,7 @@ diy::Direction
diy::detail::KDTreeSamplingPartition<Block,Point>::
find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const
{
diy::Direction wrap;
diy::Direction wrap(dim_,0);
for (int i = 0; i < dim_; ++i)
{
if (bounds.min[i] == domain.min[i] && nbr_bounds.max[i] == domain.max[i])

@ -68,10 +68,10 @@ struct diy::detail::KDTreePartners
wrap(wrap_),
domain(domain_)
{
for (unsigned i = 0; i < swap.rounds(); ++i)
for (int i = 0; i < swap.rounds(); ++i)
{
// fill histogram rounds
for (unsigned j = 0; j < histogram.rounds(); ++j)
for (int j = 0; j < histogram.rounds(); ++j)
{
rounds_.push_back(std::make_pair(false, j));
dim_.push_back(i % dim);
@ -115,7 +115,7 @@ struct diy::detail::KDTreePartners
else if (swap_round(round) && sub_round(round) < 0) // link round
swap.incoming(sub_round(round - 1) + 1, gid, partners, m);
else if (swap_round(round))
histogram.incoming(histogram.rounds(), gid, partners, m);
histogram.incoming(static_cast<int>(histogram.rounds()), gid, partners, m);
else
{
if (round > 0 && sub_round(round) == 0)
@ -177,7 +177,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
dim = partners.dim(srp.round() - 1);
if (srp.round() == partners.rounds())
update_links(b, srp, dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
update_links(b, srp, dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
else if (partners.swap_round(srp.round()) && partners.sub_round(srp.round()) < 0) // link round
{
dequeue_exchange(b, srp, dim); // from the swap round
@ -195,7 +195,7 @@ operator()(Block* b, const diy::ReduceProxy& srp, const KDTreePartners& partners
int prev_dim = dim - 1;
if (prev_dim < 0)
prev_dim += dim_;
update_links(b, srp, prev_dim, partners.sub_round(srp.round() - 2), partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
update_links(b, srp, prev_dim, partners.sub_round((int)srp.round() - 2), (int)partners.swap_rounds(), partners.wrap, partners.domain); // -1 would be the "uninformative" link round
}
compute_local_histogram(b, srp, dim);
@ -229,7 +229,7 @@ divide_gid(int gid, bool lower, int round, int rounds) const
template<class Block, class Point>
void
diy::detail::KDTreePartition<Block,Point>::
update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
update_links(Block*, const diy::ReduceProxy& srp, int dim, int round, int rounds, bool wrap, const Bounds& domain) const
{
int gid = srp.gid();
int lid = srp.master()->lid(gid);
@ -244,7 +244,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
std::vector<float> splits(link->size());
for (int i = 0; i < link->size(); ++i)
{
float split; diy::Direction dir;
float split; diy::Direction dir(dim_,0);
int in_gid = link->target(i).gid;
while(srp.incoming(in_gid))
@ -287,7 +287,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
if (wrap)
new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
else
new_link.add_wrap(diy::Direction());
new_link.add_wrap(diy::Direction(dim_,0));
}
} else // non-aligned side
{
@ -308,7 +308,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
if (wrap)
new_link.add_wrap(find_wrap(new_link.bounds(), bounds, domain));
else
new_link.add_wrap(diy::Direction());
new_link.add_wrap(diy::Direction(dim_, 0));
}
}
}
@ -323,16 +323,16 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
update_neighbor_bounds(nbr_bounds, find_split(new_link.bounds(), nbr_bounds), dim, !lower);
new_link.add_bounds(nbr_bounds);
new_link.add_wrap(diy::Direction()); // dual block cannot be wrapped
new_link.add_wrap(diy::Direction(dim_,0)); // dual block cannot be wrapped
if (lower)
{
diy::Direction right;
diy::Direction right(dim_,0);
right[dim] = 1;
new_link.add_direction(right);
} else
{
diy::Direction left;
diy::Direction left(dim_,0);
left[dim] = -1;
new_link.add_direction(left);
}
@ -346,7 +346,7 @@ update_links(Block* b, const diy::ReduceProxy& srp, int dim, int round, int roun
template<class Block, class Point>
void
diy::detail::KDTreePartition<Block,Point>::
split_to_neighbors(Block* b, const diy::ReduceProxy& srp, int dim) const
split_to_neighbors(Block*, const diy::ReduceProxy& srp, int) const
{
int lid = srp.master()->lid(srp.gid());
RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
@ -366,20 +366,23 @@ void
diy::detail::KDTreePartition<Block,Point>::
compute_local_histogram(Block* b, const diy::ReduceProxy& srp, int dim) const
{
auto udim = static_cast<unsigned>(dim);
int lid = srp.master()->lid(srp.gid());
RCLink* link = static_cast<RCLink*>(srp.master()->link(lid));
// compute and enqueue local histogram
Histogram histogram(bins_);
float width = (link->core().max[dim] - link->core().min[dim])/bins_;
float width = (link->core().max[udim] - link->core().min[udim])/bins_;
for (size_t i = 0; i < (b->*points_).size(); ++i)
{
float x = (b->*points_)[i][dim];
int loc = (x - link->core().min[dim]) / width;
if (loc < 0)
throw std::runtime_error(fmt::format("{} {} {}", loc, x, link->core().min[dim]));
if (loc >= (int) bins_)
float x = (b->*points_)[i][udim];
float floc = (x - link->core().min[udim]) / width;
if (floc < 0)
throw std::runtime_error(fmt::format("{} {} {}", floc, x, link->core().min[udim]));
auto loc = static_cast<size_t>(floc);
if (loc >= bins_)
loc = bins_ - 1;
++(histogram[loc]);
}
@ -390,7 +393,7 @@ compute_local_histogram(Block* b, const diy::ReduceProxy& srp, int dim) const
template<class Block, class Point>
void
diy::detail::KDTreePartition<Block,Point>::
add_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
add_histogram(Block*, const diy::ReduceProxy& srp, Histogram& histogram) const
{
// dequeue and add up the histograms
for (int i = 0; i < srp.in_link().size(); ++i)
@ -407,7 +410,7 @@ add_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
template<class Block, class Point>
void
diy::detail::KDTreePartition<Block,Point>::
receive_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) const
receive_histogram(Block*, const diy::ReduceProxy& srp, Histogram& histogram) const
{
srp.dequeue(srp.in_link().target(0).gid, histogram);
}
@ -415,7 +418,7 @@ receive_histogram(Block* b, const diy::ReduceProxy& srp, Histogram& histogram) c
template<class Block, class Point>
void
diy::detail::KDTreePartition<Block,Point>::
forward_histogram(Block* b, const diy::ReduceProxy& srp, const Histogram& histogram) const
forward_histogram(Block*, const diy::ReduceProxy& srp, const Histogram& histogram) const
{
for (int i = 0; i < srp.out_link().size(); ++i)
srp.enqueue(srp.out_link().target(i), histogram);
@ -445,19 +448,26 @@ enqueue_exchange(Block* b, const diy::ReduceProxy& srp, int dim, const Histogram
size_t cur = 0;
float width = (link->core().max[dim] - link->core().min[dim])/bins_;
float split = 0;
for (size_t i = 0; i < histogram.size(); ++i)
// scope-block for variable `i`
{
if (cur + histogram[i] > total/2)
size_t i = 0;
for (; i < histogram.size(); ++i)
{
split = link->core().min[dim] + width*i;
break;
if (cur + histogram[i] > total/2)
break;
cur += histogram[i];
}
cur += histogram[i];
if (i == 0)
++i;
else if (i >= histogram.size() - 1)
i = histogram.size() - 2;
split = link->core().min[dim] + width*i;
log->trace("Found split: {} (dim={}) in {} - {}", split, dim, link->core().min[dim], link->core().max[dim]);
}
log->trace("Found split: {} (dim={}) in {} - {}", split, dim, link->core().min[dim], link->core().max[dim]);
// subset and enqueue
std::vector< std::vector<Point> > out_points(srp.out_link().size());
std::vector< std::vector<Point> > out_points(static_cast<size_t>(srp.out_link().size()));
for (size_t i = 0; i < (b->*points_).size(); ++i)
{
float x = (b->*points_)[i][dim];
@ -554,7 +564,7 @@ diy::Direction
diy::detail::KDTreePartition<Block,Point>::
find_wrap(const Bounds& bounds, const Bounds& nbr_bounds, const Bounds& domain) const
{
diy::Direction wrap;
diy::Direction wrap(dim_,0);
for (int i = 0; i < dim_; ++i)
{
if (bounds.min[i] == domain.min[i] && nbr_bounds.max[i] == domain.max[i])

@ -85,29 +85,29 @@ struct SampleSort<Block,T,Cmp>::Sampler
Sampler(ValuesVector values_, ValuesVector dividers_, const Cmp& cmp_, size_t num_samples_):
values(values_), dividers(dividers_), cmp(cmp_), num_samples(num_samples_) {}
void operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners& partners) const
void operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners&) const
{
int k_in = srp.in_link().size();
int k_out = srp.out_link().size();
std::vector<T> samples;
std::vector<T> samps;
if (k_in == 0)
{
// draw random samples
for (size_t i = 0; i < num_samples; ++i)
samples.push_back((b->*values)[std::rand() % (b->*values).size()]);
samps.push_back((b->*values)[std::rand() % (b->*values).size()]);
} else
dequeue_values(samples, srp, false);
dequeue_values(samps, srp, false);
if (k_out == 0)
{
// pick subsamples that separate quantiles
std::sort(samples.begin(), samples.end(), cmp);
std::sort(samps.begin(), samps.end(), cmp);
std::vector<T> subsamples(srp.nblocks() - 1);
int step = samples.size() / srp.nblocks(); // NB: subsamples.size() + 1
size_t step = samps.size() / srp.nblocks(); // NB: subsamples.size() + 1
for (size_t i = 0; i < subsamples.size(); ++i)
subsamples[i] = samples[(i+1)*step];
subsamples[i] = samps[(i+1)*step];
(b->*dividers).swap(subsamples);
}
else
@ -115,7 +115,7 @@ struct SampleSort<Block,T,Cmp>::Sampler
for (int i = 0; i < k_out; ++i)
{
MemoryBuffer& out = srp.outgoing(srp.out_link().target(i));
save(out, &samples[0], samples.size());
save(out, &samps[0], samps.size());
}
}
}
@ -139,7 +139,7 @@ struct SampleSort<Block,T,Cmp>::Exchanger
// enqueue values to the correct locations
for (size_t i = 0; i < (b->*values).size(); ++i)
{
int to = std::lower_bound((b->*samples).begin(), (b->*samples).end(), (b->*values)[i], cmp) - (b->*samples).begin();
int to = static_cast<int>(std::lower_bound((b->*samples).begin(), (b->*samples).end(), (b->*values)[i], cmp) - (b->*samples).begin());
rp.enqueue(rp.out_link().target(to), (b->*values)[i]);
}
(b->*values).clear();

@ -20,7 +20,7 @@ namespace diy
void init() { out_ = in_; }
void update(const CollectiveOp& other) { out_ = op_(out_, static_cast<const AllReduceOp&>(other).in_); }
void global(const mpi::communicator& comm) { T res; mpi::all_reduce(comm, out_, res, op_); out_ = res; }
void global(const mpi::communicator& comm) { T res{}; mpi::all_reduce(comm, out_, res, op_); out_ = res; }
void copy_from(const CollectiveOp& other) { out_ = static_cast<const AllReduceOp&>(other).out_; }
void result_out(void* dest) const { *reinterpret_cast<T*>(dest) = out_; }
@ -95,7 +95,7 @@ diy::Master::
process_collectives()
{
auto scoped = prof.scoped("collectives");
DIY_UNUSED(scoped);
VTKMDIY_UNUSED(scoped);
if (collectives().empty())
return;

@ -1,10 +1,9 @@
namespace diy
{
struct Master::tags { enum { queue, piece }; };
struct Master::MessageInfo
{
int from, to;
int nparts;
int round;
};
@ -19,10 +18,10 @@ namespace diy
struct Master::InFlightRecv
{
MemoryBuffer message;
MessageInfo info { -1, -1, -1 };
MessageInfo info { -1, -1, -1, -1 };
bool done = false;
inline void recv(mpi::communicator& comm, const mpi::status& status);
inline bool recv(mpi::communicator& comm, const mpi::status& status);
inline void place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo* iexchange);
void reset() { *this = InFlightRecv(); }
};
@ -43,32 +42,6 @@ namespace diy
size_t limit = 0;
};
struct Master::IExchangeInfo
{
IExchangeInfo():
n(0) {}
IExchangeInfo(size_t n_, mpi::communicator comm_):
n(n_),
comm(comm_),
global_work_(new mpi::window<int>(comm, 1)) { global_work_->lock_all(MPI_MODE_NOCHECK); }
~IExchangeInfo() { global_work_->unlock_all(); }
inline void not_done(int gid);
inline int global_work(); // get global work status (for debugging)
inline bool all_done(); // get global all done status
inline void reset_work(); // reset global work counter
inline int add_work(int work); // add work to global work counter
int inc_work() { return add_work(1); } // increment global work counter
int dec_work() { return add_work(-1); } // decremnent global work counter
size_t n;
mpi::communicator comm;
std::unordered_map<int, bool> done; // gid -> done
std::unique_ptr<mpi::window<int>> global_work_; // global work to do
std::shared_ptr<spd::logger> log = get_logger();
};
// VectorWindow is used to send and receive subsets of a contiguous array in-place
namespace detail
{
@ -90,7 +63,7 @@ namespace diy
struct mpi_datatype< diy::detail::VectorWindow<T> >
{
using VecWin = diy::detail::VectorWindow<T>;
static MPI_Datatype datatype() { return get_mpi_datatype<T>(); }
static diy::mpi::datatype datatype() { return get_mpi_datatype<T>(); }
static const void* address(const VecWin& x) { return x.begin; }
static void* address(VecWin& x) { return x.begin; }
static int count(const VecWin& x) { return static_cast<int>(x.count); }
@ -99,18 +72,8 @@ namespace diy
} // namespace mpi::detail
} // namespace diy
void
diy::Master::IExchangeInfo::
not_done(int gid)
{
if (done[gid])
{
done[gid] = false;
int work = inc_work();
log->debug("[{}] Incrementing work when switching done (on receipt): work = {}\n", gid, work);
} else
log->debug("[{}] Not done, no need to increment work\n", gid);
}
/** InFlightRecv **/
diy::Master::InFlightRecv&
diy::Master::
@ -126,28 +89,27 @@ diy::Master::inflight_sends()
}
// receive message described by status
void
bool
diy::Master::InFlightRecv::
recv(mpi::communicator& comm, const mpi::status& status)
{
bool result = false; // indicates whether this is the first (and possibly only) message of a given queue
if (info.from == -1) // uninitialized
{
MemoryBuffer bb;
comm.recv(status.source(), status.tag(), bb.buffer);
if (status.tag() == tags::piece) // first piece is the header
diy::load_back(bb, info);
info.nparts--;
if (info.nparts > 0) // multi-part message
{
size_t msg_size;
diy::load(bb, msg_size);
diy::load(bb, info);
message.buffer.reserve(msg_size);
}
else // tags::queue
{
diy::load_back(bb, info);
} else
message.swap(bb);
}
result = true;
}
else
{
@ -160,43 +122,34 @@ recv(mpi::communicator& comm, const mpi::status& status)
window.count = count;
comm.recv(status.source(), status.tag(), window);
info.nparts--;
}
if (status.tag() == tags::queue)
if (info.nparts == 0)
done = true;
return result;
}
// once the InFlightRecv is done, place it either out of core or in the appropriate incoming queue
void
diy::Master::InFlightRecv::
place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo* iexchange)
place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo*)
{
size_t size = message.size();
int from = info.from;
int to = info.to;
int external = -1;
message.reset();
auto access = in->map[to][from].access();
access->emplace_back(std::move(message));
if (unload)
{
get_logger()->debug("Directly unloading queue {} <- {}", to, from);
external = storage->put(message); // unload directly
access->back().unload(storage);
}
else if (!iexchange)
{
in->map[to].queues[from].swap(message);
in->map[to].queues[from].reset(); // buffer position = 0
}
else // iexchange
{
auto log = get_logger();
iexchange->not_done(to);
in->map[to].queues[from].append_binary(&message.buffer[0], message.size()); // append insted of overwrite
int work = iexchange->dec_work();
log->debug("[{}] Decrementing work after receiving: work = {}\n", to, work);
}
in->map[to].records[from] = QueueRecord(size, external);
++(in->received);
}

@ -25,7 +25,10 @@ struct diy::Master::ProcessBlock
if ((size_t)cur >= blocks.size())
return;
int i = blocks[cur];
int i = blocks[cur];
int gid = master.gid(i);
stats::Annotation::Guard g( stats::Annotation("diy.block").set(gid) );
if (master.block(i))
{
if (local.size() == (size_t)local_limit)
@ -33,7 +36,7 @@ struct diy::Master::ProcessBlock
local.push_back(i);
}
master.log->debug("Processing block: {}", master.gid(i));
master.log->debug("Processing block: {}", gid);
bool skip = all_skip(i);
@ -58,8 +61,7 @@ struct diy::Master::ProcessBlock
cmd->execute(skip ? 0 : master.block(i), master.proxy(i));
// no longer need them, so get rid of them
current_incoming[master.gid(i)].queues.clear();
current_incoming[master.gid(i)].records.clear();
current_incoming[gid].clear();
}
if (skip && master.block(i) == 0)
@ -93,7 +95,7 @@ execute()
{
log->debug("Entered execute()");
auto scoped = prof.scoped("execute");
DIY_UNUSED(scoped);
VTKMDIY_UNUSED(scoped);
//show_incoming_records();
// touch the outgoing and incoming queues as well as collectives to make sure they exist

@ -0,0 +1,88 @@
#include <atomic>
namespace diy
{
// IExchangeInfo implementation that tracks work with local counters and
// decides global completion through the state machine in control().
struct Master::IExchangeInfoCollective: public IExchangeInfo
{
    // All counters and the state machine start at zero.
    IExchangeInfoCollective(mpi::communicator c, stats::Profiler& p):
        IExchangeInfo(c, p),
        local_work_(0),
        dirty(0),
        state(0)
    {}

    inline bool         all_done() override;            // get global all done status
    inline void         add_work(int work) override;    // add work to global work counter
    inline void         control() override;             // advance the consensus state machine

    std::atomic<int>    local_work_;    // work outstanding on this process
    std::atomic<int>    dirty;          // set to 1 by add_work() while local_work_ is positive
    int                 local_dirty;    // snapshot of dirty handed to the reduction
    int                 all_dirty;      // result of the reduction across the communicator

    std::atomic<int>    state;          // current step of control(); 3 means done
    mpi::request        r;              // pending non-blocking operation, tested in control()

    // debug
    bool                first_ibarrier = true;

    using IExchangeInfo::prof;
};
}
// Reports whether the state machine advanced by control() has reached its
// final state (3).
bool
diy::Master::IExchangeInfoCollective::
all_done()
{
    const int current = state;
    return current == 3;
}
// Adjusts the local work counter by `work` (may be negative) and marks this
// process dirty whenever the counter is positive afterwards.
void
diy::Master::IExchangeInfoCollective::
add_work(int work)
{
    local_work_.fetch_add(work);
    if (local_work_.load() > 0)
        dirty.store(1);
}
// Advances the termination-detection state machine by at most one step per call.
//
//   state 0: idle; once local_work_ is zero, start ibarrier(comm), clear dirty -> state 1
//   state 1: request r pending; once r.test() succeeds, snapshot dirty into
//            local_dirty and start a logical-or all-reduce into all_dirty   -> state 2
//   state 2: request r pending; once r.test() succeeds, either finish
//            (all_dirty == 0 -> state 3) or start over (-> state 0)
//   state 3: final; no branch below matches, all_done() returns true
void
diy::Master::IExchangeInfoCollective::
control()
{
if (state == 0 && local_work_ == 0)
{
// debug
if (first_ibarrier)
{
// one-time profiler bookkeeping: close iexchange-control, open
// consensus-time, then reopen iexchange-control inside it
prof >> "iexchange-control"; // consensus-time cannot nest in iexchange-control
prof << "consensus-time";
prof << "iexchange-control";
first_ibarrier = false;
}
r = ibarrier(comm);
// work added from here on sets dirty again via add_work()
dirty = 0;
state = 1;
} else if (state == 1)
{
// r.test() yields an empty optional while the request is still pending
mpi::optional<mpi::status> ostatus = r.test();
if (ostatus)
{
// copy the atomic flag into a plain int that is passed to the reduction
local_dirty = dirty;
r = mpi::iall_reduce(comm, local_dirty, all_dirty, std::logical_or<int>());
state = 2;
}
} else if (state == 2)
{
mpi::optional<mpi::status> ostatus = r.test();
if (ostatus)
{
if (all_dirty == 0) // done
state = 3;
else
state = 0; // reset
}
}
}

@ -0,0 +1,28 @@
namespace diy
{
// Abstract interface for the work-tracking / termination-detection strategy.
// Concrete strategies implement all_done(), add_work() and control().
struct Master::IExchangeInfo
{
    using   Clock   = std::chrono::high_resolution_clock;
    using   Time    = Clock::time_point;

                        IExchangeInfo(mpi::communicator c, stats::Profiler& p):
                            comm(c),
                            prof(p)
    {
    }

    virtual             ~IExchangeInfo()            = default;

    virtual bool        all_done()                  = 0;    // get global all done status
    virtual void        add_work(int work)          = 0;    // add work to global work counter
    virtual void        control()                   = 0;    // let the strategy make progress

    // Convenience wrappers around add_work().
    void                inc_work()                  { add_work(1); }    // increment work counter
    void                dec_work()                  { add_work(-1); }   // decrement work counter

    mpi::communicator                   comm;
    std::shared_ptr<spd::logger>        log = get_logger();
    stats::Profiler&                    prof;
};
}
#include "iexchange-collective.hpp"

@ -23,31 +23,38 @@ namespace detail
}
}
void operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners& partners) const
void operator()(Block* b, const ReduceProxy& srp, const RegularSwapPartners&) const
{
int k_in = srp.in_link().size();
int k_out = srp.out_link().size();
if (k_in == 0 && k_out == 0) // special case of a single block
{
ReduceProxy all_srp_out(srp, srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
ReduceProxy all_srp_in (srp, srp.block(), 1, srp.assigner(), all_neighbors_link, empty_link);
ReduceProxy all_srp(std::move(const_cast<ReduceProxy&>(srp)), srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
op(b, all_srp_out);
MemoryBuffer& in_queue = all_srp_in.incoming(all_srp_in.in_link().target(0).gid);
in_queue.swap(all_srp_out.outgoing(all_srp_out.out_link().target(0)));
op(b, all_srp);
MemoryBuffer& in_queue = all_srp.incoming(all_srp.in_link().target(0).gid);
in_queue.swap(all_srp.outgoing(all_srp.out_link().target(0)));
in_queue.reset();
all_srp.outgoing()->clear();
op(b, all_srp_in);
// change to incoming proxy
all_srp.set_round(1);
auto& in_link = const_cast<Link&>(all_srp.in_link());
auto& out_link = const_cast<Link&>(all_srp.out_link());
in_link.swap(out_link);
op(b, all_srp);
return;
}
if (k_in == 0) // initial round
{
ReduceProxy all_srp(srp, srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
ReduceProxy all_srp(std::move(const_cast<ReduceProxy&>(srp)), srp.block(), 0, srp.assigner(), empty_link, all_neighbors_link);
op(b, all_srp);
Master::OutgoingQueues all_queues;
Master::Proxy::OutgoingQueues all_queues;
all_queues.swap(*all_srp.outgoing()); // clears out the queues and stores them locally
// enqueue outgoing
@ -67,10 +74,10 @@ namespace detail
} else if (k_out == 0) // final round
{
// dequeue incoming + reorder into the correct order
ReduceProxy all_srp(srp, srp.block(), 1, srp.assigner(), all_neighbors_link, empty_link);
ReduceProxy all_srp(std::move(const_cast<ReduceProxy&>(srp)), srp.block(), 1, srp.assigner(), all_neighbors_link, empty_link);
Master::IncomingQueues all_incoming;
all_incoming.swap(*srp.incoming());
Master::Proxy::IncomingQueues all_incoming;
all_incoming.swap(*all_srp.incoming());
std::pair<int, int> range; // all the ranges should be the same
for (int i = 0; i < k_in; ++i)

@ -0,0 +1,158 @@
#ifndef VTKMDIY_DYNAMIC_POINT_HPP
#define VTKMDIY_DYNAMIC_POINT_HPP
#include <string>
#include <sstream>
#include <vector>
#include <algorithm>
#include "constants.h"
#include "thirdparty/chobo/small_vector.hpp"
namespace diy
{
/**
 * Point (or vector) whose number of coordinates is chosen at run time.
 *
 * Storage is inherited from chobo::small_vector<Coordinate_, static_size>, so
 * size(), begin(), end(), resize() and operator[] come from the parent.
 * Arithmetic is element-wise; operator* between two points is the dot product.
 * Binary operations iterate over the dimension of the left-hand operand and
 * do not check that both operands have the same dimension.
 */
template<class Coordinate_, size_t static_size = VTKMDIY_MAX_DIM>
class DynamicPoint: public chobo::small_vector<Coordinate_, static_size>
{
    public:
        using Coordinate = Coordinate_;
        using Parent     = chobo::small_vector<Coordinate_, static_size>;

        template<class U>
        struct rebind       { typedef  DynamicPoint<U>  type; };

    public:
        // `dim` coordinates, each initialized to `x`
                            DynamicPoint(size_t dim, Coordinate x = 0):
                                Parent(dim, x)                              {}

        // Converting copy from a point with another coordinate type.
        // The storage must be sized before the element-wise copy: a
        // default-constructed parent is empty, so nothing would be copied.
        template<class T>   DynamicPoint(const DynamicPoint<T>& p):
                                Parent(static_cast<size_t>(p.dimension()))
        {
            for (size_t i = 0; i < dimension(); ++i)
                (*this)[i] = p[i];
        }

        // Copy `dim` coordinates from the array `a`; storage is sized first so
        // the element-wise assignment stays within bounds.
        template<class T>   DynamicPoint(const T* a, size_t dim):
                                Parent(dim)
        {
            for (size_t i = 0; i < dim; ++i)
                (*this)[i] = a[i];
        }

        template<class T>   DynamicPoint(const std::vector<T>& a):
                                Parent(a.begin(), a.end())                  {}

                            DynamicPoint(std::initializer_list<Coordinate> lst):
                                Parent(lst.size())
        {
            size_t i = 0;
            for (Coordinate x : lst)
                (*this)[i++] = x;
        }

                            DynamicPoint(DynamicPoint&&)                    =default;
                            DynamicPoint(const DynamicPoint&)               =default;
        DynamicPoint&       operator=(const DynamicPoint&)                  =default;

        // number of coordinates currently stored
        unsigned            dimension() const                               { return static_cast<unsigned>(Parent::size()); }

        static DynamicPoint zero(size_t dim)                                { return DynamicPoint(dim, 0); }
        static DynamicPoint one(size_t dim)                                 { return DynamicPoint(dim, 1); }

        // copy of this point with coordinate `dim` removed
        DynamicPoint        drop(size_t dim) const
        {
            DynamicPoint p(dimension() - 1);
            size_t c = 0;
            for (size_t i = 0; i < dimension(); ++i)
            {
                if (i == dim) continue;
                p[c++] = (*this)[i];
            }
            return p;
        }

        // copy of this point with `x` inserted as coordinate `dim`
        DynamicPoint        lift(size_t dim, Coordinate x) const
        {
            DynamicPoint p(dimension() + 1);
            for (size_t i = 0; i < dimension()+1; ++i)
            {
                if (i < dim)
                    p[i] = (*this)[i];
                else if (i == dim)
                    p[i] = x;
                else if (i > dim)
                    p[i] = (*this)[i-1];
            }
            return p;
        }

        using Parent::operator[];

        DynamicPoint&       operator+=(const DynamicPoint& y)               { for (size_t i = 0; i < dimension(); ++i) (*this)[i] += y[i];  return *this; }
        DynamicPoint&       operator-=(const DynamicPoint& y)               { for (size_t i = 0; i < dimension(); ++i) (*this)[i] -= y[i];  return *this; }
        DynamicPoint&       operator*=(Coordinate a)                        { for (size_t i = 0; i < dimension(); ++i) (*this)[i] *= a;     return *this; }
        DynamicPoint&       operator/=(Coordinate a)                        { for (size_t i = 0; i < dimension(); ++i) (*this)[i] /= a;     return *this; }

        DEPRECATED("Use norm2 instead")
        Coordinate          norm() const                                    { return norm2(); }
        // squared Euclidean norm (dot product of the point with itself)
        Coordinate          norm2() const                                   { return (*this)*(*this); }

        // Writes the coordinates separated by single spaces; an empty point
        // writes nothing (previously this read element 0 unconditionally).
        std::ostream&       operator<<(std::ostream& out) const
        {
            if (dimension() == 0)
                return out;
            out << (*this)[0];
            for (size_t i = 1; i < dimension(); ++i)
                out << " " << (*this)[i];
            return out;
        }
        std::istream&       operator>>(std::istream& in);

        friend
        DynamicPoint        operator+(DynamicPoint x, const DynamicPoint& y)    { x += y; return x; }

        friend
        DynamicPoint        operator-(DynamicPoint x, const DynamicPoint& y)    { x -= y; return x; }

        friend
        DynamicPoint        operator/(DynamicPoint x, Coordinate y)             { x /= y; return x; }

        friend
        DynamicPoint        operator*(DynamicPoint x, Coordinate y)             { x *= y; return x; }

        friend
        DynamicPoint        operator*(Coordinate y, DynamicPoint x)             { x *= y; return x; }

        // dot product
        friend
        Coordinate          operator*(const DynamicPoint& x, const DynamicPoint& y) { Coordinate n = 0; for (size_t i = 0; i < x.dimension(); ++i) n += x[i] * y[i]; return n; }

        // lexicographic ordering on the coordinates
        friend
        bool                operator<(const DynamicPoint& x, const DynamicPoint& y) { return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end()); }

        friend
        bool                operator>(const DynamicPoint& x, const DynamicPoint& y) { return y < x; }

        // dot product with a point of a different coordinate type / static size
        template<class T, size_t s_>
        friend
        Coordinate          operator*(const DynamicPoint<T,s_>& x, const DynamicPoint& y)   { Coordinate n = 0; for (size_t i = 0; i < x.dimension(); ++i) n += x[i] * y[i]; return n; }
};
// Reads one whitespace-delimited token from `in` and parses dimension()
// coordinates out of it, consuming one separator character after each
// coordinate. The point must already have its final dimension.
template<class C, size_t s_>
std::istream&
DynamicPoint<C,s_>::
operator>>(std::istream& in)
{
    std::string token;
    in >> token;        // read until ' '

    std::stringstream fields(token);
    char separator;
    const unsigned count = dimension();
    for (unsigned k = 0; k != count; ++k)
        fields >> (*this)[k] >> separator;

    return in;
}
// Sum of the squared coordinates of `p`.
template<class Coordinate, size_t s_>
Coordinate norm2(const DynamicPoint<Coordinate,s_>& p)
{
    Coordinate total = 0;
    const unsigned count = p.dimension();
    for (unsigned k = 0; k < count; ++k)
        total += p[k]*p[k];
    return total;
}
// Stream inserter: forwards to the member operator<< of the point.
template<class C, size_t s_>
std::ostream&
operator<<(std::ostream& out, const DynamicPoint<C,s_>& p)
{
    std::ostream& result = p.operator<<(out);
    return result;
}
// Stream extractor: forwards to the member operator>> of the point.
template<class C, size_t s_>
std::istream&
operator>>(std::istream& in, DynamicPoint<C,s_>& p)
{
    std::istream& result = p.operator>>(in);
    return result;
}
// Serialization
template<class T>
struct Serialization;
struct BinaryBuffer;
template<class T> void save(BinaryBuffer&, const T&);
template<class T> void load(BinaryBuffer&, T&);
template<class T> void save(BinaryBuffer&, const T*, size_t);
template<class T> void load(BinaryBuffer&, T*, size_t);
// Serialization of a DynamicPoint: the number of coordinates is written first,
// followed by the coordinates themselves (omitted when the point is empty).
template<class C, size_t s_>
struct Serialization<DynamicPoint<C, s_>>
{
    using Point = DynamicPoint<C,s_>;

    static void         save(BinaryBuffer& bb, const Point& p)
    {
        const size_t count = p.size();
        diy::save(bb, count);
        if (count == 0)
            return;
        diy::save(bb, &p[0], count);
    }

    static void         load(BinaryBuffer& bb, Point& p)
    {
        size_t count;
        diy::load(bb, count);
        p.resize(count);        // make room before reading the coordinates
        if (count == 0)
            return;
        diy::load(bb, &p[0], count);
    }
};
}
#endif // VTKMDIY_DYNAMIC_POINT_HPP

@ -0,0 +1,84 @@
#ifndef VTKMDIY_FACTORY_HPP
#define VTKMDIY_FACTORY_HPP
// From http://www.nirfriedman.com/2018/04/29/unforgettable-factory/
// with minor changes.
#include <memory>
#include <string>
#include <unordered_map>
namespace diy
{
// Self-registering factory. A class T that derives from
// Factory<Base, Args...>::Registrar<T> is recorded in a string-keyed table of
// creator functions, keyed by typeid(T).name(); make() looks a key up in that
// table and invokes the creator.
template <class Base, class... Args>
class Factory
{
public:
// Creates the object registered under key `s`, forwarding `args` to the
// stored creator. The lookup uses unordered_map::at, so an unknown key
// raises std::out_of_range. The caller receives a raw owning pointer.
template <class... T>
static Base* make(const std::string &s, T&&... args)
{
return data().at(s)(std::forward<T>(args)...);
}
// Key of this object in the factory table; Registrar<T> overrides it.
virtual std::string id() const { return typeid(Base).name(); }
// Base class to derive from in order to get registered with the factory.
template <class T>
struct Registrar: Base
{
// Stores a creator that does `new T(args...)` under typeid(T).name().
static bool registerT()
{
const auto name = typeid(T).name();
Factory::data()[name] = [](Args... args) -> Base*
{
return new T(std::forward<Args>(args)...);
};
return true;
}
// Initialized by calling registerT() (see the definition after the class).
static volatile bool registered;
std::string id() const override { return typeid(T).name(); }
// Constructor access: private with T as a friend, except under NVCC where
// it is protected instead.
#if defined(__NVCC__)
protected:
#else
private:
friend T;
#endif
#if defined(__INTEL_COMPILER)
__attribute__ ((used))
#endif
// Passes a Key to Base's constructor; the (void)registered expression
// references the static member, presumably so that it is instantiated
// (and registerT() therefore runs) for every T.
Registrar(): Base(Key{}) { (void)registered; }
};
#if defined(__NVCC__)
protected:
#else
private:
friend Base;
#endif
// Tag type whose constructor is private and befriends Registrar only, so a
// Base constructor taking a Key can be called from Registrar alone.
class Key
{
Key(){};
template <class T> friend struct Registrar;
};
// Signature of the stored creator functions.
using FuncType = Base* (*)(Args...);
Factory() = default;
// Table of creators; a function-local static, so it is constructed on first use.
static std::unordered_map<std::string, FuncType>& data()
{
static std::unordered_map<std::string, FuncType> s;
return s;
}
};
// Definition of the per-T static flag. Its initializer calls registerT(), which
// inserts T's creator into the factory table; the stored value is always true.
template <class Base, class... Args>
template <class T>
volatile bool Factory<Base, Args...>::Registrar<T>::registered = Factory<Base, Args...>::Registrar<T>::registerT();
}
#endif

@ -1,535 +0,0 @@
/*
Formatting library for C++
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "format.h"
#include <string.h>
#include <cctype>
#include <cerrno>
#include <climits>
#include <cmath>
#include <cstdarg>
#include <cstddef> // for std::ptrdiff_t
#if defined(_WIN32) && defined(__MINGW32__)
# include <cstring>
#endif
#if FMT_USE_WINDOWS_H
# if !defined(FMT_HEADER_ONLY) && !defined(WIN32_LEAN_AND_MEAN)
# define WIN32_LEAN_AND_MEAN
# endif
# if defined(NOMINMAX) || defined(FMT_WIN_MINMAX)
# include <windows.h>
# else
# define NOMINMAX
# include <windows.h>
# undef NOMINMAX
# endif
#endif
#if FMT_EXCEPTIONS
# define FMT_TRY try
# define FMT_CATCH(x) catch (x)
#else
# define FMT_TRY if (true)
# define FMT_CATCH(x) if (false)
#endif
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable: 4127) // conditional expression is constant
# pragma warning(disable: 4702) // unreachable code
// Disable deprecation warning for strerror. The latter is not called but
// MSVC fails to detect it.
# pragma warning(disable: 4996)
#endif
// Dummy implementations of strerror_r and strerror_s called if corresponding
// system functions are not available.
// Variadic stand-in for strerror_r. It returns the Null<> tag type, which
// safe_strerror's handle(internal::Null<>) overload accepts to take the
// "strerror_r is not available" path.
static inline fmt::internal::Null<> strerror_r(int, char *, ...) {
return fmt::internal::Null<>();
}
// Variadic stand-in for strerror_s. It returns the Null<> tag type, which
// safe_strerror's fallback(internal::Null<>) overload accepts to fall back to
// plain strerror.
static inline fmt::internal::Null<> strerror_s(char *, std::size_t, ...) {
return fmt::internal::Null<>();
}
namespace fmt {
FMT_FUNC internal::RuntimeError::~RuntimeError() FMT_DTOR_NOEXCEPT {}
FMT_FUNC FormatError::~FormatError() FMT_DTOR_NOEXCEPT {}
FMT_FUNC SystemError::~SystemError() FMT_DTOR_NOEXCEPT {}
namespace {
#ifndef _MSC_VER
# define FMT_SNPRINTF snprintf
#else // _MSC_VER
// snprintf replacement used when _MSC_VER is defined: formats into `buffer`
// through vsnprintf_s with _TRUNCATE and returns that call's result.
inline int fmt_snprintf(char *buffer, size_t size, const char *format, ...) {
  va_list ap;
  va_start(ap, format);
  const int written = vsnprintf_s(buffer, size, _TRUNCATE, format, ap);
  va_end(ap);
  return written;
}
# define FMT_SNPRINTF fmt_snprintf
#endif // _MSC_VER
#if defined(_WIN32) && defined(__MINGW32__) && !defined(__NO_ISOCEXT)
# define FMT_SWPRINTF snwprintf
#else
# define FMT_SWPRINTF swprintf
#endif // defined(_WIN32) && defined(__MINGW32__) && !defined(__NO_ISOCEXT)
const char RESET_COLOR[] = "\x1b[0m";
typedef void (*FormatFunc)(Writer &, int, StringRef);
// Portable thread-safe version of strerror.
// Sets buffer to point to a string describing the error code.
// This can be either a pointer to a string stored in buffer,
// or a pointer to some static immutable string.
// Returns one of the following values:
// 0 - success
// ERANGE - buffer is not large enough to store the error message
// other - failure
// Buffer should be at least of size 1.
// Implementation note: the work is done by the local StrError class. run()
// calls strerror_r and passes its result to handle(); overload resolution on
// the result type (int, char* or internal::Null<>) selects the matching
// handler, and the Null<> handler repeats the same trick with strerror_s via
// fallback().
int safe_strerror(
int error_code, char *&buffer, std::size_t buffer_size) FMT_NOEXCEPT {
FMT_ASSERT(buffer != 0 && buffer_size != 0, "invalid buffer");
class StrError {
private:
int error_code_;
// reference to the caller's pointer, so it can be redirected to the message
char *&buffer_;
std::size_t buffer_size_;
// A noop assignment operator to avoid bogus warnings.
void operator=(const StrError &) {}
// Handle the result of XSI-compliant version of strerror_r.
int handle(int result) {
// glibc versions before 2.13 return result in errno.
return result == -1 ? errno : result;
}
// Handle the result of GNU-specific version of strerror_r.
int handle(char *message) {
// If the buffer is full then the message is probably truncated.
if (message == buffer_ && strlen(buffer_) == buffer_size_ - 1)
return ERANGE;
// the returned message may live outside the caller's buffer; point at it
buffer_ = message;
return 0;
}
// Handle the case when strerror_r is not available.
int handle(internal::Null<>) {
return fallback(strerror_s(buffer_, buffer_size_, error_code_));
}
// Fallback to strerror_s when strerror_r is not available.
int fallback(int result) {
// If the buffer is full then the message is probably truncated.
return result == 0 && strlen(buffer_) == buffer_size_ - 1 ?
ERANGE : result;
}
// Fallback to strerror if strerror_r and strerror_s are not available.
int fallback(internal::Null<>) {
// clear errno first so the value returned below reflects this call only
errno = 0;
buffer_ = strerror(error_code_);
return errno;
}
public:
StrError(int err_code, char *&buf, std::size_t buf_size)
: error_code_(err_code), buffer_(buf), buffer_size_(buf_size) {}
// Entry point: dispatches on the return type of whichever strerror_r is found.
int run() {
// Suppress a warning about unused strerror_r.
strerror_r(0, FMT_NULL, "");
return handle(strerror_r(error_code_, buffer_, buffer_size_));
}
};
return StrError(error_code, buffer, buffer_size).run();
}
// Writes "<message>: error <code>" to `out`, dropping the "<message>: " prefix
// when the whole text would not fit into INLINE_BUFFER_SIZE. Keeping the
// output within the inline buffer avoids dynamic allocation and therefore a
// potential bad_alloc while reporting an error.
void format_error_code(Writer &out, int error_code,
                       StringRef message) FMT_NOEXCEPT {
  out.clear();
  static const char SEP[] = ": ";
  static const char ERROR_STR[] = "error ";
  typedef internal::IntTraits<int>::MainType MainType;
  MainType magnitude = static_cast<MainType>(error_code);
  const bool negative = internal::is_negative(error_code);
  if (negative)
    magnitude = 0 - magnitude;
  // Characters taken by SEP, ERROR_STR, an optional '-' and the digits.
  // The -2 removes the two terminating nulls counted by sizeof.
  std::size_t suffix_size = sizeof(SEP) + sizeof(ERROR_STR) - 2;
  if (negative)
    ++suffix_size;
  suffix_size += internal::count_digits(magnitude);
  const bool message_fits =
      message.size() <= internal::INLINE_BUFFER_SIZE - suffix_size;
  if (message_fits)
    out << message << SEP;
  out << ERROR_STR << error_code;
  assert(out.size() <= internal::INLINE_BUFFER_SIZE);
}
// Formats an error with `func` and writes it, followed by a newline, to stderr.
void report_error(FormatFunc func, int error_code,
                  StringRef message) FMT_NOEXCEPT {
  MemoryWriter text;
  func(text, error_code, message);
  // Writer::data is used instead of Writer::c_str to avoid a potential
  // memory allocation.
  const std::size_t length = text.size();
  std::fwrite(text.data(), length, 1, stderr);
  std::fputc('\n', stderr);
}
} // namespace
// Stores the error code and replaces the std::runtime_error base part of this
// object with one built from the formatted system error text.
FMT_FUNC void SystemError::init(
    int err_code, CStringRef format_str, ArgList args) {
  error_code_ = err_code;
  MemoryWriter description;
  format_system_error(description, err_code, format(format_str, args));
  std::runtime_error &self_as_base = *this;
  self_as_base = std::runtime_error(description.str());
}
// Formats `value` into `buffer` via FMT_SNPRINTF. `width` is forwarded only
// when non-zero and `precision` only when non-negative, in that order.
// NOTE(review): presumably `format` holds one `*` per forwarded field - confirm
// against the caller that builds the format string.
template <typename T>
int internal::CharTraits<char>::format_float(
    char *buffer, std::size_t size, const char *format,
    unsigned width, int precision, T value) {
  const bool with_width = width != 0;
  const bool with_precision = precision >= 0;
  if (with_width && with_precision)
    return FMT_SNPRINTF(buffer, size, format, width, precision, value);
  if (with_width)
    return FMT_SNPRINTF(buffer, size, format, width, value);
  if (with_precision)
    return FMT_SNPRINTF(buffer, size, format, precision, value);
  return FMT_SNPRINTF(buffer, size, format, value);
}
// Wide-character counterpart of the char overload: formats `value` into
// `buffer` via FMT_SWPRINTF, forwarding `width` only when non-zero and
// `precision` only when non-negative, in that order.
template <typename T>
int internal::CharTraits<wchar_t>::format_float(
    wchar_t *buffer, std::size_t size, const wchar_t *format,
    unsigned width, int precision, T value) {
  const bool with_width = width != 0;
  const bool with_precision = precision >= 0;
  if (with_width && with_precision)
    return FMT_SWPRINTF(buffer, size, format, width, precision, value);
  if (with_width)
    return FMT_SWPRINTF(buffer, size, format, width, value);
  if (with_precision)
    return FMT_SWPRINTF(buffer, size, format, precision, value);
  return FMT_SWPRINTF(buffer, size, format, value);
}
// Lookup table of the two-character decimal strings "00" through "99",
// concatenated: the pair for k is stored at DIGITS[2 * k] and DIGITS[2 * k + 1].
template <typename T>
const char internal::BasicData<T>::DIGITS[] =
"0001020304050607080910111213141516171819"
"2021222324252627282930313233343536373839"
"4041424344454647484950515253545556575859"
"6061626364656667686970717273747576777879"
"8081828384858687888990919293949596979899";
// Expands to nine comma-separated values: factor * 10^1 through factor * 10^9.
#define FMT_POWERS_OF_10(factor) \
factor * 10, \
factor * 100, \
factor * 1000, \
factor * 10000, \
factor * 100000, \
factor * 1000000, \
factor * 10000000, \
factor * 100000000, \
factor * 1000000000
// 32-bit table: entry 0 is 0 and entry i (1..9) is 10^i.
template <typename T>
const uint32_t internal::BasicData<T>::POWERS_OF_10_32[] = {
0, FMT_POWERS_OF_10(1)
};
// 64-bit table: entry 0 is 0 and entry i (1..19) is 10^i. The second macro
// expansion supplies 10^10..10^18 and the last entry is 10^19.
template <typename T>
const uint64_t internal::BasicData<T>::POWERS_OF_10_64[] = {
0,
FMT_POWERS_OF_10(1),
FMT_POWERS_OF_10(ULongLong(1000000000)),
// Multiply several constants instead of using a single long long constant
// to avoid warnings about C++98 not supporting long long.
ULongLong(1000000000) * ULongLong(1000000000) * 10
};
// Raises a FormatError for an unrecognised format code. Printable codes are
// quoted as-is; any other code is shown as a hexadecimal escape.
FMT_FUNC void internal::report_unknown_type(char code, const char *type) {
  // Keeps `type` referenced even if FMT_THROW does not evaluate its argument.
  (void)type;
  const int printable = std::isprint(static_cast<unsigned char>(code));
  if (printable) {
    FMT_THROW(FormatError(
        format("unknown format code '{}' for {}", code, type)));
  }
  const unsigned numeric_code = static_cast<unsigned>(code);
  FMT_THROW(FormatError(
      format("unknown format code '\\x{:02x}' for {}", numeric_code, type)));
}
#if FMT_USE_WINDOWS_H
// Converts the UTF-8 string `s` into a null-terminated UTF-16 string held in
// buffer_. Throws WindowsError when `s` is longer than INT_MAX bytes or when
// MultiByteToWideChar reports a failure (MB_ERR_INVALID_CHARS is passed, so
// invalid input is rejected rather than substituted).
FMT_FUNC internal::UTF8ToUTF16::UTF8ToUTF16(StringRef s) {
  static const char ERROR_MSG[] = "cannot convert string from UTF-8 to UTF-16";
  if (s.size() > INT_MAX)
    FMT_THROW(WindowsError(ERROR_INVALID_PARAMETER, ERROR_MSG));
  int s_size = static_cast<int>(s.size());
  if (s_size == 0) {
    // MultiByteToWideChar fails when the input length is zero, so an empty
    // string is converted here instead of being reported as an error.
    buffer_.resize(1);
    buffer_[0] = 0;
    return;
  }
  // First call: query the number of UTF-16 code units required.
  int length = MultiByteToWideChar(
      CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), s_size, FMT_NULL, 0);
  if (length == 0)
    FMT_THROW(WindowsError(GetLastError(), ERROR_MSG));
  // One extra element for the terminating null written below.
  buffer_.resize(length + 1);
  // Second call: perform the conversion into buffer_.
  length = MultiByteToWideChar(
      CP_UTF8, MB_ERR_INVALID_CHARS, s.data(), s_size, &buffer_[0], length);
  if (length == 0)
    FMT_THROW(WindowsError(GetLastError(), ERROR_MSG));
  buffer_[length] = 0;
}
// Converts `s` with convert() and throws WindowsError if it reports a
// non-zero error code.
FMT_FUNC internal::UTF16ToUTF8::UTF16ToUTF8(WStringRef s) {
  const int error_code = convert(s);
  if (error_code != 0) {
    FMT_THROW(WindowsError(error_code,
        "cannot convert string from UTF-16 to UTF-8"));
  }
}
// Converts the UTF-16 string `s` into a null-terminated UTF-8 string held in
// buffer_. Returns 0 on success, ERROR_INVALID_PARAMETER when `s` has more
// than INT_MAX code units, or the GetLastError() value when
// WideCharToMultiByte fails. Does not throw on conversion failure.
FMT_FUNC int internal::UTF16ToUTF8::convert(WStringRef s) {
  if (s.size() > INT_MAX)
    return ERROR_INVALID_PARAMETER;
  int s_size = static_cast<int>(s.size());
  if (s_size == 0) {
    // WideCharToMultiByte fails when the input length is zero, so an empty
    // string is converted here instead of being reported as an error.
    buffer_.resize(1);
    buffer_[0] = 0;
    return 0;
  }
  // First call: query the number of bytes required.
  int length = WideCharToMultiByte(
      CP_UTF8, 0, s.data(), s_size, FMT_NULL, 0, FMT_NULL, FMT_NULL);
  if (length == 0)
    return GetLastError();
  // One extra element for the terminating null written below.
  buffer_.resize(length + 1);
  // Second call: perform the conversion into buffer_.
  length = WideCharToMultiByte(
      CP_UTF8, 0, s.data(), s_size, &buffer_[0], length, FMT_NULL, FMT_NULL);
  if (length == 0)
    return GetLastError();
  buffer_[length] = 0;
  return 0;
}
// Stores the error code and replaces the std::runtime_error base part of this
// object with one built from the formatted Windows error text.
FMT_FUNC void WindowsError::init(
    int err_code, CStringRef format_str, ArgList args) {
  error_code_ = err_code;
  MemoryWriter description;
  internal::format_windows_error(
      description, err_code, format(format_str, args));
  std::runtime_error &self_as_base = *this;
  self_as_base = std::runtime_error(description.str());
}
// Writes "<message>: <system text>" to `out`, where the system text comes from
// FormatMessageW converted to UTF-8. If the text cannot be obtained or
// converted, or anything throws, falls back to format_error_code.
FMT_FUNC void internal::format_windows_error(
    Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
  FMT_TRY {
    MemoryBuffer<wchar_t, INLINE_BUFFER_SIZE> wide;
    wide.resize(INLINE_BUFFER_SIZE);
    for (;;) {
      wchar_t *system_text = &wide[0];
      const int produced = FormatMessageW(
          FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
          FMT_NULL, error_code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
          system_text, static_cast<uint32_t>(wide.size()), FMT_NULL);
      if (produced == 0) {
        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
          break;  // Can't get error message, report error code instead.
        // The buffer was too small: double it and ask again.
        wide.resize(wide.size() * 2);
        continue;
      }
      UTF16ToUTF8 narrow;
      if (narrow.convert(system_text) != ERROR_SUCCESS)
        break;
      out << message << ": " << narrow;
      return;
    }
  } FMT_CATCH(...) {}
  fmt::format_error_code(out, error_code, message);  // 'fmt::' is for bcc32.
}
#endif // FMT_USE_WINDOWS_H
// Writes "<message>: <system text>" to `out`, where the system text comes from
// safe_strerror. The scratch buffer is doubled while safe_strerror reports
// ERANGE; on any other failure, or if anything throws, falls back to
// format_error_code.
FMT_FUNC void format_system_error(
    Writer &out, int error_code, StringRef message) FMT_NOEXCEPT {
  FMT_TRY {
    internal::MemoryBuffer<char, internal::INLINE_BUFFER_SIZE> scratch;
    scratch.resize(internal::INLINE_BUFFER_SIZE);
    for (;;) {
      // safe_strerror may redirect this pointer to a static string.
      char *system_text = &scratch[0];
      const int status = safe_strerror(error_code, system_text, scratch.size());
      if (status == 0) {
        out << message << ": " << system_text;
        return;
      }
      if (status == ERANGE) {
        scratch.resize(scratch.size() * 2);
        continue;
      }
      break;  // Can't get error message, report error code instead.
    }
  } FMT_CATCH(...) {}
  fmt::format_error_code(out, error_code, message);  // 'fmt::' is for bcc32.
}
// Fills map_ with a (name, argument) pair for every named argument in `args`.
// Does nothing if map_ already has entries. Two layouts are handled: when the
// last packed slot has type NONE the arguments are read from args.values_,
// otherwise from args.args_, continuing past MAX_PACKED_ARGS until an entry of
// type NONE is found.
template <typename Char>
void internal::ArgMap<Char>::init(const ArgList &args) {
  if (!map_.empty())
    return;
  typedef internal::NamedArg<Char> NamedArg;
  const bool packed_values =
      args.type(ArgList::MAX_PACKED_ARGS - 1) == internal::Arg::NONE;
  if (packed_values) {
    for (unsigned i = 0;/*nothing*/; ++i) {
      const internal::Arg::Type kind = args.type(i);
      if (kind == internal::Arg::NONE)
        return;
      if (kind == internal::Arg::NAMED_ARG) {
        const NamedArg *named =
            static_cast<const NamedArg*>(args.values_[i].pointer);
        map_.push_back(Pair(named->name, *named));
      }
    }
  }
  // First MAX_PACKED_ARGS entries: their types come from args.type().
  for (unsigned i = 0; i != ArgList::MAX_PACKED_ARGS; ++i) {
    if (args.type(i) != internal::Arg::NAMED_ARG)
      continue;
    const NamedArg *named =
        static_cast<const NamedArg*>(args.args_[i].pointer);
    map_.push_back(Pair(named->name, *named));
  }
  // Remaining entries: each carries its own type; NONE marks the end.
  for (unsigned i = ArgList::MAX_PACKED_ARGS;/*nothing*/; ++i) {
    const internal::Arg::Type kind = args.args_[i].type;
    if (kind == internal::Arg::NONE)
      return;
    if (kind == internal::Arg::NAMED_ARG) {
      const NamedArg *named =
          static_cast<const NamedArg*>(args.args_[i].pointer);
      map_.push_back(Pair(named->name, *named));
    }
  }
}
// A fixed buffer cannot be enlarged: any request to grow is reported as a
// std::runtime_error.
template <typename Char>
void internal::FixedBuffer<Char>::grow(std::size_t)
{
  FMT_THROW(std::runtime_error("buffer overflow"));
}
// Returns the argument at `arg_index`. A NONE entry sets `error` and is
// returned unchanged; a NAMED_ARG entry is replaced by the Arg it points to.
FMT_FUNC internal::Arg internal::FormatterBase::do_get_arg(
    unsigned arg_index, const char *&error) {
  internal::Arg result = args_[arg_index];
  if (result.type == internal::Arg::NONE) {
    error = "argument index out of range";
  } else if (result.type == internal::Arg::NAMED_ARG) {
    const internal::Arg *target =
        static_cast<const internal::Arg*>(result.pointer);
    result = *target;
  }
  return result;
}
// Formats a system error with format_system_error and hands it to
// report_error for output.
FMT_FUNC void report_system_error(
    int error_code, fmt::StringRef message) FMT_NOEXCEPT {
  FormatFunc formatter = format_system_error;
  report_error(formatter, error_code, message);
}
#if FMT_USE_WINDOWS_H
// Formats a Windows error with internal::format_windows_error and hands it to
// report_error for output.
FMT_FUNC void report_windows_error(
    int error_code, fmt::StringRef message) FMT_NOEXCEPT {
  FormatFunc formatter = internal::format_windows_error;
  report_error(formatter, error_code, message);
}
#endif
// Formats `args` according to `format_str` and writes the result to `f`.
FMT_FUNC void print(std::FILE *f, CStringRef format_str, ArgList args) {
  MemoryWriter formatted;
  formatted.write(format_str, args);
  const std::size_t count = formatted.size();
  std::fwrite(formatted.data(), 1, count, f);
}
// Convenience overload: same as print(stdout, format_str, args).
FMT_FUNC void print(CStringRef format_str, ArgList args)
{
  std::FILE *destination = stdout;
  print(destination, format_str, args);
}
// Prints formatted text to stdout between an escape sequence selecting color
// `c` and the RESET_COLOR sequence.
FMT_FUNC void print_colored(Color c, CStringRef format, ArgList args) {
  // Template for the escape sequence; the character at index 3 is replaced by
  // the digit for the requested color.
  char escape[] = "\x1b[30m";
  const std::size_t color_digit = 3;
  escape[color_digit] = static_cast<char>('0' + c);
  std::fputs(escape, stdout);
  print(format, args);
  std::fputs(RESET_COLOR, stdout);
}
// Explicit instantiations of the templates defined in this file. They are
// emitted only when FMT_HEADER_ONLY is not defined.
#ifndef FMT_HEADER_ONLY
// Instantiates the static lookup tables (DIGITS, POWERS_OF_10_*).
template struct internal::BasicData<void>;
// Explicit instantiations for char.
template void internal::FixedBuffer<char>::grow(std::size_t);
template void internal::ArgMap<char>::init(const ArgList &args);
// format_float is instantiated for double and long double values.
template FMT_API int internal::CharTraits<char>::format_float(
char *buffer, std::size_t size, const char *format,
unsigned width, int precision, double value);
template FMT_API int internal::CharTraits<char>::format_float(
char *buffer, std::size_t size, const char *format,
unsigned width, int precision, long double value);
// Explicit instantiations for wchar_t.
template void internal::FixedBuffer<wchar_t>::grow(std::size_t);
template void internal::ArgMap<wchar_t>::init(const ArgList &args);
template FMT_API int internal::CharTraits<wchar_t>::format_float(
wchar_t *buffer, std::size_t size, const wchar_t *format,
unsigned width, int precision, double value);
template FMT_API int internal::CharTraits<wchar_t>::format_float(
wchar_t *buffer, std::size_t size, const wchar_t *format,
unsigned width, int precision, long double value);
#endif  // FMT_HEADER_ONLY
} // namespace fmt
#ifdef _MSC_VER
# pragma warning(pop)
#endif

File diff suppressed because it is too large Load Diff

@ -1,35 +0,0 @@
/*
Formatting library for C++ - std::ostream support
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#include "ostream.h"
namespace fmt {
namespace internal {
// Writes the contents of `w` to `os`, in pieces no larger than the maximum
// std::streamsize value. os.write is called at least once, even when `w` is
// empty.
FMT_FUNC void write(std::ostream &os, Writer &w) {
  typedef internal::MakeUnsigned<std::streamsize>::Type UnsignedStreamSize;
  const UnsignedStreamSize chunk_limit =
      internal::to_unsigned((std::numeric_limits<std::streamsize>::max)());
  const char *cursor = w.data();
  UnsignedStreamSize remaining = w.size();
  for (;;) {
    const UnsignedStreamSize n =
        remaining <= chunk_limit ? remaining : chunk_limit;
    os.write(cursor, static_cast<std::streamsize>(n));
    cursor += n;
    remaining -= n;
    if (remaining == 0)
      break;
  }
}
}
// Formats `args` according to `format_str` and writes the result to `os`.
FMT_FUNC void print(std::ostream &os, CStringRef format_str, ArgList args)
{
  MemoryWriter formatted;
  formatted.write(format_str, args);
  internal::write(os, formatted);
}
} // namespace fmt

@ -1,105 +0,0 @@
/*
Formatting library for C++ - std::ostream support
Copyright (c) 2012 - 2016, Victor Zverovich
All rights reserved.
For the license information refer to format.h.
*/
#ifndef FMT_OSTREAM_H_
#define FMT_OSTREAM_H_
#include "format.h"
#include <ostream>
namespace fmt {
namespace internal {
// A std::basic_streambuf that appends everything written to it to a
// Buffer<Char> supplied at construction.
template <class Char>
class FormatBuf : public std::basic_streambuf<Char> {
 private:
  typedef std::basic_streambuf<Char> Base;
  typedef typename Base::int_type int_type;
  typedef typename Base::traits_type traits_type;

  Buffer<Char> &buffer_;

 public:
  FormatBuf(Buffer<Char> &buffer) : buffer_(buffer) {}

 protected:
  // The put-area is always empty. This keeps the implementation simple and
  // the streambuf and the buffer in sync, and sputc never writes into
  // uninitialized memory. The cost is that every sputc results in a (virtual)
  // call to overflow. sputn is unaffected because it always results in a call
  // to xsputn.
  int_type overflow(int_type ch = traits_type::eof()) FMT_OVERRIDE {
    const bool is_eof = traits_type::eq_int_type(ch, traits_type::eof());
    if (is_eof)
      return ch;
    buffer_.push_back(static_cast<Char>(ch));
    return ch;
  }

  std::streamsize xsputn(const Char *s, std::streamsize count) FMT_OVERRIDE {
    const Char *end = s + count;
    buffer_.append(s, end);
    return count;
  }
};
// Declarations used only inside the unevaluated sizeof expression below; no
// definitions are provided here.
// NOTE(review): presumably the overloads of convert() returning No& are
// declared in format.h - confirm.
Yes &convert(std::ostream &);
struct DummyStream : std::ostream {
DummyStream();  // Suppress a bogus warning in MSVC.
// Hide all operator<< overloads from std::ostream.
void operator<<(Null<>);
};
// Fallback candidate for `DummyStream << T`; its No& result marks the case
// where this overload, taking int, was the one selected.
No &operator<<(std::ostream &, int);
template<typename T>
struct ConvertToIntImpl<T, true> {
// Convert to int only if T doesn't have an overloaded operator<<.
enum {
// True when convert(DummyStream << T) has the size of No.
value = sizeof(convert(get<DummyStream>() << get<T>())) == sizeof(No)
};
};
// Write the content of w to os.
FMT_API void write(std::ostream &os, Writer &w);
} // namespace internal
// Formats `value` by streaming it with operator<< into a temporary buffer and
// passing the resulting characters to the formatter `f` as a string argument.
// `format_str` is updated to the position returned by f.format.
template <typename Char, typename ArgFormatter_, typename T>
void format_arg(BasicFormatter<Char, ArgFormatter_> &f,
                const Char *&format_str, const T &value) {
  typedef internal::MakeArg< BasicFormatter<Char> > MakeArg;
  internal::MemoryBuffer<Char, internal::INLINE_BUFFER_SIZE> storage;
  internal::FormatBuf<Char> sink(storage);
  std::basic_ostream<Char> stream(&sink);
  stream << value;
  BasicStringRef<Char> text(&storage[0], storage.size());
  format_str = f.format(format_str, MakeArg(text));
}
/**
\rst
Prints formatted data to the stream *os*.
**Example**::
print(cerr, "Don't {}!", "panic");
\endrst
*/
FMT_API void print(std::ostream &os, CStringRef format_str, ArgList args);
FMT_VARIADIC(void, print, std::ostream &, CStringRef)
} // namespace fmt
#ifdef FMT_HEADER_ONLY
# include "ostream.cc"
#endif
#endif // FMT_OSTREAM_H_

@ -55,7 +55,7 @@ struct GridRef
inline
Vertex vertex(Index idx) const;
Index index(const Vertex& v) const { Index idx = 0; for (unsigned i = 0; i < D; ++i) { idx += ((Index) v[i]) * ((Index) stride_[i]); } return idx; }
Index index(const Vertex& v) const { Index idx = 0; for (unsigned i = 0; i < D; ++i) { idx += ((Index) v[i]) * stride_[i]; } return idx; }
Index size() const { return size(shape()); }
void swap(GridRef& other) { std::swap(data_, other.data_); std::swap(shape_, other.shape_); std::swap(stride_, other.stride_); std::swap(c_order_, other.c_order_); }
@ -73,10 +73,9 @@ struct GridRef
{
Index cur = 1;
if (c_order_)
for (unsigned i = D; i > 0; --i) { stride_[i-1] = cur; cur *= shape_[i-1]; }
for (unsigned i = D; i > 0; --i) { stride_[i-1] = cur; cur *= static_cast<Index>(shape_[i-1]); }
else
for (unsigned i = 0; i < D; ++i) { stride_[i] = cur; cur *= shape_[i]; }
for (unsigned i = 0; i < D; ++i) { stride_[i] = cur; cur *= static_cast<Index>(shape_[i]); }
}
void set_shape(const Vertex& v) { shape_ = v; set_stride(); }
void set_data(C* data) { data_ = data; }
@ -85,7 +84,7 @@ struct GridRef
private:
C* data_;
Vertex shape_;
Vertex stride_;
diy::Point<Index, D> stride_;
bool c_order_;
};
@ -107,8 +106,8 @@ struct Grid: public GridRef<C,D>
Grid():
Parent(new C[0], Vertex::zero()) {}
template<class Int>
Grid(const Point<Int, D>& shape, bool c_order = true):
Parent(new C[size(shape)], shape, c_order)
Grid(const Point<Int, D>& s, bool c_order = true):
Parent(new C[size(s)], s, c_order)
{}
Grid(Grid&& g): Grid() { Parent::swap(g); }
@ -147,11 +146,11 @@ struct Grid: public GridRef<C,D>
private:
template<class OC>
void copy_data(const OC* data)
void copy_data(const OC* data_)
{
Index s = size(shape());
for (Index i = 0; i < s; ++i)
Parent::data()[i] = data[i];
Parent::data()[i] = data_[i];
}
};
@ -181,13 +180,13 @@ vertex(typename GridRef<C, D>::Index idx) const
if (c_order())
for (unsigned i = 0; i < D; ++i)
{
v[i] = idx / stride_[i];
v[i] = static_cast<int>(idx / stride_[i]);
idx %= stride_[i];
}
else
for (int i = D-1; i >= 0; --i)
{
v[i] = idx / stride_[i];
v[i] = static_cast<int>(idx / stride_[i]);
idx %= stride_[i];
}
return v;

@ -205,12 +205,11 @@ namespace io
extra.reset();
// Get local gids from assigner
size_t size = all_offset_counts.size();
assigner.set_nblocks(size);
assigner.set_nblocks(static_cast<int>(all_offset_counts.size()));
std::vector<int> gids;
assigner.local_gids(comm.rank(), gids);
for (unsigned i = 0; i < gids.size(); ++i)
for (size_t i = 0; i < gids.size(); ++i)
{
if (gids[i] != all_offset_counts[gids[i]].gid)
get_logger()->warn("gids don't match in diy::io::read_blocks(), {} vs {}",
@ -342,7 +341,7 @@ namespace split
}
// Get local gids from assigner
assigner.set_nblocks(size);
assigner.set_nblocks(static_cast<int>(size));
std::vector<int> gids;
assigner.local_gids(comm.rank(), gids);

@ -2,11 +2,8 @@
#define VTKMDIY_IO_BOV_HPP
#include <vector>
#include <algorithm>
#include <numeric>
#include "../types.hpp"
#include "../mpi.hpp"
#include "../mpi/io.hpp"
namespace diy
{
@ -39,8 +36,9 @@ namespace io
shape_.push_back(shape[i]);
stride_.push_back(1);
}
for (int i = shape_.size() - 2; i >= 0; --i)
for (auto i = shape_.size() - 2; i == 0; --i)
stride_[i] = stride_[i+1] * shape_[i+1];
stride_[0] = stride_[1] * shape_[1];
}
const Shape& shape() const { return shape_; }
@ -71,50 +69,7 @@ void
diy::io::BOV::
read(const DiscreteBounds& bounds, T* buffer, bool collective, int chunk) const
{
#ifndef VTKM_DIY_NO_MPI
int dim = shape_.size();
int total = 1;
std::vector<int> subsizes;
for (int i = 0; i < dim; ++i)
{
subsizes.push_back(bounds.max[i] - bounds.min[i] + 1);
total *= subsizes.back();
}
MPI_Datatype T_type;
if (chunk == 1)
T_type = mpi::detail::get_mpi_datatype<T>();
else
{
// create an MPI struct of size chunk to read the data in those chunks
// (this allows to work around MPI-IO weirdness where crucial quantities
// are ints, which are too narrow of a type)
int array_of_blocklengths[] = { chunk };
MPI_Aint array_of_displacements[] = { 0 };
MPI_Datatype array_of_types[] = { mpi::detail::get_mpi_datatype<T>() };
MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
MPI_Type_commit(&T_type);
}
MPI_Datatype fileblk;
MPI_Type_create_subarray(dim, (int*) &shape_[0], &subsizes[0], (int*) &bounds.min[0], MPI_ORDER_C, T_type, &fileblk);
MPI_Type_commit(&fileblk);
MPI_File_set_view(f_.handle(), offset_, T_type, fileblk, (char*)"native", MPI_INFO_NULL);
mpi::status s;
if (!collective)
MPI_File_read(f_.handle(), buffer, total, T_type, &s.s);
else
MPI_File_read_all(f_.handle(), buffer, total, T_type, &s.s);
if (chunk != 1)
MPI_Type_free(&T_type);
MPI_Type_free(&fileblk);
#else
(void) bounds; (void) buffer; (void) collective; (void)chunk;
DIY_UNSUPPORTED_MPI_CALL(diy::io::BOV::read);
#endif
f_.read_bov(bounds, static_cast<int>(shape_.size()), shape_.data(), reinterpret_cast<char*>(buffer), offset_, mpi::detail::get_mpi_datatype<T>(), collective, chunk);
}
template<class T>
@ -130,52 +85,7 @@ void
diy::io::BOV::
write(const DiscreteBounds& bounds, const T* buffer, const DiscreteBounds& core, bool collective, int chunk)
{
#ifndef VTKM_DIY_NO_MPI
int dim = shape_.size();
std::vector<int> subsizes;
std::vector<int> buffer_shape, buffer_start;
for (int i = 0; i < dim; ++i)
{
buffer_shape.push_back(bounds.max[i] - bounds.min[i] + 1);
buffer_start.push_back(core.min[i] - bounds.min[i]);
subsizes.push_back(core.max[i] - core.min[i] + 1);
}
MPI_Datatype T_type;
if (chunk == 1)
T_type = mpi::detail::get_mpi_datatype<T>();
else
{
// assume T is a binary block and create an MPI struct of appropriate size
int array_of_blocklengths[] = { chunk };
MPI_Aint array_of_displacements[] = { 0 };
MPI_Datatype array_of_types[] = { mpi::detail::get_mpi_datatype<T>() };
MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
MPI_Type_commit(&T_type);
}
MPI_Datatype fileblk, subbuffer;
MPI_Type_create_subarray(dim, (int*) &shape_[0], &subsizes[0], (int*) &core.min[0], MPI_ORDER_C, T_type, &fileblk);
MPI_Type_create_subarray(dim, (int*) &buffer_shape[0], &subsizes[0], (int*) &buffer_start[0], MPI_ORDER_C, T_type, &subbuffer);
MPI_Type_commit(&fileblk);
MPI_Type_commit(&subbuffer);
MPI_File_set_view(f_.handle(), offset_, T_type, fileblk, (char*)"native", MPI_INFO_NULL);
mpi::status s;
if (!collective)
MPI_File_write(f_.handle(), (void*)buffer, 1, subbuffer, &s.s);
else
MPI_File_write_all(f_.handle(), (void*)buffer, 1, subbuffer, &s.s);
if (chunk != 1)
MPI_Type_free(&T_type);
MPI_Type_free(&fileblk);
MPI_Type_free(&subbuffer);
#else
(void) bounds; (void) buffer;(void) core; (void) collective; (void) chunk;
DIY_UNSUPPORTED_MPI_CALL(diy::io::bov::write);
#endif
f_.write_bov(bounds, core, static_cast<int>(shape_.size()), shape_.data(), reinterpret_cast<const char*>(buffer), offset_, mpi::detail::get_mpi_datatype<T>(), collective, chunk);
}
#endif

@ -79,21 +79,21 @@ parse_npy_header(BOV::Shape& shape, bool& fortran_order)
header = header.substr(11, nl - 11 + 1);
size_t header_size = nl + 1;
int loc1, loc2;
size_t loc1, loc2;
//fortran order
loc1 = header.find("fortran_order")+16;
fortran_order = (header.substr(loc1,4) == "True" ? true : false);
//shape
unsigned ndims;
size_t ndims;
loc1 = header.find("(");
loc2 = header.find(")");
std::string str_shape = header.substr(loc1+1,loc2-loc1-1);
if(str_shape[str_shape.size()-1] == ',') ndims = 1;
else ndims = std::count(str_shape.begin(),str_shape.end(),',')+1;
shape.resize(ndims);
for(unsigned int i = 0;i < ndims;i++) {
for(size_t i = 0;i < ndims;i++) {
loc1 = str_shape.find(",");
shape[i] = atoi(str_shape.substr(0,loc1).c_str());
str_shape = str_shape.substr(loc1+1);

@ -31,8 +31,8 @@ class SharedOutFile: public std::ostringstream
// write the file serially
std::ofstream out(filename_);
for (auto& contents : all_contents)
out.write(contents.data(), contents.size());
for (auto& cntnts : all_contents)
out.write(cntnts.data(), cntnts.size());
} else
diy::mpi::gather(world_, contents, root_);
}

@ -14,7 +14,7 @@
#include <cstdlib> // mkstemp() on Linux
#include <sys/stat.h>
#include "../constants.h" // for DIY_UNUSED
#include "../constants.h" // for VTKMDIY_UNUSED
namespace diy
{
@ -82,8 +82,8 @@ namespace utils
_close(fd);
}
#else
auto r = ::truncate(filename.c_str(), static_cast<off_t>(length));
(void) r;
int error = ::truncate(filename.c_str(), static_cast<off_t>(length));
VTKMDIY_UNUSED(error);
#endif
}
@ -141,7 +141,7 @@ namespace utils
inline void sync(int fd)
{
#if defined(_WIN32)
DIY_UNUSED(fd);
VTKMDIY_UNUSED(fd);
#else
fsync(fd);
#endif

@ -1,5 +1,5 @@
#ifndef VTKMDIY_COVER_HPP
#define VTKMDIY_COVER_HPP
#ifndef VTKMDIY_LINK_HPP
#define VTKMDIY_LINK_HPP
#include <vector>
#include <map>
@ -9,14 +9,22 @@
#include "serialization.hpp"
#include "assigner.hpp"
#include "factory.hpp"
namespace diy
{
// Local view of a distributed representation of a cover, a completely unstructured link
class Link
class Link: public Factory<Link>
{
public:
using Neighbors = std::vector<BlockID>;
Link(Key) {} // for Factory
Link() = default;
Link(const Link&) = default;
Link(Link&&) = default;
Link& operator=(const Link&) = default;
Link& operator=(Link&&) = default;
virtual ~Link() {} // need to be able to delete derived classes
int size() const { return static_cast<int>(neighbors_.size()); }
@ -38,11 +46,11 @@ namespace diy
Neighbors&
neighbors() { return neighbors_; }
virtual Link* clone() const { return new Link(*this); }
virtual void save(BinaryBuffer& bb) const { diy::save(bb, neighbors_); }
virtual void load(BinaryBuffer& bb) { diy::load(bb, neighbors_); }
virtual size_t id() const { return 0; }
private:
Neighbors neighbors_;
};
@ -50,32 +58,13 @@ namespace diy
template<class Bounds_>
class RegularLink;
typedef RegularLink<DiscreteBounds> RegularGridLink;
typedef RegularLink<ContinuousBounds> RegularContinuousLink;
// Selector between regular discrete and continuous links given bounds type
template<class Bounds_>
struct RegularLinkSelector;
template<>
struct RegularLinkSelector<DiscreteBounds>
{
typedef RegularGridLink type;
static const size_t id = 1;
};
template<>
struct RegularLinkSelector<ContinuousBounds>
{
typedef RegularContinuousLink type;
static const size_t id = 2;
};
using RegularGridLink = RegularLink<DiscreteBounds>;
using RegularContinuousLink = RegularLink<ContinuousBounds>;
// for a regular decomposition, it makes sense to address the neighbors by direction
// and store local and neighbor bounds
template<class Bounds_>
class RegularLink: public Link
class RegularLink: public Link::Registrar<RegularLink<Bounds_>>
{
public:
typedef Bounds_ Bounds;
@ -84,6 +73,8 @@ namespace diy
typedef std::vector<Direction> DirVec;
public:
RegularLink():
dim_(0), core_(0), bounds_(0) {} // for Factory
RegularLink(int dim, const Bounds& core__, const Bounds& bounds__):
dim_(dim), core_(core__), bounds_(bounds__) {}
@ -93,7 +84,7 @@ namespace diy
// direction
int direction(Direction dir) const; // convert direction to a neighbor (-1 if no neighbor)
Direction direction(int i) const { return dir_vec_[i]; }
void add_direction(Direction dir) { int c = dir_map_.size(); dir_map_[dir] = c; dir_vec_.push_back(dir); }
void add_direction(Direction dir) { auto c = static_cast<int>(dir_map_.size()); dir_map_[dir] = c; dir_vec_.push_back(dir); }
// wrap
void add_wrap(Direction dir) { wrap_.push_back(dir); }
@ -105,12 +96,16 @@ namespace diy
Bounds& core() { return core_; }
const Bounds& bounds() const { return bounds_; }
Bounds& bounds() { return bounds_; }
const Bounds& core(int i) const { return nbr_cores_[i]; }
const Bounds& bounds(int i) const { return nbr_bounds_[i]; }
void add_core(const Bounds& core__) { nbr_cores_.push_back(core__); }
void add_bounds(const Bounds& bounds__) { nbr_bounds_.push_back(bounds__); }
// Exchanges the complete state of this link with `other`: the base Link part
// plus every RegularLink member.
void swap(RegularLink& other)
{
    Link::swap(other);
    std::swap(dim_, other.dim_);
    dir_map_.swap(other.dir_map_);
    dir_vec_.swap(other.dir_vec_);
    std::swap(core_, other.core_);
    std::swap(bounds_, other.bounds_);
    // nbr_cores_ is stored alongside nbr_bounds_ and must travel with it;
    // leaving it out would pair each link with the other's neighbor cores.
    nbr_cores_.swap(other.nbr_cores_);
    nbr_bounds_.swap(other.nbr_bounds_);
    wrap_.swap(other.wrap_);
}
void save(BinaryBuffer& bb) const
Link* clone() const override { return new RegularLink(*this); }
void save(BinaryBuffer& bb) const override
{
Link::save(bb);
diy::save(bb, dim_);
@ -118,11 +113,12 @@ namespace diy
diy::save(bb, dir_vec_);
diy::save(bb, core_);
diy::save(bb, bounds_);
diy::save(bb, nbr_cores_);
diy::save(bb, nbr_bounds_);
diy::save(bb, wrap_);
}
void load(BinaryBuffer& bb)
void load(BinaryBuffer& bb) override
{
Link::load(bb);
diy::load(bb, dim_);
@ -130,12 +126,11 @@ namespace diy
diy::load(bb, dir_vec_);
diy::load(bb, core_);
diy::load(bb, bounds_);
diy::load(bb, nbr_cores_);
diy::load(bb, nbr_bounds_);
diy::load(bb, wrap_);
}
virtual size_t id() const { return RegularLinkSelector<Bounds>::id; }
private:
int dim_;
@ -144,31 +139,139 @@ namespace diy
Bounds core_;
Bounds bounds_;
std::vector<Bounds> nbr_cores_;
std::vector<Bounds> nbr_bounds_;
std::vector<Direction> wrap_;
};
// Other cover candidates: KDTreeLink, AMRGridLink
// Link that records, for the local block and for each neighbor, a Description
// made of a level, a refinement, a core and a bounds, plus a list of wrap
// directions. save()/load() write and read these after the base Link data.
struct AMRLink: public Link::Registrar<AMRLink>
{
    typedef DiscreteBounds              Bounds;
    typedef std::vector<Direction>      Directions;
    typedef Bounds::Point               Point;

    // Per-block record; the defaults describe an unset block.
    struct Description
    {
        int         level       { -1 };
        Point       refinement  { 0 };      // refinement of this level w.r.t. level 0
        Bounds      core        { 0 };
        Bounds      bounds      { 0 };      // with ghosts

        Description() = default;
        Description(int level_, Point refinement_, Bounds core_, Bounds bounds_):
            level(level_), refinement(refinement_), core(core_), bounds(bounds_)
        {}
    };
    typedef std::vector<Description>    Descriptions;

    // Construction. An integer refinement is expanded to
    // refinement * Point::one(dim).
    AMRLink(int dim, int level, Point refinement, const Bounds& core, const Bounds& bounds):
        dim_(dim), local_ { level, refinement, core, bounds }
    {}
    AMRLink(int dim, int level, int refinement, const Bounds& core, const Bounds& bounds):
        AMRLink(dim, level, refinement * Point::one(dim), core, bounds)
    {}
    AMRLink():
        AMRLink(0, -1, 0, Bounds(0), Bounds(0))     // for Factory
    {}

    // dimension
    int dimension() const
    { return dim_; }

    // level and refinement: no argument for the local block, index i for neighbor i
    int level() const
    { return local_.level; }
    int level(int i) const
    { return nbr_descriptions_[i].level; }
    Point refinement() const
    { return local_.refinement; }
    Point refinement(int i) const
    { return nbr_descriptions_[i].refinement; }

    // wrap
    void add_wrap(Direction dir)
    { wrap_.push_back(dir); }
    const Directions& wrap() const
    { return wrap_; }

    // bounds: no argument for the local block, index i for neighbor i
    const Bounds& core() const
    { return local_.core; }
    Bounds& core()
    { return local_.core; }
    const Bounds& bounds() const
    { return local_.bounds; }
    Bounds& bounds()
    { return local_.bounds; }
    const Bounds& core(int i) const
    { return nbr_descriptions_[i].core; }
    const Bounds& bounds(int i) const
    { return nbr_descriptions_[i].bounds; }

    // Append the description of one more neighbor.
    void add_bounds(int level_, Point refinement_, const Bounds& core_, const Bounds& bounds_)
    {
        nbr_descriptions_.emplace_back(level_, refinement_, core_, bounds_);
    }
    void add_bounds(int level_, int refinement_, const Bounds& core_, const Bounds& bounds_)
    {
        add_bounds(level_, refinement_ * Point::one(dim_), core_, bounds_);
    }

    Link* clone() const override
    { return new AMRLink(*this); }

    // Serialization: base Link data first, then the members in a fixed order
    // that load() mirrors.
    void save(BinaryBuffer& bb) const override
    {
        Link::save(bb);
        diy::save(bb, dim_);
        diy::save(bb, local_);
        diy::save(bb, nbr_descriptions_);
        diy::save(bb, wrap_);
    }

    void load(BinaryBuffer& bb) override
    {
        Link::load(bb);
        diy::load(bb, dim_);
        diy::load(bb, local_);
        diy::load(bb, nbr_descriptions_);
        diy::load(bb, wrap_);
    }

private:
    int             dim_;
    Description     local_;
    Descriptions    nbr_descriptions_;
    Directions      wrap_;
};
struct LinkFactory
{
public:
static Link* create(size_t id)
static Link* create(std::string name)
{
// not pretty, but will do for now
if (id == 0)
return new Link;
else if (id == 1)
return new RegularGridLink(0, DiscreteBounds(), DiscreteBounds());
else if (id == 2)
return new RegularContinuousLink(0, ContinuousBounds(), ContinuousBounds());
else
return 0;
return Link::make(name);
}
inline static void save(BinaryBuffer& bb, const Link* l);
inline static Link* load(BinaryBuffer& bb);
};
namespace detail
{
    // Creates one RegularLink object for each of the common coordinate types
    // (int, float, double, long). Instantiating these types is what registers
    // them; the objects themselves are not used.
    inline void instantiate_common_regular_links()
    {
        RegularLink<Bounds<int>>    int_link;
        RegularLink<Bounds<float>>  float_link;
        RegularLink<Bounds<double>> double_link;
        RegularLink<Bounds<long>>   long_link;
    }
}
// Serialization of AMRLink::Description. The four fields are written and read
// in the same fixed order (level, refinement, core, bounds); save() and load()
// must stay in step with each other.
template<>
struct Serialization<diy::AMRLink::Description>
{
// Write the fields of `x` into `bb`.
static void save(diy::BinaryBuffer& bb, const diy::AMRLink::Description& x)
{
diy::save(bb, x.level);
diy::save(bb, x.refinement);
diy::save(bb, x.core);
diy::save(bb, x.bounds);
}
// Read the fields of `x` back from `bb`, in the order save() wrote them.
static void load(diy::BinaryBuffer& bb, diy::AMRLink::Description& x)
{
diy::load(bb, x.level);
diy::load(bb, x.refinement);
diy::load(bb, x.core);
diy::load(bb, x.bounds);
}
};
}
@ -184,7 +287,7 @@ diy::Link*
diy::LinkFactory::
load(BinaryBuffer& bb)
{
size_t id;
std::string id;
diy::load(bb, id);
Link* l = create(id);
l->load(bb);
@ -223,4 +326,4 @@ direction(Direction dir) const
return it->second;
}
#endif
#endif // VTKMDIY_LINK_HPP

@ -4,8 +4,8 @@
#ifndef VTKMDIY_USE_SPDLOG
#include <memory>
#include "fmt/format.h"
#include "fmt/ostream.h"
#include "thirdparty/fmt/format.h"
#include "thirdparty/fmt/ostream.h"
namespace diy
{
@ -47,12 +47,13 @@ set_logger(Args...)
} // diy
#else // DIY_USE_SPDLOG
#else // VTKMDIY_USE_SPDLOG
#include <string>
#include <spdlog/spdlog.h>
#include <spdlog/sinks/null_sink.h>
#include <spdlog/sinks/stdout_sinks.h>
#include <spdlog/fmt/bundled/format.h>
#include <spdlog/fmt/bundled/ostream.h>
@ -80,10 +81,7 @@ std::shared_ptr<spd::logger>
create_logger(std::string log_level)
{
auto log = spd::stderr_logger_mt("diy");
int lvl;
for (lvl = spd::level::trace; lvl < spd::level::off; ++lvl)
if (spd::level::level_names[lvl] == log_level)
break;
int lvl = spd::level::from_str(log_level);
log->set_level(static_cast<spd::level::level_enum>(lvl));
return log;
}
@ -100,4 +98,4 @@ set_logger(Args... args)
#endif
#endif // DIY_LOG_HPP
#endif // VTKMDIY_LOG_HPP

File diff suppressed because it is too large Load Diff

@ -1,14 +1,9 @@
#ifndef VTKMDIY_MPI_HPP
#define VTKMDIY_MPI_HPP
#ifndef VTKM_DIY_NO_MPI
#include <mpi.h>
#else
#include "mpi/no-mpi.hpp"
#endif
#include "mpi/constants.hpp"
#include "mpi/config.hpp"
#include "mpi/datatypes.hpp"
#include "mpi/environment.hpp"
#include "mpi/optional.hpp"
#include "mpi/status.hpp"
#include "mpi/request.hpp"
@ -18,54 +13,4 @@
#include "mpi/io.hpp"
#include "mpi/window.hpp"
namespace diy
{
namespace mpi
{
//! \ingroup MPI
struct environment
{
inline environment(int threading = MPI_THREAD_FUNNELED);
inline environment(int argc, char* argv[], int threading = MPI_THREAD_FUNNELED);
inline ~environment();
int threading() const { return provided_threading; }
int provided_threading;
};
}
}
diy::mpi::environment::
environment(int threading)
{
#ifndef VTKM_DIY_NO_MPI
int argc = 0; char** argv;
MPI_Init_thread(&argc, &argv, threading, &provided_threading);
#else
provided_threading = threading;
#endif
}
diy::mpi::environment::
environment(int argc, char* argv[], int threading)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Init_thread(&argc, &argv, threading, &provided_threading);
#else
(void) argc; (void) argv;
provided_threading = threading;
#endif
}
diy::mpi::environment::
~environment()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Finalize();
#endif
}
#endif
#endif // VTKMDIY_MPI_HPP

@ -0,0 +1,161 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "collectives.hpp"
#endif
namespace diy
{
namespace mpi
{
namespace detail
{
// Local stand-in for a collective's data movement when MPI is not available:
// copies `count` items of `size` bytes each from `src` to `dst`.
//
// Nothing is copied when
//   - the two buffers are the same (in-place calls pass one pointer twice),
//   - either pointer is null (the "no output buffer" overloads hand down a
//     null destination, which must not be written through), or
//   - `count` is not positive or `size` is zero (a negative `count` would
//     otherwise wrap to a huge byte total when converted to size_t).
inline void copy_buffer(const void* src, void* dst, size_t size, int count)
{
    if (src == nullptr || dst == nullptr || src == dst || count <= 0 || size == 0)
        return;

    std::copy_n(static_cast<const int8_t*>(src),
                size * static_cast<size_t>(count),
                static_cast<int8_t*>(dst));
}
// Thin wrapper over MPI_Bcast for `count` items of `type` starting at `data`.
// Without MPI there is nothing to do and every argument is ignored.
void broadcast(const communicator& comm, void* data, int count, const datatype& type, int root)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    const auto mpi_comm = mpi_cast(comm.handle());
    MPI_Bcast(data, count, mpi_type, root, mpi_comm);
#else
    (void) comm;
    (void) data;
    (void) count;
    (void) type;
    (void) root;
#endif
}
// Thin wrapper over MPI_Ibcast; the returned request carries the handle that
// MPI_Ibcast fills in. Without MPI the arguments are ignored and a
// default-constructed request is returned.
request ibroadcast(const communicator& comm, void* data, int count, const datatype& type, int root)
{
    request pending;
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    const auto mpi_comm = mpi_cast(comm.handle());
    MPI_Ibcast(data, count, mpi_type, root, mpi_comm, &mpi_cast(pending.handle));
#else
    (void) comm;
    (void) data;
    (void) count;
    (void) type;
    (void) root;
#endif
    return pending;
}
// Thin wrapper over MPI_Gather using the same count and datatype on the send
// and receive side. Without MPI the input is copied to the output through
// copy_buffer(), with mpi_cast(type.handle) used as the per-item byte size.
void gather(const communicator& comm,
            const void* dataIn, int count, const datatype& type, void* dataOut,
            int root)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    MPI_Gather(dataIn, count, mpi_type,
               dataOut, count, mpi_type,
               root, mpi_cast(comm.handle()));
#else
    (void) comm;
    (void) root;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
}
// Thin wrapper over MPI_Gatherv. Without MPI the `countIn` input items are
// copied to the output through copy_buffer(); `counts` and `offsets` are not
// consulted in that case.
void gather_v(const communicator& comm,
              const void* dataIn, int countIn, const datatype& type,
              void* dataOut, const int counts[], const int offsets[],
              int root)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    MPI_Gatherv(dataIn, countIn, mpi_type,
                dataOut, counts, offsets, mpi_type,
                root, mpi_cast(comm.handle()));
#else
    (void) comm;
    (void) counts;
    (void) offsets;
    (void) root;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), countIn);
#endif
}
// Thin wrapper over MPI_Allgather using the same count and datatype on the
// send and receive side. Without MPI the input is copied to the output
// through copy_buffer().
void all_gather(const communicator& comm,
                const void* dataIn, int count, const datatype& type, void* dataOut)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    MPI_Allgather(dataIn, count, mpi_type,
                  dataOut, count, mpi_type,
                  mpi_cast(comm.handle()));
#else
    (void) comm;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
}
// Thin wrapper over MPI_Allgatherv. Without MPI the `countIn` input items are
// copied to the output through copy_buffer(); `counts` and `offsets` are not
// consulted in that case.
void all_gather_v(const communicator& comm,
                  const void* dataIn, int countIn, const datatype& type,
                  void* dataOut, const int counts[], const int offsets[])
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    MPI_Allgatherv(dataIn, countIn, mpi_type,
                   dataOut, counts, offsets, mpi_type,
                   mpi_cast(comm.handle()));
#else
    (void) comm;
    (void) counts;
    (void) offsets;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), countIn);
#endif
}
// Thin wrapper over MPI_Reduce. Without MPI the operation is not applied:
// the input is simply copied to the output through copy_buffer().
void reduce(const communicator& comm,
            const void* dataIn, int count, const datatype& type, void* dataOut,
            const operation& op, int root)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    const auto mpi_oper = mpi_cast(op.handle);
    MPI_Reduce(dataIn, dataOut, count, mpi_type, mpi_oper, root, mpi_cast(comm.handle()));
#else
    (void) comm;
    (void) op;
    (void) root;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
}
// Thin wrapper over MPI_Allreduce. Without MPI the operation is not applied:
// the input is simply copied to the output through copy_buffer().
void all_reduce(const communicator& comm,
                const void* dataIn, void* dataOut, int count, const datatype& type,
                const operation& op)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    const auto mpi_oper = mpi_cast(op.handle);
    MPI_Allreduce(dataIn, dataOut, count, mpi_type, mpi_oper, mpi_cast(comm.handle()));
#else
    (void) comm;
    (void) op;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
}
// Thin wrapper over MPI_Iallreduce; the returned request carries the handle
// that MPI_Iallreduce fills in. Without MPI the input is copied to the output
// through copy_buffer() and a default-constructed request is returned.
request iall_reduce(const communicator& comm,
                    const void* dataIn, void* dataOut, int count, const datatype& type,
                    const operation& op)
{
    request pending;
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    const auto mpi_oper = mpi_cast(op.handle);
    MPI_Iallreduce(dataIn, dataOut, count, mpi_type, mpi_oper,
                   mpi_cast(comm.handle()), &mpi_cast(pending.handle));
#else
    (void) comm;
    (void) op;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
    return pending;
}
// Thin wrapper over MPI_Scan. Without MPI the operation is not applied:
// the input is simply copied to the output through copy_buffer().
void scan(const communicator& comm,
          const void* dataIn, void* dataOut, int count, const datatype& type,
          const operation& op)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    const auto mpi_oper = mpi_cast(op.handle);
    MPI_Scan(dataIn, dataOut, count, mpi_type, mpi_oper, mpi_cast(comm.handle()));
#else
    (void) comm;
    (void) op;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
}
// Thin wrapper over MPI_Alltoall using the same count and datatype on the
// send and receive side. Without MPI the input is copied to the output
// through copy_buffer().
void all_to_all(const communicator& comm,
                const void* dataIn, int count, const datatype& type, void* dataOut)
{
#if VTKMDIY_HAS_MPI
    const auto mpi_type = mpi_cast(type.handle);
    MPI_Alltoall(dataIn, count, mpi_type,
                 dataOut, count, mpi_type,
                 mpi_cast(comm.handle()));
#else
    (void) comm;
    copy_buffer(dataIn, dataOut, mpi_cast(type.handle), count);
#endif
}
}
}
} // diy::mpi::detail

@ -1,12 +1,80 @@
#include <vector>
#ifndef VTKMDIY_MPI_COLLECTIVES_HPP
#define VTKMDIY_MPI_COLLECTIVES_HPP
#include "../constants.h" // for DIY_UNUSED.
#include "config.hpp"
#include "communicator.hpp"
#include "datatypes.hpp"
#include "operations.hpp"
#include "request.hpp"
#include <algorithm>
#include <vector>
#include <numeric>
namespace diy
{
namespace mpi
{
// Type-erased collective entry points. Each one takes raw buffers together
// with an explicit datatype (and an operation where relevant); the typed
// Collectives<> wrappers in this header forward to them. The definitions are
// in collectives.cpp, which this header includes itself unless
// VTKMDIY_MPI_AS_LIB is defined.
namespace detail
{
VTKMDIY_MPI_EXPORT_FUNCTION
void broadcast(const communicator& comm,
void* data, int count, const datatype& type,
int root);
VTKMDIY_MPI_EXPORT_FUNCTION
request ibroadcast(const communicator& comm,
void* data, int count, const datatype& type,
int root);
VTKMDIY_MPI_EXPORT_FUNCTION
void gather(const communicator& comm,
const void* dataIn, int count, const datatype& type, void* dataOut,
int root);
VTKMDIY_MPI_EXPORT_FUNCTION
void gather_v(const communicator& comm,
const void* dataIn, int countIn, const datatype& type,
void* dataOut, const int counts[], const int offsets[],
int root);
VTKMDIY_MPI_EXPORT_FUNCTION
void all_gather(const communicator& comm,
const void* dataIn, int count, const datatype& type, void* dataOut);
VTKMDIY_MPI_EXPORT_FUNCTION
void all_gather_v(const communicator& comm,
const void* dataIn, int countIn, const datatype& type,
void* dataOut, const int counts[], const int offsets[]);
VTKMDIY_MPI_EXPORT_FUNCTION
void reduce(const communicator& comm,
const void* dataIn, int count, const datatype& type, void* dataOut,
const operation& op, int root);
VTKMDIY_MPI_EXPORT_FUNCTION
void all_reduce(const communicator& comm,
const void* dataIn, void* dataOut, int count, const datatype& type,
const operation& op);
VTKMDIY_MPI_EXPORT_FUNCTION
request iall_reduce(const communicator& comm,
const void* dataIn, void* dataOut, int count, const datatype& type,
const operation& op);
VTKMDIY_MPI_EXPORT_FUNCTION
void scan(const communicator& comm,
const void* dataIn, void* dataOut, int count, const datatype& type,
const operation& op);
VTKMDIY_MPI_EXPORT_FUNCTION
void all_to_all(const communicator& comm,
const void* dataIn, int count, const datatype& type, void* dataOut);
} // detail
//!\addtogroup MPI
//!@{
@ -15,259 +83,181 @@ namespace mpi
{
static void broadcast(const communicator& comm, T& x, int root)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Bcast(address(x), count(x), datatype(x), root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(x);
DIY_UNUSED(root);
#endif
detail::broadcast(comm, address(x), count(x), datatype_of(x), root);
}
static void broadcast(const communicator& comm, std::vector<T>& x, int root)
{
#ifndef VTKM_DIY_NO_MPI
size_t sz = x.size();
Collectives<size_t, void*>::broadcast(comm, sz, root);
detail::broadcast(comm, &sz, 1, datatype_of(sz), root);
if (comm.rank() != root)
x.resize(sz);
MPI_Bcast(address(x), count(x), datatype(x), root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(x);
DIY_UNUSED(root);
#endif
detail::broadcast(comm, address(x), count(x), datatype_of(x), root);
}
static request ibroadcast(const communicator& comm, T& x, int root)
{
#ifndef VTKM_DIY_NO_MPI
request r;
MPI_Ibcast(address(x), count(x), datatype(x), root, comm, &r.r);
return r;
#else
DIY_UNUSED(comm);
DIY_UNUSED(x);
DIY_UNUSED(root);
DIY_UNSUPPORTED_MPI_CALL(MPI_Ibcast);
#endif
return detail::ibroadcast(comm, address(x), count(x), datatype_of(x), root);
}
static void gather(const communicator& comm, const T& in, std::vector<T>& out, int root)
{
out.resize(comm.size());
#ifndef VTKM_DIY_NO_MPI
MPI_Gather(address(in), count(in), datatype(in), address(out), count(in), datatype(out), root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(root);
out[0] = in;
#endif
detail::gather(comm, address(in), count(in), datatype_of(in), address(out), root);
}
static void gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root)
{
#ifndef VTKM_DIY_NO_MPI
std::vector<int> counts(comm.size());
std::vector<int> counts;
if (comm.rank() == root)
{
counts.resize(static_cast<size_t>(comm.size()));
}
Collectives<int,void*>::gather(comm, count(in), counts, root);
std::vector<int> offsets(comm.size(), 0);
for (unsigned i = 1; i < offsets.size(); ++i)
offsets[i] = offsets[i-1] + counts[i-1];
std::vector<int> offsets;
if (comm.rank() == root)
{
offsets.resize(counts.size());
offsets[0] = 0;
std::partial_sum(counts.begin(), counts.end() - 1, offsets.begin() + 1);
}
int elem_size = count(in[0]); // size of 1 vector element in units of mpi datatype
std::vector<T> buffer((offsets.back() + counts.back()) / elem_size);
MPI_Gatherv(address(in), count(in), datatype(in),
address(buffer),
&counts[0],
&offsets[0],
datatype(buffer),
root, comm);
out.resize(comm.size());
size_t cur = 0;
for (unsigned i = 0; i < (unsigned)comm.size(); ++i)
std::vector<T> buffer;
if (comm.rank() == root)
{
out[i].reserve(counts[i] / elem_size);
for (unsigned j = 0; j < (unsigned)(counts[i] / elem_size); ++j)
out[i].push_back(buffer[cur++]);
buffer.resize((offsets.back() + counts.back()) / elem_size);
}
detail::gather_v(comm, address(in), count(in), datatype_of(in),
address(buffer), counts.data(), offsets.data(),
root);
if (comm.rank() == root)
{
out.resize(static_cast<size_t>(comm.size()));
size_t offset = 0;
for (size_t i = 0; i < out.size(); ++i)
{
auto count = static_cast<size_t>(counts[i] / elem_size);
out[i].insert(out[i].end(), buffer.data() + offset, buffer.data() + offset + count);
offset += count;
}
}
#else
DIY_UNUSED(comm);
DIY_UNUSED(root);
out.resize(1);
out[0] = in;
#endif
}
static void gather(const communicator& comm, const T& in, int root)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Gather(address(in), count(in), datatype(in), address(in), count(in), datatype(in), root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(in);
DIY_UNUSED(root);
DIY_UNSUPPORTED_MPI_CALL("MPI_Gather");
#endif
detail::gather(comm, address(in), count(in), datatype_of(in), address(in), root);
}
static void gather(const communicator& comm, const std::vector<T>& in, int root)
{
#ifndef VTKM_DIY_NO_MPI
Collectives<int,void*>::gather(comm, count(in), root);
MPI_Gatherv(address(in), count(in), datatype(in),
0, 0, 0,
datatype(in),
root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(in);
DIY_UNUSED(root);
DIY_UNSUPPORTED_MPI_CALL("MPI_Gatherv");
#endif
detail::gather_v(comm, address(in), count(in), datatype_of(in), 0, 0, 0, root);
}
static void all_gather(const communicator& comm, const T& in, std::vector<T>& out)
{
out.resize(comm.size());
#ifndef VTKM_DIY_NO_MPI
MPI_Allgather(address(in), count(in), datatype(in),
address(out), count(in), datatype(in),
comm);
#else
DIY_UNUSED(comm);
out[0] = in;
#endif
detail::all_gather(comm, address(in), count(in), datatype_of(in), address(out));
}
static void all_gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out)
{
#ifndef VTKM_DIY_NO_MPI
std::vector<int> counts(comm.size());
std::vector<int> counts(static_cast<size_t>(comm.size()));
Collectives<int,void*>::all_gather(comm, count(in), counts);
std::vector<int> offsets(comm.size(), 0);
for (unsigned i = 1; i < offsets.size(); ++i)
offsets[i] = offsets[i-1] + counts[i-1];
std::vector<int> offsets(counts.size());
offsets[0] = 0;
std::partial_sum(counts.begin(), counts.end() - 1, offsets.begin() + 1);
int elem_size = count(in[0]); // size of 1 vector element in units of mpi datatype
std::vector<T> buffer((offsets.back() + counts.back()) / elem_size);
MPI_Allgatherv(address(in), count(in), datatype(in),
address(buffer),
&counts[0],
&offsets[0],
datatype(buffer),
comm);
detail::all_gather_v(comm,
address(in), count(in), datatype_of(in),
address(buffer),
&counts[0],
&offsets[0]);
out.resize(comm.size());
size_t cur = 0;
for (int i = 0; i < comm.size(); ++i)
out.resize(static_cast<size_t>(comm.size()));
size_t offset = 0;
for (size_t i = 0; i < out.size(); ++i)
{
out[i].reserve(counts[i] / elem_size);
for (int j = 0; j < (int)(counts[i] / elem_size); ++j)
out[i].push_back(buffer[cur++]);
auto count = static_cast<size_t>(counts[i] / elem_size);
out[i].insert(out[i].end(), buffer.data() + offset, buffer.data() + offset + count);
offset += count;
}
#else
DIY_UNUSED(comm);
out.resize(1);
out[0] = in;
#endif
}
static void reduce(const communicator& comm, const T& in, T& out, int root, const Op&)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Reduce(address(in), address(out), count(in), datatype(in),
detail::mpi_op<Op>::get(),
root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(root);
out = in;
#endif
auto op = detail::mpi_op<Op>::get();
detail::reduce(comm, address(in), count(in), datatype_of(in), address(out), op, root);
}
static void reduce(const communicator& comm, const T& in, int root, const Op&)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Reduce(address(in), address(in), count(in), datatype(in),
detail::mpi_op<Op>::get(),
root, comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(in);
DIY_UNUSED(root);
DIY_UNSUPPORTED_MPI_CALL("MPI_Reduce");
#endif
auto op = detail::mpi_op<Op>::get();
detail::reduce(comm, address(in), count(in), datatype_of(in), address(in), op, root);
}
static void all_reduce(const communicator& comm, const T& in, T& out, const Op&)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Allreduce(address(in), address(out), count(in), datatype(in),
detail::mpi_op<Op>::get(),
comm);
#else
DIY_UNUSED(comm);
out = in;
#endif
auto op = detail::mpi_op<Op>::get();
detail::all_reduce(comm, address(in), address(out), count(in), datatype_of(in), op);
}
static void all_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op&)
{
#ifndef VTKM_DIY_NO_MPI
auto op = detail::mpi_op<Op>::get();
out.resize(in.size());
MPI_Allreduce(address(in), address(out), count(in),
datatype(in),
detail::mpi_op<Op>::get(),
comm);
#else
DIY_UNUSED(comm);
out = in;
#endif
detail::all_reduce(comm, address(in), address(out), count(in), datatype_of(in), op);
}
// Non-blocking all-reduce of a single value: looks up the operation for `Op`
// and forwards raw address/count/datatype to detail::iall_reduce, returning
// its request.
static request iall_reduce(const communicator& comm, const T& in, T& out, const Op&)
{
    auto reduction = detail::mpi_op<Op>::get();
    return detail::iall_reduce(comm,
                               address(in), address(out),
                               count(in), datatype_of(in),
                               reduction);
}
// Non-blocking all-reduce of a vector: `out` is resized to match `in` before
// the raw address/count/datatype are forwarded to detail::iall_reduce.
static request iall_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op&)
{
    auto reduction = detail::mpi_op<Op>::get();
    out.resize(in.size());
    return detail::iall_reduce(comm,
                               address(in), address(out),
                               count(in), datatype_of(in),
                               reduction);
}
static void scan(const communicator& comm, const T& in, T& out, const Op&)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Scan(address(in), address(out), count(in), datatype(in),
detail::mpi_op<Op>::get(),
comm);
#else
DIY_UNUSED(comm);
out = in;
#endif
auto op = detail::mpi_op<Op>::get();
detail::scan(comm, address(in), address(out), count(in), datatype_of(in), op);
}
static void all_to_all(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, int n = 1)
{
#ifndef VTKM_DIY_NO_MPI
// n specifies how many elements go to/from every process from every process;
// the sizes of in and out are expected to be n * comm.size()
int elem_size = count(in[0]); // size of 1 vector element in units of mpi datatype
// NB: this will fail if T is a vector
MPI_Alltoall(address(in),
elem_size * n,
datatype(in),
address(out),
elem_size * n,
datatype(out),
comm);
#else
DIY_UNUSED(comm);
DIY_UNUSED(n);
out = in;
#endif
detail::all_to_all(comm, address(in), elem_size * n, datatype_of(in), address(out));
}
};
//! Non-blocking barrier as a standalone function, provided for completeness;
//! it simply forwards to `comm.ibarrier()` and returns its request.
inline request ibarrier(const communicator& comm)
{
return comm.ibarrier();
}
//! Broadcast to all processes in `comm`.
template<class T>
inline
void broadcast(const communicator& comm, T& x, int root)
{
Collectives<T,void*>::broadcast(comm, x, root);
@ -275,6 +265,7 @@ namespace mpi
//! Broadcast for vectors
template<class T>
inline
void broadcast(const communicator& comm, std::vector<T>& x, int root)
{
Collectives<T,void*>::broadcast(comm, x, root);
@ -291,6 +282,7 @@ namespace mpi
//! On `root` process, `out` is resized to `comm.size()` and filled with
//! elements from the respective ranks.
template<class T>
inline
void gather(const communicator& comm, const T& in, std::vector<T>& out, int root)
{
Collectives<T,void*>::gather(comm, in, out, root);
@ -298,6 +290,7 @@ namespace mpi
//! Same as above, but for vectors.
template<class T>
inline
void gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out, int root)
{
Collectives<T,void*>::gather(comm, in, out, root);
@ -305,6 +298,7 @@ namespace mpi
//! Simplified version (without `out`) for use on non-root processes.
template<class T>
inline
void gather(const communicator& comm, const T& in, int root)
{
Collectives<T,void*>::gather(comm, in, root);
@ -312,6 +306,7 @@ namespace mpi
//! Simplified version (without `out`) for use on non-root processes.
template<class T>
inline
void gather(const communicator& comm, const std::vector<T>& in, int root)
{
Collectives<T,void*>::gather(comm, in, root);
@ -321,6 +316,7 @@ namespace mpi
//! `out` is resized to `comm.size()` and filled with
//! elements from the respective ranks.
template<class T>
inline
void all_gather(const communicator& comm, const T& in, std::vector<T>& out)
{
Collectives<T,void*>::all_gather(comm, in, out);
@ -328,6 +324,7 @@ namespace mpi
//! Same as above, but for vectors.
template<class T>
inline
void all_gather(const communicator& comm, const std::vector<T>& in, std::vector< std::vector<T> >& out)
{
Collectives<T,void*>::all_gather(comm, in, out);
@ -335,6 +332,7 @@ namespace mpi
//! reduce
template<class T, class Op>
inline
void reduce(const communicator& comm, const T& in, T& out, int root, const Op& op)
{
Collectives<T, Op>::reduce(comm, in, out, root, op);
@ -342,6 +340,7 @@ namespace mpi
//! Simplified version (without `out`) for use on non-root processes.
template<class T, class Op>
inline
void reduce(const communicator& comm, const T& in, int root, const Op& op)
{
Collectives<T, Op>::reduce(comm, in, root, op);
@ -349,6 +348,7 @@ namespace mpi
//! all_reduce
template<class T, class Op>
inline
void all_reduce(const communicator& comm, const T& in, T& out, const Op& op)
{
Collectives<T, Op>::all_reduce(comm, in, out, op);
@ -356,13 +356,32 @@ namespace mpi
//! all_reduce for vectors; forwards to `Collectives<T, Op>::all_reduce`.
template<class T, class Op>
inline
void all_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op& op)
{
Collectives<T, Op>::all_reduce(comm, in, out, op);
}
//! iall_reduce for a single value; forwards to
//! `Collectives<T, Op>::iall_reduce` and returns its request.
template<class T, class Op>
inline
request iall_reduce(const communicator& comm, const T& in, T& out, const Op& op)
{
return Collectives<T, Op>::iall_reduce(comm, in, out, op);
}
//! iall_reduce for vectors; forwards to
//! `Collectives<T, Op>::iall_reduce` and returns its request.
template<class T, class Op>
inline
request iall_reduce(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, const Op& op)
{
return Collectives<T, Op>::iall_reduce(comm, in, out, op);
}
//! scan
template<class T, class Op>
inline
void scan(const communicator& comm, const T& in, T& out, const Op& op)
{
Collectives<T, Op>::scan(comm, in, out, op);
@ -370,6 +389,7 @@ namespace mpi
//! all_to_all
template<class T>
inline
void all_to_all(const communicator& comm, const std::vector<T>& in, std::vector<T>& out, int n = 1)
{
Collectives<T, void*>::all_to_all(comm, in, out, n);
@ -378,3 +398,9 @@ namespace mpi
//!@}
}
}
#ifndef VTKMDIY_MPI_AS_LIB
#include "collectives.cpp"
#endif
#endif // VTKMDIY_MPI_COLLECTIVES_HPP

@ -0,0 +1,130 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "communicator.hpp"
#endif
// Default communicator: wraps MPI_COMM_WORLD without owning it. Rank and size
// start out as 0 and 1 and are replaced by the values MPI reports when MPI is
// available.
diy::mpi::communicator::communicator()
    : comm_(make_DIY_MPI_Comm(MPI_COMM_WORLD))
    , rank_(0)
    , size_(1)
    , owner_(false)
{
#if VTKMDIY_HAS_MPI
    const auto world = mpi_cast(comm_);
    MPI_Comm_rank(world, &rank_);
    MPI_Comm_size(world, &size_);
#endif
}
// Wrap an existing communicator handle. `owner` is stored and later decides
// whether destroy() frees the handle. Rank and size default to 0 and 1; they
// are only queried from MPI when MPI is available and the handle is not
// MPI_COMM_NULL.
diy::mpi::communicator::
communicator(DIY_MPI_Comm comm, bool owner)
    : comm_(comm)
    , rank_(0)
    , size_(1)
    , owner_(owner)
{
#if VTKMDIY_HAS_MPI
    if (mpi_cast(comm_) == MPI_COMM_NULL)
        return;

    MPI_Comm_rank(mpi_cast(comm_), &rank_);
    MPI_Comm_size(mpi_cast(comm_), &size_);
#endif
}
#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
// Header-only convenience constructor taking a raw MPI_Comm. The handle is
// stored in comm_ (a DIY_MPI_Comm) and passed back to the MPI calls without
// mpi_cast, so this relies on the implicit conversions between the two types.
// Rank and size default to 0 and 1 and are only queried when MPI is available
// and the handle is not MPI_COMM_NULL.
diy::mpi::communicator::
communicator(MPI_Comm comm, bool owner):
comm_(comm), rank_(0), size_(1), owner_(owner)
{
#if VTKMDIY_HAS_MPI
if (comm_ != MPI_COMM_NULL)
{
MPI_Comm_rank(comm_, &rank_);
MPI_Comm_size(comm_, &size_);
}
#endif
}
#endif
// Release the wrapped communicator through MPI_Comm_free, but only when MPI
// is available and this object owns the handle. Otherwise this is a no-op.
void
diy::mpi::communicator::
destroy()
{
#if VTKMDIY_HAS_MPI
    if (!owner_)
        return;

    MPI_Comm_free(&mpi_cast(comm_));
#endif
}
// Thin wrapper over MPI_Probe returning the status it fills in. Without MPI
// the arguments are ignored and the VTKMDIY_UNSUPPORTED_MPI_CALL macro
// (defined elsewhere) is invoked instead.
diy::mpi::status
diy::mpi::communicator::
probe(int source, int tag) const
{
#if VTKMDIY_HAS_MPI
    status result;
    MPI_Probe(source, tag, mpi_cast(comm_), &mpi_cast(result.handle));
    return result;
#else
    (void) source;
    (void) tag;
    VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Probe);
#endif
}
// Thin wrapper over MPI_Iprobe: returns the status when MPI_Iprobe sets its
// flag, and an empty optional otherwise. Without MPI the result is always an
// empty optional.
diy::mpi::optional<diy::mpi::status>
diy::mpi::communicator::
iprobe(int source, int tag) const
{
    (void) source;
    (void) tag;
#if VTKMDIY_HAS_MPI
    status result;
    int    found;
    MPI_Iprobe(source, tag, mpi_cast(comm_), &found, &mpi_cast(result.handle));
    if (found)
        return result;
#endif
    return optional<status>();
}
// Thin wrapper over MPI_Barrier on the wrapped communicator; a no-op when
// MPI is not available.
void
diy::mpi::communicator::
barrier() const
{
#if VTKMDIY_HAS_MPI
    const auto self = mpi_cast(comm_);
    MPI_Barrier(self);
#endif
}
// Thin wrapper over MPI_Comm_split; the new handle is returned in a
// communicator that owns it. Without MPI the arguments are ignored and a
// default-constructed communicator is returned.
diy::mpi::communicator
diy::mpi::communicator::
split(int color, int key) const
{
#if VTKMDIY_HAS_MPI
    DIY_MPI_Comm piece;
    MPI_Comm_split(mpi_cast(comm_), color, key, &mpi_cast(piece));
    return communicator(piece, true);
#else
    (void) color;
    (void) key;
    return communicator();
#endif
}
// Thin wrapper over MPI_Ibarrier returning the request it fills in. Without
// MPI the VTKMDIY_UNSUPPORTED_MPI_CALL macro (defined elsewhere) is invoked
// instead.
diy::mpi::request
diy::mpi::communicator::
ibarrier() const
{
#if VTKMDIY_HAS_MPI
    request pending;
    MPI_Ibarrier(mpi_cast(comm_), &mpi_cast(pending.handle));
    return pending;
#else
    // Not the ideal fix: in principle a status that tests true should be
    // returned here, but that requires redesigning parts of the no-mpi handling.
    VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Ibarrier);
#endif
}
// Replace this communicator with an owning duplicate of `other`, obtained
// through MPI_Comm_dup. Without MPI nothing happens and `other` is ignored.
void
diy::mpi::communicator::
duplicate(const communicator& other)
{
    (void) other;
#if VTKMDIY_HAS_MPI
    DIY_MPI_Comm copy;
    MPI_Comm_dup(mpi_cast(other.comm_), &mpi_cast(copy));
    *this = communicator(copy, true);
#endif
}

@ -1,3 +1,12 @@
#ifndef VTKMDIY_MPI_COMMUNICATOR_HPP
#define VTKMDIY_MPI_COMMUNICATOR_HPP
#include "config.hpp"
#include "optional.hpp"
#include "point-to-point.hpp"
#include "request.hpp"
#include "status.hpp"
namespace diy
{
namespace mpi
@ -8,8 +17,14 @@ namespace mpi
class communicator
{
public:
inline
communicator(MPI_Comm comm = MPI_COMM_WORLD, bool owner = false);
VTKMDIY_MPI_EXPORT_FUNCTION
communicator();
communicator(DIY_MPI_Comm comm):
communicator(comm, false) {}
VTKMDIY_MPI_EXPORT_FUNCTION
communicator(DIY_MPI_Comm comm, bool owner);
~communicator() { destroy(); }
@ -25,9 +40,19 @@ namespace mpi
size_(other.size_),
owner_(other.owner_) { other.owner_ = false; }
communicator&
#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
communicator(MPI_Comm comm):
communicator(comm, false) {}
VTKMDIY_MPI_EXPORT_FUNCTION
communicator(MPI_Comm comm, bool owner);
operator MPI_Comm() { return comm_; }
#endif
communicator&
operator=(const communicator& other) { destroy(); comm_ = other.comm_; rank_ = other.rank_; size_ = other.size_; owner_ = false; return *this; }
communicator&
communicator&
operator=(communicator&& other) { destroy(); comm_ = other.comm_; rank_ = other.rank_; size_ = other.size_; owner_ = other.owner_; other.owner_ = false; return *this; }
int rank() const { return rank_; }
@ -35,193 +60,71 @@ namespace mpi
//! Send `x` to processor `dest` using `tag` (blocking).
template<class T>
void send(int dest, int tag, const T& x) const { detail::send<T>()(comm_, dest, tag, x); }
void send(int dest, int tag, const T& x) const { detail::send(comm_, dest, tag, x); }
//! Receive `x` from `dest` using `tag` (blocking).
//! If `T` is an `std::vector<...>`, `recv` will resize it to fit exactly the sent number of values.
template <class T>
status recv(int source, int tag, T &x) const
{
#if defined(VTKM_DIY_NO_MPI) && defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ < 8 // CUDA 7.5 workaround
(void) source; (void)tag; (void)x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
#else
return detail::recv<T>{}(comm_, source, tag, x);
#endif
}
template<class T>
status recv(int source, int tag, T& x) const { return detail::recv(comm_, source, tag, x); }
//! Non-blocking version of `send()`.
template <class T>
request isend(int dest, int tag, const T &x) const
{
#if defined(VTKM_DIY_NO_MPI) && defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ < 8 // CUDA 7.5 workaround
(void) dest; (void)tag; (void)x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Send);
#else
return detail::isend<T>{}(comm_, dest, tag, x);
#endif
}
template<class T>
request isend(int dest, int tag, const T& x) const { return detail::isend(comm_, dest, tag, x); }
//! Non-blocking version of `ssend()`.
template<class T>
request issend(int dest, int tag, const T& x) const { return detail::issend<T>()(comm_, dest, tag, x); }
request issend(int dest, int tag, const T& x) const { return detail::issend(comm_, dest, tag, x); }
//! Non-blocking version of `recv()`.
//! If `T` is an `std::vector<...>`, its size must be big enough to accommodate the sent values.
template <class T>
request irecv(int source, int tag, T &x) const
{
#if defined(VTKM_DIY_NO_MPI) && defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ < 8 // CUDA 7.5 workaround
(void)source; (void)tag; (void)x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Irecv);
#else
return detail::irecv<T>()(comm_, source, tag, x);
#endif
}
template<class T>
request irecv(int source, int tag, T& x) const { return detail::irecv(comm_, source, tag, x); }
//! probe
inline
VTKMDIY_MPI_EXPORT_FUNCTION
status probe(int source, int tag) const;
//! iprobe
inline
VTKMDIY_MPI_EXPORT_FUNCTION
optional<status>
iprobe(int source, int tag) const;
//! barrier
inline
VTKMDIY_MPI_EXPORT_FUNCTION
void barrier() const;
//! Nonblocking version of barrier
inline
VTKMDIY_MPI_EXPORT_FUNCTION
request ibarrier() const;
operator MPI_Comm() const { return comm_; }
//! split
//! When keys are the same, the ties are broken by the rank in the original comm.
inline
VTKMDIY_MPI_EXPORT_FUNCTION
communicator
split(int color, int key = 0) const;
//! duplicate
inline
VTKMDIY_MPI_EXPORT_FUNCTION
void duplicate(const communicator& other);
DIY_MPI_Comm handle() const { return comm_; }
private:
inline
VTKMDIY_MPI_EXPORT_FUNCTION
void destroy();
private:
MPI_Comm comm_;
int rank_;
int size_;
bool owner_;
DIY_MPI_Comm comm_;
int rank_;
int size_;
bool owner_;
};
}
}
diy::mpi::communicator::
communicator(MPI_Comm comm, bool owner):
comm_(comm), rank_(0), size_(1), owner_(owner)
{
#ifndef VTKM_DIY_NO_MPI
if (comm != MPI_COMM_NULL)
{
MPI_Comm_rank(comm_, &rank_);
MPI_Comm_size(comm_, &size_);
}
}
} // diy::mpi
#ifndef VTKMDIY_MPI_AS_LIB
#include "communicator.cpp"
#endif
}
void
diy::mpi::communicator::
destroy()
{
#ifndef VTKM_DIY_NO_MPI
if (owner_)
MPI_Comm_free(&comm_);
#endif
}
diy::mpi::status
diy::mpi::communicator::
probe(int source, int tag) const
{
(void) source;
(void) tag;
#ifndef VTKM_DIY_NO_MPI
status s;
MPI_Probe(source, tag, comm_, &s.s);
return s;
#else
DIY_UNSUPPORTED_MPI_CALL(MPI_Probe);
#endif
}
diy::mpi::optional<diy::mpi::status>
diy::mpi::communicator::
iprobe(int source, int tag) const
{
(void) source;
(void) tag;
#ifndef VTKM_DIY_NO_MPI
status s;
int flag;
MPI_Iprobe(source, tag, comm_, &flag, &s.s);
if (flag)
return s;
#endif
return optional<status>();
}
void
diy::mpi::communicator::
barrier() const
{
#ifndef VTKM_DIY_NO_MPI
MPI_Barrier(comm_);
#endif
}
diy::mpi::communicator
diy::mpi::communicator::
split(int color, int key) const
{
#ifndef VTKM_DIY_NO_MPI
MPI_Comm newcomm;
MPI_Comm_split(comm_, color, key, &newcomm);
return communicator(newcomm, true);
#else
return communicator();
#endif
}
diy::mpi::request
diy::mpi::communicator::
ibarrier() const
{
#ifndef VTKM_DIY_NO_MPI
request r_;
MPI_Ibarrier(comm_, &r_.r);
return r_;
#else
// this is not the ideal fix; in principle we should just return a status
// that tests true, but this requires redesigning some parts of our no-mpi
// handling
DIY_UNSUPPORTED_MPI_CALL(MPI_Ibarrier);
#endif
}
void
diy::mpi::communicator::
duplicate(const communicator& other)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Comm newcomm;
MPI_Comm_dup(other.comm_, &newcomm);
(*this) = communicator(newcomm,true);
#endif
}
#endif // VTKMDIY_MPI_COMMUNICATOR_HPP

@ -0,0 +1,70 @@
#ifndef VTKMDIY_MPI_CONFIG_HPP
#define VTKMDIY_MPI_CONFIG_HPP
/// We want to allow the use of `diy::mpi` in either header-only or library mode.
/// VTKMDIY_MPI_AS_LIB is defined when using library mode.
/// This file contains some configuration macros. To maintain backwards compatibility
/// suitable default values should be defined when using header-only mode.
/// VTKMDIY_HAS_MPI should always be defined when VTKMDIY_MPI_AS_LIB is defined, but only for
/// the compilation units that are part of the library.
/// VTKMDIY_HAS_MPI=1 means MPI library is available.
/// For header-only, the default is to assume MPI is available
#if !defined(VTKMDIY_MPI_AS_LIB) && !defined(VTKMDIY_HAS_MPI)
# define VTKMDIY_HAS_MPI 1
#endif
/// Include appropriate mpi header. Since VTKMDIY_HAS_MPI is only defined for
/// the compilation units of the library, when in library mode, the header is
/// only included for the library's compilation units.
#ifdef VTKMDIY_HAS_MPI
# if VTKMDIY_HAS_MPI
# include <mpi.h>
# else
# include "no-mpi.hpp"
# endif
#endif
/// Classes and objects that need to be visible to clients of the library should be
/// marked as VTKMDIY_MPI_EXPORT. Similarly API functions should be marked as
/// VTKMDIY_MPI_EXPORT_FUNCTION.
#include "diy-mpi-export.h" // defines VTKMDIY_MPI_EXPORT and VTKMDIY_MPI_EXPORT_FUNCTION
/// Define aliases for MPI types
#ifdef VTKMDIY_MPI_AS_LIB
# include "mpitypes.hpp" // only configured in library mode
#else // ifdef VTKMDIY_MPI_AS_LIB
namespace diy
{
namespace mpi
{
// Header-only mode: DIY_<mpitype> is a thin wrapper that stores the real MPI
// object in `data`. It can be constructed or assigned from the MPI type and
// converts implicitly back to it.
#define DEFINE_DIY_MPI_TYPE(mpitype) \
struct DIY_##mpitype { \
DIY_##mpitype() = default; \
DIY_##mpitype(const mpitype& obj) : data(obj) {} \
DIY_##mpitype& operator=(const mpitype& obj) { data = obj; return *this; } \
operator mpitype() { return data; } \
mpitype data; \
};
// One wrapper per MPI handle type used by diy.
DEFINE_DIY_MPI_TYPE(MPI_Comm)
DEFINE_DIY_MPI_TYPE(MPI_Datatype)
DEFINE_DIY_MPI_TYPE(MPI_Status)
DEFINE_DIY_MPI_TYPE(MPI_Request)
DEFINE_DIY_MPI_TYPE(MPI_Op)
DEFINE_DIY_MPI_TYPE(MPI_File)
DEFINE_DIY_MPI_TYPE(MPI_Win)
// The helper macro is local to this header.
#undef DEFINE_DIY_MPI_TYPE
}
} // diy::mpi
#endif // ifdef VTKMDIY_MPI_AS_LIB
#ifdef VTKMDIY_HAS_MPI
# include "mpi_cast.hpp"
#endif
#endif // VTKMDIY_MPI_CONFIG_HPP

@ -1,13 +0,0 @@
#ifndef VTKMDIY_MPI_CONSTANTS_HPP
#define VTKMDIY_MPI_CONSTANTS_HPP
namespace diy
{
namespace mpi
{
// Wildcard source/tag values, taken directly from the MPI constants
// MPI_ANY_SOURCE and MPI_ANY_TAG.
const int any_source = MPI_ANY_SOURCE;
const int any_tag = MPI_ANY_TAG;
}
}
#endif

@ -0,0 +1,34 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "datatypes.hpp"
#endif
namespace diy
{
namespace mpi
{
namespace detail
{
// Defines the get_mpi_datatype<cpp_type>() specialization: it wraps the
// given MPI datatype constant in a diy::mpi::datatype.
#define DIY_MPI_DATATYPE_MAP(cpp_type, mpi_type) \
template<> datatype get_mpi_datatype<cpp_type>() { \
return datatype(make_DIY_MPI_Datatype(mpi_type)); \
}
// char, unsigned char and bool are all mapped to MPI_BYTE.
DIY_MPI_DATATYPE_MAP(char, MPI_BYTE)
DIY_MPI_DATATYPE_MAP(unsigned char, MPI_BYTE)
DIY_MPI_DATATYPE_MAP(bool, MPI_BYTE)
DIY_MPI_DATATYPE_MAP(int, MPI_INT)
DIY_MPI_DATATYPE_MAP(unsigned, MPI_UNSIGNED)
DIY_MPI_DATATYPE_MAP(long, MPI_LONG)
DIY_MPI_DATATYPE_MAP(unsigned long, MPI_UNSIGNED_LONG)
DIY_MPI_DATATYPE_MAP(long long, MPI_LONG_LONG_INT)
DIY_MPI_DATATYPE_MAP(unsigned long long, MPI_UNSIGNED_LONG_LONG)
DIY_MPI_DATATYPE_MAP(float, MPI_FLOAT)
DIY_MPI_DATATYPE_MAP(double, MPI_DOUBLE)
// The helper macro is local to this file.
#undef DIY_MPI_DATATYPE_MAP
}
}
} // diy::mpi::detail

@ -1,16 +1,31 @@
#ifndef VTKMDIY_MPI_DATATYPES_HPP
#define VTKMDIY_MPI_DATATYPES_HPP
#include "config.hpp"
#include <vector>
#include <array>
namespace diy
{
namespace mpi
{
// Wrapper around an MPI datatype, stored as the DIY_MPI_Datatype `handle`.
struct datatype
{
datatype() = default;
datatype(const DIY_MPI_Datatype& dt) : handle(dt) {}
// Direct interoperability with the raw MPI_Datatype is compiled in only when
// VTKMDIY_MPI_AS_LIB is not defined.
#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
datatype(const MPI_Datatype& dt) : handle(dt) {}
operator MPI_Datatype() { return handle; }
#endif
DIY_MPI_Datatype handle;
};
namespace detail
{
template<class T> MPI_Datatype get_mpi_datatype();
struct true_type {};
struct false_type {};
@ -18,28 +33,34 @@ namespace detail
template<class T>
struct is_mpi_datatype { typedef false_type type; };
#define VTKMDIY_MPI_DATATYPE_MAP(cpp_type, mpi_type) \
template<> inline MPI_Datatype get_mpi_datatype<cpp_type>() { return mpi_type; } \
template<> struct is_mpi_datatype<cpp_type> { typedef true_type type; }; \
template<> struct is_mpi_datatype< std::vector<cpp_type> > { typedef true_type type; };
template<class T> datatype get_mpi_datatype();
VTKMDIY_MPI_DATATYPE_MAP(char, MPI_BYTE);
VTKMDIY_MPI_DATATYPE_MAP(unsigned char, MPI_BYTE);
VTKMDIY_MPI_DATATYPE_MAP(bool, MPI_BYTE);
VTKMDIY_MPI_DATATYPE_MAP(int, MPI_INT);
VTKMDIY_MPI_DATATYPE_MAP(unsigned, MPI_UNSIGNED);
VTKMDIY_MPI_DATATYPE_MAP(long, MPI_LONG);
VTKMDIY_MPI_DATATYPE_MAP(unsigned long, MPI_UNSIGNED_LONG);
VTKMDIY_MPI_DATATYPE_MAP(long long, MPI_LONG_LONG_INT);
VTKMDIY_MPI_DATATYPE_MAP(unsigned long long, MPI_UNSIGNED_LONG_LONG);
VTKMDIY_MPI_DATATYPE_MAP(float, MPI_FLOAT);
VTKMDIY_MPI_DATATYPE_MAP(double, MPI_DOUBLE);
#define VTKMDIY_MPI_DATATYPE_DEFAULT(cpp_type) \
template<> VTKMDIY_MPI_EXPORT_FUNCTION datatype get_mpi_datatype<cpp_type>(); \
template<> struct is_mpi_datatype< cpp_type > { typedef true_type type; }; \
template<> struct is_mpi_datatype< std::vector<cpp_type> > { typedef true_type type; }; \
template<size_t N> \
struct is_mpi_datatype< std::array<cpp_type, N> > { typedef true_type type; };
/* mpi_datatype: helper routines, specialized for std::vector<...> */
VTKMDIY_MPI_DATATYPE_DEFAULT(char)
VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned char)
VTKMDIY_MPI_DATATYPE_DEFAULT(bool)
VTKMDIY_MPI_DATATYPE_DEFAULT(int)
VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned)
VTKMDIY_MPI_DATATYPE_DEFAULT(long)
VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned long)
VTKMDIY_MPI_DATATYPE_DEFAULT(long long)
VTKMDIY_MPI_DATATYPE_DEFAULT(unsigned long long)
VTKMDIY_MPI_DATATYPE_DEFAULT(float)
VTKMDIY_MPI_DATATYPE_DEFAULT(double)
#undef VTKMDIY_MPI_DATATYPE_DEFAULT
/* mpi_datatype: helper routines, specialized for std::vector<...>, std::array<...> */
template<class T>
struct mpi_datatype
{
static MPI_Datatype datatype() { return get_mpi_datatype<T>(); }
static diy::mpi::datatype datatype() { return get_mpi_datatype<T>(); }
static const void* address(const T& x) { return &x; }
static void* address(T& x) { return &x; }
static int count(const T&) { return 1; }
@ -50,44 +71,53 @@ namespace detail
{
typedef std::vector<U> VecU;
static MPI_Datatype datatype() { return mpi_datatype<U>::datatype(); }
static diy::mpi::datatype datatype() { return mpi_datatype<U>::datatype(); }
static const void* address(const VecU& x) { return x.data(); }
static void* address(VecU& x) { return x.data(); }
static int count(const VecU& x) { return x.empty() ? 0 : (static_cast<int>(x.size()) * mpi_datatype<U>::count(x[0])); }
};
// mpi_datatype specialization for std::array<U,D>: the array is described as
// a run of U elements.
template<class U, size_t D>
struct mpi_datatype< std::array<U,D> >
{
typedef std::array<U,D> ArrayU;
// Element datatype is that of U.
static diy::mpi::datatype datatype() { return mpi_datatype<U>::datatype(); }
// Address of the array's contiguous storage.
static const void* address(const ArrayU& x) { return x.data(); }
static void* address(ArrayU& x) { return x.data(); }
// Number of array elements times the count reported for the first element;
// 0 for an empty array.
static int count(const ArrayU& x) { return x.empty() ? 0 : (static_cast<int>(x.size()) * mpi_datatype<U>::count(x[0])); }
};
} // detail
template<class U>
static MPI_Datatype datatype(const U&)
static datatype datatype_of(const U&)
{
using Datatype = detail::mpi_datatype<U>;
return Datatype::datatype();
return detail::mpi_datatype<U>::datatype();
}
template<class U>
static void* address(const U& x)
{
using Datatype = detail::mpi_datatype<U>;
return const_cast<void*>(Datatype::address(x));
return const_cast<void*>(detail::mpi_datatype<U>::address(x));
}
template<class U>
static void* address(U& x)
{
using Datatype = detail::mpi_datatype<U>;
return Datatype::address(x);
return detail::mpi_datatype<U>::address(x);
}
template<class U>
static int count(const U& x)
{
using Datatype = detail::mpi_datatype<U>;
return Datatype::count(x);
return detail::mpi_datatype<U>::count(x);
}
} // mpi
} // diy
#ifndef VTKMDIY_MPI_AS_LIB
#include "datatypes.cpp"
#endif
#endif // VTKMDIY_MPI_DATATYPES_HPP

@ -0,0 +1,49 @@
#ifndef VTKMDIY_MPI_EXPORT_H
#define VTKMDIY_MPI_EXPORT_H
#if defined(_MSC_VER)
# ifdef VTKMDIY_MPI_STATIC_BUILD
/* This is a static component and has no need for exports
elf based static libraries are able to have hidden/default visibility
controls on symbols so we should propagate this information in that
use case
*/
# define VTKMDIY_MPI_EXPORT_DEFINE
# define VTKMDIY_MPI_IMPORT_DEFINE
# define VTKMDIY_MPI_NO_EXPORT_DEFINE
# else
# define VTKMDIY_MPI_EXPORT_DEFINE __declspec(dllexport)
# define VTKMDIY_MPI_IMPORT_DEFINE __declspec(dllimport)
# define VTKMDIY_MPI_NO_EXPORT_DEFINE
# endif
#else
# define VTKMDIY_MPI_EXPORT_DEFINE __attribute__((visibility("default")))
# define VTKMDIY_MPI_IMPORT_DEFINE __attribute__((visibility("default")))
# define VTKMDIY_MPI_NO_EXPORT_DEFINE __attribute__((visibility("hidden")))
#endif
#ifndef VTKMDIY_MPI_EXPORT
# if !defined(VTKMDIY_MPI_AS_LIB)
# define VTKMDIY_MPI_EXPORT
# define VTKMDIY_MPI_EXPORT_FUNCTION inline
# else
# if defined(VTKMDIY_HAS_MPI)
/* We are building this library */
# define VTKMDIY_MPI_EXPORT VTKMDIY_MPI_EXPORT_DEFINE
# else
/* We are using this library */
# define VTKMDIY_MPI_EXPORT VTKMDIY_MPI_IMPORT_DEFINE
# endif
# define VTKMDIY_MPI_EXPORT_FUNCTION VTKMDIY_MPI_EXPORT
# endif
#endif
#ifndef VTKMDIY_MPI_EXPORT_FUNCTION
#error "VTKMDIY_MPI_EXPORT_FUNCTION not defined"
#endif
#ifndef VTKMDIY_MPI_NO_EXPORT
# define VTKMDIY_MPI_NO_EXPORT VTKMDIY_MPI_NO_EXPORT_DEFINE
#endif
#endif // VTKMDIY_MPI_EXPORT_H

@ -0,0 +1,62 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "environment.hpp"
#endif
// Reports whether MPI has been initialized, as given by MPI_Initialized.
// In a build without MPI (VTKMDIY_HAS_MPI false) this always returns true.
bool diy::mpi::environment::initialized()
{
#if VTKMDIY_HAS_MPI
  // Start from "not initialized" so the result is well defined even if
  // MPI_Initialized returns an error without writing the flag.
  int flag = 0;
  MPI_Initialized(&flag);
  return flag != 0;
#else
  return true;
#endif
}
// Initializes MPI with MPI_Init_thread, passing an empty argument list and
// requesting MPI_THREAD_FUNNELED; the level granted is stored in
// provided_threading. Without MPI, provided_threading is simply set to
// MPI_THREAD_FUNNELED.
diy::mpi::environment::environment()
{
#if VTKMDIY_HAS_MPI
int argc = 0; char** argv = nullptr;
MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided_threading);
#else
provided_threading = MPI_THREAD_FUNNELED;
#endif
}
// Initializes MPI with MPI_Init_thread, passing an empty argument list and
// requesting `requested_threading`; the level granted is stored in
// provided_threading. Without MPI, the requested level is recorded as
// provided.
diy::mpi::environment::environment(int requested_threading)
{
#if VTKMDIY_HAS_MPI
int argc = 0; char** argv = nullptr;
MPI_Init_thread(&argc, &argv, requested_threading, &provided_threading);
#else
provided_threading = requested_threading;
#endif
}
// Initializes MPI with MPI_Init_thread, forwarding the caller's argc/argv and
// requesting MPI_THREAD_FUNNELED; the level granted is stored in
// provided_threading. Without MPI, the arguments are ignored and
// provided_threading is set to MPI_THREAD_FUNNELED.
diy::mpi::environment::environment(int argc, char* argv[])
{
#if VTKMDIY_HAS_MPI
MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided_threading);
#else
(void) argc; (void) argv;
provided_threading = MPI_THREAD_FUNNELED;
#endif
}
// Initializes MPI with MPI_Init_thread, forwarding the caller's argc/argv and
// requesting `requested_threading`; the level granted is stored in
// provided_threading. Without MPI, the arguments are ignored and the
// requested level is recorded as provided.
diy::mpi::environment::environment(int argc, char* argv[], int requested_threading)
{
#if VTKMDIY_HAS_MPI
MPI_Init_thread(&argc, &argv, requested_threading, &provided_threading);
#else
(void) argc; (void) argv;
provided_threading = requested_threading;
#endif
}
// Calls MPI_Finalize when built with MPI; otherwise does nothing.
// NOTE(review): the call is unconditional, so this presumably expects the
// object to be the one that initialized MPI -- confirm with callers.
diy::mpi::environment::
~environment()
{
#if VTKMDIY_HAS_MPI
MPI_Finalize();
#endif
}

@ -0,0 +1,35 @@
#ifndef VTKMDIY_MPI_ENVIRONMENT_HPP
#define VTKMDIY_MPI_ENVIRONMENT_HPP
#include "config.hpp"
namespace diy
{
namespace mpi
{
//! \ingroup MPI
// Scoped MPI environment: the constructors initialize MPI (with an optional
// argc/argv and requested threading level) and the destructor finalizes it.
// The member functions are defined in environment.cpp.
struct environment
{
// True if MPI reports itself initialized (always true without MPI).
VTKMDIY_MPI_EXPORT_FUNCTION static bool initialized();
VTKMDIY_MPI_EXPORT_FUNCTION environment();
VTKMDIY_MPI_EXPORT_FUNCTION environment(int requested_threading);
VTKMDIY_MPI_EXPORT_FUNCTION environment(int argc, char* argv[]);
VTKMDIY_MPI_EXPORT_FUNCTION environment(int argc, char* argv[], int requested_threading);
VTKMDIY_MPI_EXPORT_FUNCTION ~environment();
// Threading level recorded at construction.
int threading() const { return provided_threading; }
int provided_threading;
};
}
} // diy::mpi
#ifndef VTKMDIY_MPI_AS_LIB
#include "environment.cpp"
#endif
#endif // VTKMDIY_MPI_ENVIRONMENT_HPP

@ -0,0 +1,222 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "io.hpp"
#endif
#include "status.hpp"
// Library mode only: out-of-line definitions of the file open-mode constants,
// each initialized from the corresponding MPI_MODE_* value.
#ifdef VTKMDIY_MPI_AS_LIB
const int diy::mpi::io::file::rdonly = MPI_MODE_RDONLY;
const int diy::mpi::io::file::rdwr = MPI_MODE_RDWR;
const int diy::mpi::io::file::wronly = MPI_MODE_WRONLY;
const int diy::mpi::io::file::create = MPI_MODE_CREATE;
const int diy::mpi::io::file::exclusive = MPI_MODE_EXCL;
const int diy::mpi::io::file::delete_on_close = MPI_MODE_DELETE_ON_CLOSE;
const int diy::mpi::io::file::unique_open = MPI_MODE_UNIQUE_OPEN;
const int diy::mpi::io::file::sequential = MPI_MODE_SEQUENTIAL;
const int diy::mpi::io::file::append = MPI_MODE_APPEND;
#endif
// Opens `filename` with MPI_File_open on the given communicator, using `mode`
// and MPI_INFO_NULL; the handle is stored in fh.
// Throws std::runtime_error if MPI_File_open returns non-zero, or (via
// VTKMDIY_UNSUPPORTED_MPI_CALL) when built without MPI.
diy::mpi::io::file::
file(const communicator& comm__, const std::string& filename, int mode)
: comm_(comm__)
{
#if VTKMDIY_HAS_MPI
int ret = MPI_File_open(diy::mpi::mpi_cast(comm__.handle()), const_cast<char*>(filename.c_str()), mode, MPI_INFO_NULL, &diy::mpi::mpi_cast(fh));
if (ret)
throw std::runtime_error("DIY cannot open file: " + filename);
#else
(void)comm__; (void)filename; (void)mode;
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_open);
#endif
}
// Closes the file with MPI_File_close unless the handle is already
// MPI_FILE_NULL. No-op when built without MPI.
void
diy::mpi::io::file::
close()
{
#if VTKMDIY_HAS_MPI
if (diy::mpi::mpi_cast(fh) != MPI_FILE_NULL)
MPI_File_close(&diy::mpi::mpi_cast(fh));
#endif
}
// Returns the file size reported by MPI_File_get_size, converted to `offset`.
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
diy::mpi::io::offset
diy::mpi::io::file::
size() const
{
#if VTKMDIY_HAS_MPI
MPI_Offset sz;
MPI_File_get_size(diy::mpi::mpi_cast(fh), &sz);
return static_cast<offset>(sz);
#else
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_get_size);
#endif
}
// Sets the file size to `size_` with MPI_File_set_size.
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
resize(diy::mpi::io::offset size_)
{
#if VTKMDIY_HAS_MPI
MPI_File_set_size(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(size_));
#else
(void)size_;
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_set_size);
#endif
}
// Reads `size_` bytes (as MPI_BYTE) at file offset `o` into `buffer` using
// MPI_File_read_at. The returned status is not inspected.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
read_at(offset o, char* buffer, size_t size_)
{
#if VTKMDIY_HAS_MPI
status s;
MPI_File_read_at(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
#else
(void)o; (void)buffer; (void)size_;
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at);
#endif
}
// Same as read_at but issues MPI_File_read_at_all: reads `size_` bytes
// (as MPI_BYTE) at file offset `o` into `buffer`. The status is not inspected.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
read_at_all(offset o, char* buffer, size_t size_)
{
#if VTKMDIY_HAS_MPI
status s;
MPI_File_read_at_all(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
#else
(void)o; (void)buffer; (void)size_;
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at_all);
#endif
}
// Writes `size_` bytes (as MPI_BYTE) from `buffer` at file offset `o` using
// MPI_File_write_at. The returned status is not inspected.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
write_at(offset o, const char* buffer, size_t size_)
{
#if VTKMDIY_HAS_MPI
status s;
// The cast drops const because the MPI call is given a void* buffer.
MPI_File_write_at(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), (void *)buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
#else
(void)o; (void)buffer; (void)size_;
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at);
#endif
}
// Same as write_at but issues MPI_File_write_at_all: writes `size_` bytes
// (as MPI_BYTE) from `buffer` at file offset `o`. The status is not inspected.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
write_at_all(offset o, const char* buffer, size_t size_)
{
#if VTKMDIY_HAS_MPI
status s;
// The cast drops const because the MPI call is given a void* buffer.
MPI_File_write_at_all(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(o), (void *)buffer, static_cast<int>(size_), MPI_BYTE, &diy::mpi::mpi_cast(s.handle));
#else
(void)o; (void)buffer; (void)size_;
VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at_all);
#endif
}
// Reads the sub-block `bounds` of an `ndims`-dimensional array of shape
// `dims` from the file into `buffer`.
//   offset     - byte displacement passed to MPI_File_set_view
//   dt         - element datatype
//   collective - use MPI_File_read_all instead of MPI_File_read
//   chunk      - if not 1, elements are grouped into a struct type of `chunk`
//                consecutive `dt` items
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
read_bov(const DiscreteBounds& bounds, int ndims, const int dims[], char* buffer, size_t offset, const datatype& dt, bool collective, int chunk)
{
#if VTKMDIY_HAS_MPI
// Extent of the requested block per dimension, and the total element count.
int total = 1;
std::vector<int> subsizes;
for (unsigned i = 0; i < static_cast<unsigned>(ndims); ++i)
{
subsizes.push_back(bounds.max[i] - bounds.min[i] + 1);
total *= subsizes.back();
}
MPI_Datatype T_type;
if (chunk == 1)
{
T_type = diy::mpi::mpi_cast(dt.handle);
}
else
{
// create an MPI struct of size chunk to read the data in those chunks
// (this allows to work around MPI-IO weirdness where crucial quantities
// are ints, which are too narrow of a type)
int array_of_blocklengths[] = { chunk };
MPI_Aint array_of_displacements[] = { 0 };
MPI_Datatype array_of_types[] = { diy::mpi::mpi_cast(dt.handle) };
MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
MPI_Type_commit(&T_type);
}
// Subarray type selecting the block (starting at bounds.min) inside `dims`,
// in C order; it becomes the file view.
MPI_Datatype fileblk;
MPI_Type_create_subarray(ndims, dims, subsizes.data(), (int*) &bounds.min[0], MPI_ORDER_C, T_type, &fileblk);
MPI_Type_commit(&fileblk);
MPI_File_set_view(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(offset), T_type, fileblk, (char*)"native", MPI_INFO_NULL);
mpi::status s;
if (!collective)
MPI_File_read(diy::mpi::mpi_cast(fh), buffer, total, T_type, &mpi_cast(s.handle));
else
MPI_File_read_all(diy::mpi::mpi_cast(fh), buffer, total, T_type, &mpi_cast(s.handle));
// Free only the types created here; T_type is the caller's when chunk == 1.
if (chunk != 1)
MPI_Type_free(&T_type);
MPI_Type_free(&fileblk);
#else
(void) bounds; (void) ndims; (void) dims, (void) buffer; (void) offset, (void) dt, (void) collective; (void) chunk;
VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::io::file::read_bov);
#endif
}
// Writes the `core` region of an in-memory block covering `bounds` into the
// matching region of an `ndims`-dimensional file array of shape `dims`.
//   buffer     - memory holding the whole `bounds` block
//   offset     - byte displacement passed to MPI_File_set_view
//   dt         - element datatype
//   collective - use MPI_File_write_all instead of MPI_File_write
//   chunk      - if not 1, elements are grouped into a struct type of `chunk`
//                consecutive `dt` items
// Throws via VTKMDIY_UNSUPPORTED_MPI_CALL when built without MPI.
void
diy::mpi::io::file::
write_bov(const DiscreteBounds& bounds, const DiscreteBounds& core, int ndims, const int dims[], const char* buffer, size_t offset, const datatype& dt, bool collective, int chunk)
{
#if VTKMDIY_HAS_MPI
// subsizes:     extent of `core` per dimension (what gets written)
// buffer_shape: extent of `bounds` per dimension (layout of `buffer`)
// buffer_start: position of `core` inside `bounds`
std::vector<int> subsizes;
std::vector<int> buffer_shape, buffer_start;
for (unsigned i = 0; i < static_cast<unsigned>(ndims); ++i)
{
buffer_shape.push_back(bounds.max[i] - bounds.min[i] + 1);
buffer_start.push_back(core.min[i] - bounds.min[i]);
subsizes.push_back(core.max[i] - core.min[i] + 1);
}
MPI_Datatype T_type;
if (chunk == 1)
{
T_type = diy::mpi::mpi_cast(dt.handle);
}
else
{
// assume T is a binary block and create an MPI struct of appropriate size
int array_of_blocklengths[] = { chunk };
MPI_Aint array_of_displacements[] = { 0 };
MPI_Datatype array_of_types[] = { diy::mpi::mpi_cast(dt.handle) };
MPI_Type_create_struct(1, array_of_blocklengths, array_of_displacements, array_of_types, &T_type);
MPI_Type_commit(&T_type);
}
// fileblk selects `core` inside the file array; subbuffer selects `core`
// inside the in-memory `bounds` block. Both are C-ordered subarrays.
MPI_Datatype fileblk, subbuffer;
MPI_Type_create_subarray(ndims, dims, subsizes.data(), (int*) &core.min[0], MPI_ORDER_C, T_type, &fileblk);
MPI_Type_create_subarray(ndims, buffer_shape.data(), subsizes.data(), buffer_start.data(), MPI_ORDER_C, T_type, &subbuffer);
MPI_Type_commit(&fileblk);
MPI_Type_commit(&subbuffer);
MPI_File_set_view(diy::mpi::mpi_cast(fh), static_cast<MPI_Offset>(offset), T_type, fileblk, (char*)"native", MPI_INFO_NULL);
mpi::status s;
// A single `subbuffer` item is written; the type itself picks the core
// elements out of `buffer`.
if (!collective)
MPI_File_write(diy::mpi::mpi_cast(fh), (void*)buffer, 1, subbuffer, &mpi_cast(s.handle));
else
MPI_File_write_all(diy::mpi::mpi_cast(fh), (void*)buffer, 1, subbuffer, &mpi_cast(s.handle));
// Free only the types created here; T_type is the caller's when chunk == 1.
if (chunk != 1)
MPI_Type_free(&T_type);
MPI_Type_free(&fileblk);
MPI_Type_free(&subbuffer);
#else
(void) bounds; (void) core, (void) ndims; (void) dims, (void) buffer; (void) offset, (void) dt, (void) collective; (void) chunk;
VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::io::file::write_bov);
#endif
}

@ -1,139 +1,82 @@
#ifndef VTKMDIY_MPI_IO_HPP
#define VTKMDIY_MPI_IO_HPP
#include "../constants.h"
#include "config.hpp"
#include "communicator.hpp"
#include <vtkmdiy/types.hpp>
#include <vector>
#include <string>
#include <stdexcept>
namespace diy
{
namespace mpi
{
namespace io
{
typedef MPI_Offset offset;
#if !defined(VTKMDIY_MPI_AS_LIB) && VTKMDIY_HAS_MPI
using offset = MPI_Offset;
#else
using offset = long long;
#endif
//! Wraps MPI file IO. \ingroup MPI
class file
{
public:
enum
{
rdonly = MPI_MODE_RDONLY,
rdwr = MPI_MODE_RDWR,
wronly = MPI_MODE_WRONLY,
create = MPI_MODE_CREATE,
exclusive = MPI_MODE_EXCL,
delete_on_close = MPI_MODE_DELETE_ON_CLOSE,
unique_open = MPI_MODE_UNIQUE_OPEN,
sequential = MPI_MODE_SEQUENTIAL,
append = MPI_MODE_APPEND
};
#ifndef VTKMDIY_MPI_AS_LIB
static constexpr int rdonly = MPI_MODE_RDONLY;
static constexpr int rdwr = MPI_MODE_RDWR;
static constexpr int wronly = MPI_MODE_WRONLY;
static constexpr int create = MPI_MODE_CREATE;
static constexpr int exclusive = MPI_MODE_EXCL;
static constexpr int delete_on_close = MPI_MODE_DELETE_ON_CLOSE;
static constexpr int unique_open = MPI_MODE_UNIQUE_OPEN;
static constexpr int sequential = MPI_MODE_SEQUENTIAL;
static constexpr int append = MPI_MODE_APPEND;
#else
static const int rdonly, rdwr, wronly, create, exclusive, delete_on_close, unique_open, sequential, append;
#endif
public:
inline file(const communicator& comm, const std::string& filename, int mode);
~file() { close(); }
inline void close();
VTKMDIY_MPI_EXPORT_FUNCTION file(const communicator& comm, const std::string& filename, int mode);
~file() { close(); }
VTKMDIY_MPI_EXPORT_FUNCTION void close();
inline offset size() const;
inline void resize(offset size);
VTKMDIY_MPI_EXPORT_FUNCTION offset size() const;
VTKMDIY_MPI_EXPORT_FUNCTION void resize(offset size);
inline void read_at(offset o, char* buffer, size_t size);
inline void read_at_all(offset o, char* buffer, size_t size);
inline void write_at(offset o, const char* buffer, size_t size);
inline void write_at_all(offset o, const char* buffer, size_t size);
VTKMDIY_MPI_EXPORT_FUNCTION void read_at(offset o, char* buffer, size_t size);
VTKMDIY_MPI_EXPORT_FUNCTION void read_at_all(offset o, char* buffer, size_t size);
VTKMDIY_MPI_EXPORT_FUNCTION void write_at(offset o, const char* buffer, size_t size);
VTKMDIY_MPI_EXPORT_FUNCTION void write_at_all(offset o, const char* buffer, size_t size);
template<class T>
inline void read_at(offset o, std::vector<T>& data);
inline void read_at(offset o, std::vector<T>& data);
template<class T>
inline void read_at_all(offset o, std::vector<T>& data);
inline void read_at_all(offset o, std::vector<T>& data);
template<class T>
inline void write_at(offset o, const std::vector<T>& data);
inline void write_at(offset o, const std::vector<T>& data);
template<class T>
inline void write_at_all(offset o, const std::vector<T>& data);
inline void write_at_all(offset o, const std::vector<T>& data);
const communicator&
comm() const { return comm_; }
VTKMDIY_MPI_EXPORT_FUNCTION void read_bov(const DiscreteBounds& bounds, int ndims, const int dims[], char* buffer, size_t offset, const datatype& dt, bool collective, int chunk);
VTKMDIY_MPI_EXPORT_FUNCTION void write_bov(const DiscreteBounds& bounds, const DiscreteBounds& core, int ndims, const int dims[], const char* buffer, size_t offset, const datatype& dt, bool collective, int chunk);
MPI_File& handle() { return fh; }
const communicator& comm() const { return comm_; }
private:
const communicator& comm_;
MPI_File fh;
communicator comm_;
protected: // mark protected to avoid the "unused private field" warning
DIY_MPI_File fh;
};
}
}
}
// Opens `filename` with MPI_File_open on `comm__` using `mode` and
// MPI_INFO_NULL; the handle is stored in fh.
// Throws std::runtime_error if MPI_File_open returns non-zero, or (via
// DIY_UNSUPPORTED_MPI_CALL) when VTKM_DIY_NO_MPI is defined.
diy::mpi::io::file::
file(const communicator& comm__, const std::string& filename, int mode)
: comm_(comm__)
{
#ifndef VTKM_DIY_NO_MPI
int ret = MPI_File_open(comm__, const_cast<char*>(filename.c_str()), mode, MPI_INFO_NULL, &fh);
if (ret)
throw std::runtime_error("DIY cannot open file: " + filename);
#else
DIY_UNUSED(comm__);
DIY_UNUSED(filename);
DIY_UNUSED(mode);
DIY_UNSUPPORTED_MPI_CALL(MPI_File_open);
#endif
}
// Closes the file with MPI_File_close unless fh is already MPI_FILE_NULL.
// No-op when VTKM_DIY_NO_MPI is defined.
void
diy::mpi::io::file::
close()
{
#ifndef VTKM_DIY_NO_MPI
if (fh != MPI_FILE_NULL)
MPI_File_close(&fh);
#endif
}
// Returns the file size reported by MPI_File_get_size.
// Throws via DIY_UNSUPPORTED_MPI_CALL when VTKM_DIY_NO_MPI is defined.
diy::mpi::io::offset
diy::mpi::io::file::
size() const
{
#ifndef VTKM_DIY_NO_MPI
offset sz;
MPI_File_get_size(fh, &sz);
return sz;
#else
DIY_UNSUPPORTED_MPI_CALL(MPI_File_get_size);
#endif
}
// Sets the file size to `size_` with MPI_File_set_size.
// Throws via DIY_UNSUPPORTED_MPI_CALL when VTKM_DIY_NO_MPI is defined.
void
diy::mpi::io::file::
resize(diy::mpi::io::offset size_)
{
#ifndef VTKM_DIY_NO_MPI
MPI_File_set_size(fh, size_);
#else
DIY_UNUSED(size_);
DIY_UNSUPPORTED_MPI_CALL(MPI_File_set_size);
#endif
}
// Reads `size_` items of the char MPI datatype at file offset `o` into
// `buffer` using MPI_File_read_at. The returned status is not inspected.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via DIY_UNSUPPORTED_MPI_CALL when VTKM_DIY_NO_MPI is defined.
void
diy::mpi::io::file::
read_at(offset o, char* buffer, size_t size_)
{
#ifndef VTKM_DIY_NO_MPI
status s;
MPI_File_read_at(fh, o, buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
#else
DIY_UNUSED(o);
DIY_UNUSED(buffer);
DIY_UNUSED(size_);
DIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at);
#endif
}
template<class T>
void
@ -143,21 +86,6 @@ read_at(offset o, std::vector<T>& data)
read_at(o, &data[0], data.size()*sizeof(T));
}
// Same as read_at but issues MPI_File_read_at_all: reads `size_` items of the
// char MPI datatype at file offset `o` into `buffer`.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via DIY_UNSUPPORTED_MPI_CALL when VTKM_DIY_NO_MPI is defined.
void
diy::mpi::io::file::
read_at_all(offset o, char* buffer, size_t size_)
{
#ifndef VTKM_DIY_NO_MPI
status s;
MPI_File_read_at_all(fh, o, buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
#else
DIY_UNUSED(o);
DIY_UNUSED(buffer);
DIY_UNUSED(size_);
DIY_UNSUPPORTED_MPI_CALL(MPI_File_read_at_all);
#endif
}
template<class T>
void
diy::mpi::io::file::
@ -166,21 +94,6 @@ read_at_all(offset o, std::vector<T>& data)
read_at_all(o, (char*) &data[0], data.size()*sizeof(T));
}
// Writes `size_` items of the char MPI datatype from `buffer` at file offset
// `o` using MPI_File_write_at. The returned status is not inspected.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via DIY_UNSUPPORTED_MPI_CALL when VTKM_DIY_NO_MPI is defined.
void
diy::mpi::io::file::
write_at(offset o, const char* buffer, size_t size_)
{
#ifndef VTKM_DIY_NO_MPI
status s;
// The cast drops const because the MPI call is given a void* buffer.
MPI_File_write_at(fh, o, (void *)buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
#else
DIY_UNUSED(o);
DIY_UNUSED(buffer);
DIY_UNUSED(size_);
DIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at);
#endif
}
template<class T>
void
diy::mpi::io::file::
@ -189,21 +102,6 @@ write_at(offset o, const std::vector<T>& data)
write_at(o, (const char*) &data[0], data.size()*sizeof(T));
}
// Same as write_at but issues MPI_File_write_at_all: writes `size_` items of
// the char MPI datatype from `buffer` at file offset `o`.
// Note: `size_` is narrowed to int for the MPI count argument.
// Throws via DIY_UNSUPPORTED_MPI_CALL when VTKM_DIY_NO_MPI is defined.
void
diy::mpi::io::file::
write_at_all(offset o, const char* buffer, size_t size_)
{
#ifndef VTKM_DIY_NO_MPI
status s;
// The cast drops const because the MPI call is given a void* buffer.
MPI_File_write_at_all(fh, o, (void *)buffer, static_cast<int>(size_), detail::get_mpi_datatype<char>(), &s.s);
#else
DIY_UNUSED(o);
DIY_UNUSED(buffer);
DIY_UNUSED(size_);
DIY_UNSUPPORTED_MPI_CALL(MPI_File_write_at_all);
#endif
}
template<class T>
void
diy::mpi::io::file::
@ -212,4 +110,11 @@ write_at_all(offset o, const std::vector<T>& data)
write_at_all(o, &data[0], data.size()*sizeof(T));
}
}
} // diy::mpi::io
#ifndef VTKMDIY_MPI_AS_LIB
#include "io.cpp"
#endif
#endif // VTKMDIY_MPI_IO_HPP

@ -0,0 +1,34 @@
#ifndef VTKMDIY_MPI_MPICAST_HPP
#define VTKMDIY_MPI_MPICAST_HPP
/// This header provides convenience functions to cast from diy's type erased MPI objects
/// to their correct types.
#ifndef VTKMDIY_HAS_MPI
# include <mpi.h>
#endif
namespace diy
{
namespace mpi
{
// For each MPI type this generates:
//   mpi_cast(DIY_<type>&) / mpi_cast(const DIY_<type>&)
//       reinterpret the wrapper's storage as the real MPI type and return a
//       reference to it;
//   make_DIY_<type>(const <type>&)
//       build a wrapper by default-constructing it and assigning the MPI
//       object through mpi_cast.
#define DEFINE_MPI_CAST(mpitype) \
inline mpitype& mpi_cast(DIY_##mpitype& obj) { return *reinterpret_cast<mpitype*>(&obj); } \
inline const mpitype& mpi_cast(const DIY_##mpitype& obj) { return *reinterpret_cast<const mpitype*>(&obj); } \
inline DIY_##mpitype make_DIY_##mpitype(const mpitype& obj) { DIY_##mpitype ret; mpi_cast(ret) = obj; return ret; }
DEFINE_MPI_CAST(MPI_Comm)
DEFINE_MPI_CAST(MPI_Datatype)
DEFINE_MPI_CAST(MPI_Status)
DEFINE_MPI_CAST(MPI_Request)
DEFINE_MPI_CAST(MPI_Op)
DEFINE_MPI_CAST(MPI_File)
DEFINE_MPI_CAST(MPI_Win)
// The helper macro is local to this header.
#undef DEFINE_MPI_CAST
}
} // diy::mpi
#endif // VTKMDIY_MPI_MPICAST_HPP

@ -0,0 +1,51 @@
#ifndef VTKMDIY_MPI_MPITYPES_H
#define VTKMDIY_MPI_MPITYPES_H
#cmakedefine TYPESIZE_MPI_Comm @TYPESIZE_MPI_Comm@
#cmakedefine TYPESIZE_MPI_Datatype @TYPESIZE_MPI_Datatype@
#cmakedefine TYPESIZE_MPI_Status @TYPESIZE_MPI_Status@
#cmakedefine TYPESIZE_MPI_Request @TYPESIZE_MPI_Request@
#cmakedefine TYPESIZE_MPI_Op @TYPESIZE_MPI_Op@
#cmakedefine TYPESIZE_MPI_File @TYPESIZE_MPI_File@
#cmakedefine TYPESIZE_MPI_Win @TYPESIZE_MPI_Win@
namespace diy
{
namespace mpi
{
// When VTKMDIY_HAS_MPI is defined, check at compile time that the real MPI
// type fits inside its opaque DIY_<type> storage; otherwise the check expands
// to nothing.
#if defined(VTKMDIY_HAS_MPI)
# define ASSERT_MPI_TYPE_SIZE(mpitype) static_assert(sizeof(mpitype) <= sizeof(DIY_##mpitype), "");
#else
# define ASSERT_MPI_TYPE_SIZE(mpitype)
#endif
// Opaque storage for an MPI object: an array of void* holding at least
// TYPESIZE_<type> bytes (rounded up to a whole number of pointers).
#define DEFINE_DIY_MPI_TYPE(mpitype) \
struct DIY_##mpitype { \
void* data[((TYPESIZE_##mpitype) + sizeof(void*) - 1)/sizeof(void*)]; \
}; \
ASSERT_MPI_TYPE_SIZE(mpitype)
DEFINE_DIY_MPI_TYPE(MPI_Comm)
DEFINE_DIY_MPI_TYPE(MPI_Datatype)
DEFINE_DIY_MPI_TYPE(MPI_Status)
DEFINE_DIY_MPI_TYPE(MPI_Request)
DEFINE_DIY_MPI_TYPE(MPI_Op)
DEFINE_DIY_MPI_TYPE(MPI_File)
DEFINE_DIY_MPI_TYPE(MPI_Win)
// The helper macros are local to this header.
#undef DEFINE_DIY_MPI_TYPE
#undef ASSERT_MPI_TYPE_SIZE
}
} // diy::mpi
#undef TYPESIZE_MPI_Comm
#undef TYPESIZE_MPI_Datatype
#undef TYPESIZE_MPI_Status
#undef TYPESIZE_MPI_Request
#undef TYPESIZE_MPI_Op
#undef TYPESIZE_MPI_File
#undef TYPESIZE_MPI_Win
#endif // VTKMDIY_MPI_MPITYPES_H

@ -22,18 +22,17 @@ static const int MPI_THREAD_MULTIPLE = 3;
/* define datatypes */
using MPI_Datatype = size_t;
#define VTKM_DIY_NO_MPI_DATATYPE(cpp_type, mpi_type) \
#define VTKMDIY_NO_MPI_DATATYPE(cpp_type, mpi_type) \
static const MPI_Datatype mpi_type = sizeof(cpp_type);
VTKM_DIY_NO_MPI_DATATYPE(char, MPI_BYTE);
VTKM_DIY_NO_MPI_DATATYPE(int, MPI_INT);
VTKM_DIY_NO_MPI_DATATYPE(unsigned, MPI_UNSIGNED);
VTKM_DIY_NO_MPI_DATATYPE(long, MPI_LONG);
VTKM_DIY_NO_MPI_DATATYPE(unsigned long, MPI_UNSIGNED_LONG);
VTKM_DIY_NO_MPI_DATATYPE(long long, MPI_LONG_LONG_INT);
VTKM_DIY_NO_MPI_DATATYPE(unsigned long long, MPI_UNSIGNED_LONG_LONG);
VTKM_DIY_NO_MPI_DATATYPE(float, MPI_FLOAT);
VTKM_DIY_NO_MPI_DATATYPE(double, MPI_DOUBLE);
#endif
VTKMDIY_NO_MPI_DATATYPE(char, MPI_BYTE);
VTKMDIY_NO_MPI_DATATYPE(int, MPI_INT);
VTKMDIY_NO_MPI_DATATYPE(unsigned, MPI_UNSIGNED);
VTKMDIY_NO_MPI_DATATYPE(long, MPI_LONG);
VTKMDIY_NO_MPI_DATATYPE(unsigned long, MPI_UNSIGNED_LONG);
VTKMDIY_NO_MPI_DATATYPE(long long, MPI_LONG_LONG_INT);
VTKMDIY_NO_MPI_DATATYPE(unsigned long long, MPI_UNSIGNED_LONG_LONG);
VTKMDIY_NO_MPI_DATATYPE(float, MPI_FLOAT);
VTKMDIY_NO_MPI_DATATYPE(double, MPI_DOUBLE);
/* status type */
struct MPI_Status
@ -48,10 +47,8 @@ struct MPI_Status
/* define MPI_Request */
using MPI_Request = int;
#ifndef DIY_UNSUPPORTED_MPI_CALL
#define DIY_UNSUPPORTED_MPI_CALL(name) \
throw std::runtime_error("`" #name "` not supported when VTKM_DIY_NO_MPI is defined.");
#endif
#define VTKMDIY_UNSUPPORTED_MPI_CALL(name) \
throw std::runtime_error("`" #name "` not supported when VTKMDIY_HAS_MPI is false.");
/* define operations */
using MPI_Op = int;
@ -63,7 +60,7 @@ static const MPI_Op MPI_LAND = 0;
static const MPI_Op MPI_LOR = 0;
/* mpi i/o stuff */
using MPI_Offset = size_t;
using MPI_Offset = long long;
using MPI_File = int;
static const MPI_File MPI_FILE_NULL = 0;
@ -78,7 +75,7 @@ static const int MPI_MODE_APPEND = 128;
static const int MPI_MODE_SEQUENTIAL = 256;
/* define window type */
using MPI_Win = int;
using MPI_Win = void*;
/* window fence assertions */
static const int MPI_MODE_NOSTORE = 1;
@ -90,3 +87,5 @@ static const int MPI_MODE_NOCHECK = 16;
/* window lock types */
static const int MPI_LOCK_SHARED = 1;
static const int MPI_LOCK_EXCLUSIVE = 2;
#endif // VTKMDIY_MPI_NO_MPI_HPP

@ -0,0 +1,33 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "operations.hpp"
#endif
#include <functional>
namespace diy
{
namespace mpi
{
namespace detail
{
// Translates a BuiltinOperation identifier into an `operation` wrapping the
// corresponding MPI reduction op. An unrecognised id yields a
// value-initialized operation.
operation get_builtin_operation(BuiltinOperation id)
{
  switch (id)
  {
    case OP_MAXIMUM:
      return operation(make_DIY_MPI_Op(MPI_MAX));
    case OP_MINIMUM:
      return operation(make_DIY_MPI_Op(MPI_MIN));
    case OP_PLUS:
      return operation(make_DIY_MPI_Op(MPI_SUM));
    case OP_MULTIPLIES:
      return operation(make_DIY_MPI_Op(MPI_PROD));
    case OP_LOGICAL_AND:
      return operation(make_DIY_MPI_Op(MPI_LAND));
    case OP_LOGICAL_OR:
      return operation(make_DIY_MPI_Op(MPI_LOR));
    default:
      break;
  }
  return operation{};
}
}
}
} // diy::mpi::detail

@ -1,3 +1,8 @@
#ifndef VTKMDIY_MPI_OPERATIONS_HPP
#define VTKMDIY_MPI_OPERATIONS_HPP
#include "config.hpp"
#include <algorithm> // for std::min/max
#include <functional>
@ -7,6 +12,19 @@ namespace mpi
{
//! \addtogroup MPI
//!@{
// Wrapper around an MPI reduction operation, stored as the DIY_MPI_Op
// `handle`.
struct operation
{
operation() = default;
operation(const DIY_MPI_Op& op) : handle(op) {}
// Direct interoperability with the raw MPI_Op is compiled in only when
// VTKMDIY_MPI_AS_LIB is not defined.
#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
operation(const MPI_Op& op) : handle(op) {}
operator MPI_Op() { return handle; }
#endif
DIY_MPI_Op handle;
};
template<class U>
struct maximum { const U& operator()(const U& x, const U& y) const { return std::max(x,y); } };
template<class U>
@ -15,13 +33,32 @@ namespace mpi
namespace detail
{
template<class T> struct mpi_op { static MPI_Op get(); };
template<class U> struct mpi_op< maximum<U> > { static MPI_Op get() { return MPI_MAX; } };
template<class U> struct mpi_op< minimum<U> > { static MPI_Op get() { return MPI_MIN; } };
template<class U> struct mpi_op< std::plus<U> > { static MPI_Op get() { return MPI_SUM; } };
template<class U> struct mpi_op< std::multiplies<U> > { static MPI_Op get() { return MPI_PROD; } };
template<class U> struct mpi_op< std::logical_and<U> > { static MPI_Op get() { return MPI_LAND; } };
template<class U> struct mpi_op< std::logical_or<U> > { static MPI_Op get() { return MPI_LOR; } };
}
// Identifiers for the built-in reduction operations; get_builtin_operation()
// maps each one to the matching MPI op (MPI_MAX, MPI_MIN, MPI_SUM, MPI_PROD,
// MPI_LAND, MPI_LOR respectively).
enum BuiltinOperation {
OP_MAXIMUM = 0,
OP_MINIMUM,
OP_PLUS,
OP_MULTIPLIES,
OP_LOGICAL_AND,
OP_LOGICAL_OR
};
VTKMDIY_MPI_EXPORT_FUNCTION operation get_builtin_operation(BuiltinOperation id);
template<class T> struct mpi_op;
template<class U> struct mpi_op< maximum<U> > { static operation get() { return get_builtin_operation(OP_MAXIMUM); } };
template<class U> struct mpi_op< minimum<U> > { static operation get() { return get_builtin_operation(OP_MINIMUM); } };
template<class U> struct mpi_op< std::plus<U> > { static operation get() { return get_builtin_operation(OP_PLUS); } };
template<class U> struct mpi_op< std::multiplies<U> > { static operation get() { return get_builtin_operation(OP_MULTIPLIES); } };
template<class U> struct mpi_op< std::logical_and<U> > { static operation get() { return get_builtin_operation(OP_LOGICAL_AND); } };
template<class U> struct mpi_op< std::logical_or<U> > { static operation get() { return get_builtin_operation(OP_LOGICAL_OR); } };
}
}
} // diy::mpi
#ifndef VTKMDIY_MPI_AS_LIB
#include "operations.cpp"
#endif
#endif // VTKMDIY_MPI_OPERATIONS_HPP

@ -1,3 +1,6 @@
#ifndef VTKMDIY_MPI_OPTIONAL_HPP
#define VTKMDIY_MPI_OPTIONAL_HPP
namespace diy
{
namespace mpi
@ -53,3 +56,5 @@ operator=(const optional& o)
return *this;
}
#endif // VTKMDIY_MPI_OPTIONAL_HPP

@ -0,0 +1,96 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "point-to-point.hpp"
#endif
namespace diy
{
namespace mpi
{
// Library mode only: definitions of the wildcard source/tag constants,
// initialized from MPI_ANY_SOURCE / MPI_ANY_TAG. EXPORT_MACRO expands to
// VTKMDIY_MPI_EXPORT under MSVC and to nothing elsewhere.
#ifdef VTKMDIY_MPI_AS_LIB
# ifdef _MSC_VER
# define EXPORT_MACRO VTKMDIY_MPI_EXPORT
# else
# define EXPORT_MACRO
# endif
EXPORT_MACRO const int any_source = MPI_ANY_SOURCE;
EXPORT_MACRO const int any_tag = MPI_ANY_TAG;
# undef EXPORT_MACRO
#endif
namespace detail
{
// Sends `count` elements of `type`, starting at `data`, to rank `dest` with
// message tag `tag` over `comm`, by forwarding to MPI_Send. Without MPI the
// call is reported through VTKMDIY_UNSUPPORTED_MPI_CALL.
void send(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type)
{
#if VTKMDIY_HAS_MPI
  MPI_Send(data,
           count,
           mpi_cast(type.handle),
           dest,
           tag,
           mpi_cast(comm));
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) comm;
  (void) dest;
  (void) tag;
  (void) data;
  (void) count;
  (void) type;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Send);
#endif
}
// Probes `comm` for a message matching `source` and `tag` via MPI_Probe and
// returns the resulting status. Without MPI the call is reported through
// VTKMDIY_UNSUPPORTED_MPI_CALL.
status probe(DIY_MPI_Comm comm, int source, int tag)
{
#if VTKMDIY_HAS_MPI
  status probed;
  MPI_Probe(source, tag, mpi_cast(comm), &mpi_cast(probed.handle));
  return probed;
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) comm;
  (void) source;
  (void) tag;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Probe);
#endif
}
// Receives up to `count` elements of `type` into `data` from rank `source`
// with tag `tag` over `comm`, by forwarding to MPI_Recv, and returns the
// receive status. Without MPI the call is reported through
// VTKMDIY_UNSUPPORTED_MPI_CALL.
status recv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type)
{
#if VTKMDIY_HAS_MPI
  status received;
  MPI_Recv(data,
           count,
           mpi_cast(type.handle),
           source,
           tag,
           mpi_cast(comm),
           &mpi_cast(received.handle));
  return received;
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) comm;
  (void) source;
  (void) tag;
  (void) data;
  (void) count;
  (void) type;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
#endif
}
// Starts a send of `count` elements of `type` from `data` to rank `dest`
// with tag `tag` over `comm` via MPI_Isend and returns the request wrapping
// the MPI handle. Without MPI the call is reported through
// VTKMDIY_UNSUPPORTED_MPI_CALL.
request isend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type)
{
#if VTKMDIY_HAS_MPI
  request pending;
  MPI_Isend(data,
            count,
            mpi_cast(type.handle),
            dest,
            tag,
            mpi_cast(comm),
            &mpi_cast(pending.handle));
  return pending;
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) comm;
  (void) dest;
  (void) tag;
  (void) data;
  (void) count;
  (void) type;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Isend);
#endif
}
// Same argument contract as isend(), but forwards to MPI_Issend. Returns the
// request wrapping the MPI handle. Without MPI the call is reported through
// VTKMDIY_UNSUPPORTED_MPI_CALL.
request issend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type)
{
#if VTKMDIY_HAS_MPI
  request pending;
  MPI_Issend(data,
             count,
             mpi_cast(type.handle),
             dest,
             tag,
             mpi_cast(comm),
             &mpi_cast(pending.handle));
  return pending;
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) comm;
  (void) dest;
  (void) tag;
  (void) data;
  (void) count;
  (void) type;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Issend);
#endif
}
// Starts a receive of up to `count` elements of `type` into `data` from rank
// `source` with tag `tag` over `comm` via MPI_Irecv and returns the request
// wrapping the MPI handle. Without MPI the call is reported through
// VTKMDIY_UNSUPPORTED_MPI_CALL.
request irecv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type)
{
#if VTKMDIY_HAS_MPI
  request pending;
  MPI_Irecv(data,
            count,
            mpi_cast(type.handle),
            source,
            tag,
            mpi_cast(comm),
            &mpi_cast(pending.handle));
  return pending;
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) comm;
  (void) source;
  (void) tag;
  (void) data;
  (void) count;
  (void) type;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Irecv);
#endif
}
}
}
} // diy::mpi::detail

@ -1,147 +1,84 @@
#ifndef VTKMDIY_MPI_POINT_TO_POINT_HPP
#define VTKMDIY_MPI_POINT_TO_POINT_HPP
#include "config.hpp"
#include "datatypes.hpp"
#include "request.hpp"
#include "status.hpp"
#include <vector>
namespace diy
{
namespace mpi
{
#ifndef VTKMDIY_MPI_AS_LIB
constexpr int any_source = MPI_ANY_SOURCE;
constexpr int any_tag = MPI_ANY_TAG;
#else
VTKMDIY_MPI_EXPORT extern const int any_source;
VTKMDIY_MPI_EXPORT extern const int any_tag;
#endif
namespace detail
{
// send
template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
struct send;
VTKMDIY_MPI_EXPORT_FUNCTION void send(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type);
VTKMDIY_MPI_EXPORT_FUNCTION request isend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type);
VTKMDIY_MPI_EXPORT_FUNCTION request issend(DIY_MPI_Comm comm, int dest, int tag, const void* data, int count, const datatype& type);
VTKMDIY_MPI_EXPORT_FUNCTION status probe(DIY_MPI_Comm comm, int source, int tag);
VTKMDIY_MPI_EXPORT_FUNCTION status recv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type);
VTKMDIY_MPI_EXPORT_FUNCTION request irecv(DIY_MPI_Comm comm, int source, int tag, void* data, int count, const datatype& type);
template<class T>
struct send<T, true_type>
template <class T>
inline void send(DIY_MPI_Comm comm, int dest, int tag, const T& x)
{
void operator()(MPI_Comm comm, int dest, int tag, const T& x) const
{
#ifndef VTKM_DIY_NO_MPI
typedef mpi_datatype<T> Datatype;
MPI_Send((void*) Datatype::address(x),
Datatype::count(x),
Datatype::datatype(),
dest, tag, comm);
#else
(void) comm; (void) dest; (void) tag; (void) x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Send);
#endif
}
};
static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
send(comm, dest, tag, address(x), count(x), datatype_of(x));
}
// recv
template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
struct recv;
template<class T>
struct recv<T, true_type>
template <class T>
status recv(DIY_MPI_Comm comm, int source, int tag, T& x)
{
status operator()(MPI_Comm comm, int source, int tag, T& x) const
{
#ifndef VTKM_DIY_NO_MPI
typedef mpi_datatype<T> Datatype;
status s;
MPI_Recv((void*) Datatype::address(x),
Datatype::count(x),
Datatype::datatype(),
source, tag, comm, &s.s);
return s;
#else
(void) comm; (void) source; (void) tag; (void) x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
#endif
}
};
static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
return recv(comm, source, tag, address(x), count(x), datatype_of(x));
}
template<class U>
struct recv<std::vector<U>, true_type>
template <class T>
status recv(DIY_MPI_Comm comm, int source, int tag, std::vector<T>& x)
{
status operator()(MPI_Comm comm, int source, int tag, std::vector<U>& x) const
{
#ifndef VTKM_DIY_NO_MPI
status s;
auto s = probe(comm, source, tag);
x.resize(static_cast<size_t>(s.count<T>()));
return recv(comm, source, tag, address(x), count(x), datatype_of(x));
}
MPI_Probe(source, tag, comm, &s.s);
x.resize(s.count<U>());
MPI_Recv(&x[0], static_cast<int>(x.size()), get_mpi_datatype<U>(), source, tag, comm, &s.s);
return s;
#else
(void) comm; (void) source; (void) tag; (void) x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Recv);
#endif
}
};
// isend
template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
struct isend;
template<class T>
struct isend<T, true_type>
template <class T>
request isend(DIY_MPI_Comm comm, int dest, int tag, const T& x)
{
request operator()(MPI_Comm comm, int dest, int tag, const T& x) const
{
#ifndef VTKM_DIY_NO_MPI
request r;
typedef mpi_datatype<T> Datatype;
MPI_Isend((void*) Datatype::address(x),
Datatype::count(x),
Datatype::datatype(),
dest, tag, comm, &r.r);
return r;
#else
(void) comm; (void) dest; (void) tag; (void) x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Isend);
#endif
}
};
static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
return isend(comm, dest, tag, address(x), count(x), datatype_of(x));
}
// issend
template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
struct issend;
template<class T>
struct issend<T, true_type>
template <class T>
request issend(DIY_MPI_Comm comm, int dest, int tag, const T& x)
{
request operator()(MPI_Comm comm, int dest, int tag, const T& x) const
{
#ifndef VTKM_DIY_NO_MPI
request r;
typedef mpi_datatype<T> Datatype;
MPI_Issend((void*) Datatype::address(x),
Datatype::count(x),
Datatype::datatype(),
dest, tag, comm, &r.r);
return r;
#else
(void) comm; (void) dest; (void) tag; (void) x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Issend);
#endif
}
};
static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
return issend(comm, dest, tag, address(x), count(x), datatype_of(x));
}
// irecv
template< class T, class is_mpi_datatype_ = typename is_mpi_datatype<T>::type >
struct irecv;
template<class T>
struct irecv<T, true_type>
template <class T>
request irecv(DIY_MPI_Comm comm, int source, int tag, T& x)
{
request operator()(MPI_Comm comm, int source, int tag, T& x) const
{
#ifndef VTKM_DIY_NO_MPI
request r;
typedef mpi_datatype<T> Datatype;
MPI_Irecv(Datatype::address(x),
Datatype::count(x),
Datatype::datatype(),
source, tag, comm, &r.r);
return r;
#else
(void) comm; (void) source; (void) tag; (void) x;
DIY_UNSUPPORTED_MPI_CALL(MPI_Irecv);
#endif
}
};
}
static_assert(std::is_same<typename is_mpi_datatype<T>::type, true_type>::value, "is_mpi_datatype<T>::type must be true_type");
return irecv(comm, source, tag, address(x), count(x), datatype_of(x));
}
}
}
} // diy::mpi::detail
#ifndef VTKMDIY_MPI_AS_LIB
#include "point-to-point.cpp"
#endif
#endif // VTKMDIY_MPI_POINT_TO_POINT_HPP

@ -0,0 +1,45 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "request.hpp"
#endif
#include <algorithm>
#include <iterator>
// Default constructor. When built as a library without MPI, every slot of
// the handle's `data` storage is explicitly set to nullptr; in all other
// configurations the compiler-generated default is used.
#if defined(VTKMDIY_MPI_AS_LIB) && !VTKMDIY_HAS_MPI
diy::mpi::request::request()
{
  for (auto& slot : this->handle.data)
    slot = nullptr;
}
#else
diy::mpi::request::request() = default;
#endif
// Calls MPI_Wait on this request's handle and returns the status it fills
// in. Without MPI the call is reported through VTKMDIY_UNSUPPORTED_MPI_CALL.
diy::mpi::status diy::mpi::request::wait()
{
#if VTKMDIY_HAS_MPI
  status completed;
  MPI_Wait(&mpi_cast(handle), &mpi_cast(completed.handle));
  return completed;
#else
  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::request::wait);
#endif
}
// Calls MPI_Test on this request's handle. Returns the status when MPI_Test
// sets its completion flag, and an empty optional otherwise. Without MPI an
// empty optional is always returned.
diy::mpi::optional<diy::mpi::status> diy::mpi::request::test()
{
#if VTKMDIY_HAS_MPI
  status current;
  int done;
  MPI_Test(&mpi_cast(handle), &done, &mpi_cast(current.handle));
  if (done)
  {
    return current;
  }
#endif
  return optional<status>();
}
// Calls MPI_Cancel on this request's handle. Without MPI this does nothing.
void diy::mpi::request::cancel()
{
#if VTKMDIY_HAS_MPI
  MPI_Cancel(&mpi_cast(this->handle));
#endif
}

@ -1,50 +1,29 @@
#ifndef VTKMDIY_MPI_REQUEST_HPP
#define VTKMDIY_MPI_REQUEST_HPP
#include "config.hpp"
#include "status.hpp"
#include "optional.hpp"
namespace diy
{
namespace mpi
{
struct request
{
inline
status wait();
inline
optional<status> test();
inline
void cancel();
VTKMDIY_MPI_EXPORT_FUNCTION request();
VTKMDIY_MPI_EXPORT_FUNCTION status wait();
VTKMDIY_MPI_EXPORT_FUNCTION optional<status> test();
VTKMDIY_MPI_EXPORT_FUNCTION void cancel();
MPI_Request r;
DIY_MPI_Request handle;
};
}
}
diy::mpi::status
diy::mpi::request::wait()
{
#ifndef VTKM_DIY_NO_MPI
status s;
MPI_Wait(&r, &s.s);
return s;
#else
DIY_UNSUPPORTED_MPI_CALL(diy::mpi::request::wait);
#endif
}
} // diy::mpi
diy::mpi::optional<diy::mpi::status>
diy::mpi::request::test()
{
#ifndef VTKM_DIY_NO_MPI
status s;
int flag;
MPI_Test(&r, &flag, &s.s);
if (flag)
return s;
#ifndef VTKMDIY_MPI_AS_LIB
#include "request.cpp"
#endif
return optional<status>();
}
void
diy::mpi::request::cancel()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Cancel(&r);
#endif
}
#endif // VTKMDIY_MPI_REQUEST_HPP

@ -0,0 +1,30 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "status.hpp"
#endif
int diy::mpi::status::source() const { return mpi_cast(handle).MPI_SOURCE; }
int diy::mpi::status::tag() const { return mpi_cast(handle).MPI_TAG; }
int diy::mpi::status::error() const { return mpi_cast(handle).MPI_ERROR; }
// Returns true when MPI_Test_cancelled sets a non-zero flag for this status.
// Without MPI the call is reported through VTKMDIY_UNSUPPORTED_MPI_CALL.
bool diy::mpi::status::cancelled() const
{
#if VTKMDIY_HAS_MPI
  int was_cancelled;
  MPI_Test_cancelled(&mpi_cast(handle), &was_cancelled);
  return was_cancelled != 0;
#else
  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::cancelled);
#endif
}
// Returns the element count that MPI_Get_count reports for this status when
// interpreted as elements of `type`. Without MPI the call is reported
// through VTKMDIY_UNSUPPORTED_MPI_CALL.
int diy::mpi::status::count(const diy::mpi::datatype& type) const
{
#if VTKMDIY_HAS_MPI
  int n_elements;
  MPI_Get_count(&mpi_cast(handle), mpi_cast(type.handle), &n_elements);
  return n_elements;
#else
  (void) type;
  VTKMDIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::count);
#endif
}

@ -1,49 +1,42 @@
#ifndef VTKMDIY_MPI_STATUS_HPP
#define VTKMDIY_MPI_STATUS_HPP
#include "config.hpp"
#include "datatypes.hpp"
namespace diy
{
namespace mpi
{
struct status
{
int source() const { return s.MPI_SOURCE; }
int tag() const { return s.MPI_TAG; }
int error() const { return s.MPI_ERROR; }
status() = default;
status(const DIY_MPI_Status& s) : handle(s) {}
inline
bool cancelled() const;
#ifndef VTKMDIY_MPI_AS_LIB // only available in header-only mode
status(const MPI_Status& s) : handle(s) {}
operator MPI_Status() { return handle; }
#endif
template<class T>
int count() const;
VTKMDIY_MPI_EXPORT_FUNCTION int source() const;
VTKMDIY_MPI_EXPORT_FUNCTION int tag() const;
VTKMDIY_MPI_EXPORT_FUNCTION int error() const;
VTKMDIY_MPI_EXPORT_FUNCTION bool cancelled() const;
VTKMDIY_MPI_EXPORT_FUNCTION int count(const datatype& type) const;
operator MPI_Status&() { return s; }
operator const MPI_Status&() const { return s; }
template<class T> int count() const
{
return this->count(detail::get_mpi_datatype<T>());
}
MPI_Status s;
DIY_MPI_Status handle;
};
}
}
}
} // diy::mpi
bool
diy::mpi::status::cancelled() const
{
#ifndef VTKM_DIY_NO_MPI
int flag;
MPI_Test_cancelled(const_cast<MPI_Status*>(&s), &flag);
return flag;
#else
DIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::cancelled);
#ifndef VTKMDIY_MPI_AS_LIB
#include "status.cpp"
#endif
}
template<class T>
int
diy::mpi::status::count() const
{
#ifndef VTKM_DIY_NO_MPI
int c;
MPI_Get_count(const_cast<MPI_Status*>(&s), detail::get_mpi_datatype<T>(), &c);
return c;
#else
DIY_UNSUPPORTED_MPI_CALL(diy::mpi::status::count);
#endif
}
#endif // VTKMDIY_MPI_STATUS_HPP

@ -0,0 +1,208 @@
#ifdef VTKMDIY_MPI_AS_LIB
#include "window.hpp"
#endif
#include <algorithm>
namespace diy
{
namespace mpi
{
#ifdef VTKMDIY_MPI_AS_LIB
# ifdef _MSC_VER
# define EXPORT_MACRO VTKMDIY_MPI_EXPORT
# else
# define EXPORT_MACRO
# endif
EXPORT_MACRO const int nocheck = MPI_MODE_NOCHECK;
# undef EXPORT_MACRO
#endif
namespace detail
{
// Creates a window over the memory at `base`. With MPI this forwards
// `base`, `size` and `disp` to MPI_Win_create on the communicator's handle
// with MPI_INFO_NULL. Without MPI the window handle is built directly from
// `base` via make_DIY_MPI_Win and the other arguments are ignored.
DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int disp)
{
#if VTKMDIY_HAS_MPI
  DIY_MPI_Win created;
  MPI_Win_create(base, size, disp, MPI_INFO_NULL, mpi_cast(comm.handle()), &mpi_cast(created));
  return created;
#else
  (void) comm;
  (void) size;
  (void) disp;
  return make_DIY_MPI_Win(base);
#endif
}
// Releases `win` through MPI_Win_free. Without MPI this does nothing.
void win_free(DIY_MPI_Win& win)
{
#if !VTKMDIY_HAS_MPI
  (void) win;
#else
  MPI_Win_free(&mpi_cast(win));
#endif
}
// Writes `count` elements of `type` from `data` into the window at element
// offset `offset`. With MPI this is an MPI_Put to `rank`, using the same
// count and datatype on both the origin and target side. Without MPI the
// bytes are copied into the window's buffer and `rank` is ignored.
void put(const DIY_MPI_Win& win, const void* data, int count, const datatype& type, int rank, unsigned offset)
{
#if VTKMDIY_HAS_MPI
  MPI_Put(data, count, mpi_cast(type.handle),
          rank, offset,
          count, mpi_cast(type.handle),
          mpi_cast(win));
#else
  (void) rank;
  // In this configuration the datatype handle converts to a size_t that is
  // used as the per-element byte size.
  const size_t elem_bytes = mpi_cast(type.handle);
  void* base = mpi_cast(win);
  const int8_t* src = static_cast<const int8_t*>(data);
  int8_t* dst = static_cast<int8_t*>(base) + (offset * elem_bytes);
  std::copy_n(src, elem_bytes * static_cast<size_t>(count), dst);
#endif
}
// Reads `count` elements of `type` from the window at element offset
// `offset` into `data`. With MPI this is an MPI_Get from `rank`, using the
// same count and datatype on both sides. Without MPI the bytes are copied
// out of the window's buffer and `rank` is ignored.
void get(const DIY_MPI_Win& win, void* data, int count, const datatype& type, int rank, unsigned offset)
{
#if VTKMDIY_HAS_MPI
  MPI_Get(data, count, mpi_cast(type.handle),
          rank, offset,
          count, mpi_cast(type.handle),
          mpi_cast(win));
#else
  (void) rank;
  // In this configuration the datatype handle converts to a size_t that is
  // used as the per-element byte size.
  const size_t elem_bytes = mpi_cast(type.handle);
  const void* base = mpi_cast(win);
  const int8_t* src = static_cast<const int8_t*>(base) + (offset * elem_bytes);
  int8_t* dst = static_cast<int8_t*>(data);
  std::copy_n(src, elem_bytes * static_cast<size_t>(count), dst);
#endif
}
// Forwards to MPI_Win_fence with the given assertion flags. Without MPI this
// does nothing.
void fence(const DIY_MPI_Win& win, int assert)
{
#if VTKMDIY_HAS_MPI
  MPI_Win_fence(assert, mpi_cast(win));
#else
  (void) win;
  (void) assert;
#endif
}
// Forwards to MPI_Win_lock for `rank` with the given lock type and assertion
// flags. Without MPI this does nothing.
void lock(const DIY_MPI_Win& win, int lock_type, int rank, int assert)
{
#if VTKMDIY_HAS_MPI
  MPI_Win_lock(lock_type, rank, assert, mpi_cast(win));
#else
  (void) win;
  (void) lock_type;
  (void) rank;
  (void) assert;
#endif
}
// Forwards to MPI_Win_unlock for `rank`. Without MPI this does nothing.
void unlock(const DIY_MPI_Win& win, int rank)
{
#if VTKMDIY_HAS_MPI
  MPI_Win_unlock(rank, mpi_cast(win));
#else
  (void) win;
  (void) rank;
#endif
}
// Forwards to MPI_Win_lock_all with the given assertion flags. Without MPI
// this does nothing.
void lock_all(const DIY_MPI_Win& win, int assert)
{
#if VTKMDIY_HAS_MPI
  MPI_Win_lock_all(assert, mpi_cast(win));
#else
  (void) win;
  (void) assert;
#endif
}
// Forwards to MPI_Win_unlock_all. Without MPI this does nothing.
void unlock_all(const DIY_MPI_Win& win)
{
#if !VTKMDIY_HAS_MPI
  (void) win;
#else
  MPI_Win_unlock_all(mpi_cast(win));
#endif
}
// Forwards to MPI_Fetch_and_op: applies `op` with the value at `origin` to
// the element of `type` at `offset` on `rank`, storing the fetched value in
// `result`. Without MPI the call is reported through
// VTKMDIY_UNSUPPORTED_MPI_CALL.
void fetch_and_op(const DIY_MPI_Win& win,
                  const void* origin, void* result, const datatype& type,
                  int rank, unsigned offset,
                  const operation& op)
{
#if VTKMDIY_HAS_MPI
  MPI_Fetch_and_op(origin,
                   result,
                   mpi_cast(type.handle),
                   rank,
                   offset,
                   mpi_cast(op.handle),
                   mpi_cast(win));
#else
  // Silence unused-parameter warnings before flagging the unsupported call.
  (void) win;
  (void) origin;
  (void) result;
  (void) type;
  (void) rank;
  (void) offset;
  (void) op;
  VTKMDIY_UNSUPPORTED_MPI_CALL(MPI_Fetch_and_op);
#endif
}
// Reads one element of `type` at element offset `offset` into `result`.
// With MPI this is MPI_Fetch_and_op with MPI_NO_OP and a null origin.
// Without MPI one element's worth of bytes is copied out of the window's
// buffer and `rank` is ignored.
void fetch(const DIY_MPI_Win& win, void* result, const datatype& type, int rank, unsigned offset)
{
#if VTKMDIY_HAS_MPI
  MPI_Fetch_and_op(nullptr, result, mpi_cast(type.handle), rank, offset, MPI_NO_OP, mpi_cast(win));
#else
  (void) rank;
  // In this configuration the datatype handle converts to a size_t that is
  // used as the per-element byte size.
  const void* base = mpi_cast(win);
  const size_t elem_bytes = mpi_cast(type.handle);
  const int8_t* src = static_cast<const int8_t*>(base) + (offset * elem_bytes);
  int8_t* dst = static_cast<int8_t*>(result);
  std::copy_n(src, elem_bytes, dst);
#endif
}
// Overwrites the single element of `type` at element offset `offset` on
// `rank` with the value at `value`.
//
// With MPI this is a one-element MPI_Accumulate using MPI_REPLACE.
// MPI_Fetch_and_op is not used here because it always stores the previous
// target value through its result_addr argument, so calling it with a null
// result pointer (as this function previously did) is invalid.
// MPI_Accumulate performs the same element-wise replace without needing a
// result buffer.
//
// Without MPI one element's worth of bytes is copied into the window's
// buffer and `rank` is ignored.
void replace(const DIY_MPI_Win& win, const void* value, const datatype& type, int rank, unsigned offset)
{
#if VTKMDIY_HAS_MPI
  MPI_Accumulate(value, 1, mpi_cast(type.handle),
                 rank, offset,
                 1, mpi_cast(type.handle),
                 MPI_REPLACE, mpi_cast(win));
#else
  (void) rank;
  // In this configuration the datatype handle converts to a size_t that is
  // used as the per-element byte size.
  void* buffer = mpi_cast(win);
  size_t size = mpi_cast(type.handle);
  std::copy_n(static_cast<const int8_t*>(value),
              size,
              static_cast<int8_t*>(buffer) + (offset * size));
#endif
}
// Forwards to MPI_Win_sync. Without MPI this does nothing.
void sync(const DIY_MPI_Win& win)
{
#if !VTKMDIY_HAS_MPI
  (void) win;
#else
  MPI_Win_sync(mpi_cast(win));
#endif
}
// Forwards to MPI_Win_flush for `rank`. Without MPI this does nothing.
void flush(const DIY_MPI_Win& win, int rank)
{
#if VTKMDIY_HAS_MPI
  MPI_Win_flush(rank, mpi_cast(win));
#else
  (void) win;
  (void) rank;
#endif
}
// Forwards to MPI_Win_flush_all. Without MPI this does nothing.
void flush_all(const DIY_MPI_Win& win)
{
#if !VTKMDIY_HAS_MPI
  (void) win;
#else
  MPI_Win_flush_all(mpi_cast(win));
#endif
}
// Forwards to MPI_Win_flush_local for `rank`. Without MPI this does nothing.
void flush_local(const DIY_MPI_Win& win, int rank)
{
#if VTKMDIY_HAS_MPI
  MPI_Win_flush_local(rank, mpi_cast(win));
#else
  (void) win;
  (void) rank;
#endif
}
// Forwards to MPI_Win_flush_local_all. Without MPI this does nothing.
void flush_local_all(const DIY_MPI_Win& win)
{
#if !VTKMDIY_HAS_MPI
  (void) win;
#else
  MPI_Win_flush_local_all(mpi_cast(win));
#endif
}
}
}
} // diy::mpi::detail

@ -1,10 +1,89 @@
#ifndef VTKMDIY_MPI_WINODW_HPP
#define VTKMDIY_MPI_WINODW_HPP
#include "config.hpp"
#include "communicator.hpp"
#include "operations.hpp"
#include <type_traits>
#include <vector>
namespace diy
{
namespace mpi
{
#ifndef VTKMDIY_MPI_AS_LIB
constexpr int nocheck = MPI_MODE_NOCHECK;
#else
VTKMDIY_MPI_EXPORT extern const int nocheck;
#endif
namespace detail
{
VTKMDIY_MPI_EXPORT_FUNCTION
DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int disp);
VTKMDIY_MPI_EXPORT_FUNCTION
void win_free(DIY_MPI_Win& win);
VTKMDIY_MPI_EXPORT_FUNCTION
void put(const DIY_MPI_Win& win,
const void* data, int count, const datatype& type,
int rank, unsigned offset);
VTKMDIY_MPI_EXPORT_FUNCTION
void get(const DIY_MPI_Win& win,
void* data, int count, const datatype& type,
int rank, unsigned offset);
VTKMDIY_MPI_EXPORT_FUNCTION
void fence(const DIY_MPI_Win& win, int assert);
VTKMDIY_MPI_EXPORT_FUNCTION
void lock(const DIY_MPI_Win& win, int lock_type, int rank, int assert);
VTKMDIY_MPI_EXPORT_FUNCTION
void unlock(const DIY_MPI_Win& win, int rank);
VTKMDIY_MPI_EXPORT_FUNCTION
void lock_all(const DIY_MPI_Win& win, int assert);
VTKMDIY_MPI_EXPORT_FUNCTION
void unlock_all(const DIY_MPI_Win& win);
VTKMDIY_MPI_EXPORT_FUNCTION
void fetch_and_op(const DIY_MPI_Win& win,
const void* origin, void* result, const datatype& type,
int rank, unsigned offset,
const operation& op);
VTKMDIY_MPI_EXPORT_FUNCTION
void fetch(const DIY_MPI_Win& win, void* result, const datatype& type, int rank, unsigned offset);
VTKMDIY_MPI_EXPORT_FUNCTION
void replace(const DIY_MPI_Win& win,
const void* value, const datatype& type,
int rank, unsigned offset);
VTKMDIY_MPI_EXPORT_FUNCTION
void sync(const DIY_MPI_Win& win);
VTKMDIY_MPI_EXPORT_FUNCTION
void flush(const DIY_MPI_Win& win, int rank);
VTKMDIY_MPI_EXPORT_FUNCTION
void flush_all(const DIY_MPI_Win& win);
VTKMDIY_MPI_EXPORT_FUNCTION
void flush_local(const DIY_MPI_Win& win, int rank);
VTKMDIY_MPI_EXPORT_FUNCTION
void flush_local_all(const DIY_MPI_Win& win);
} // detail
//! \ingroup MPI
//! Simple wrapper around MPI window functions.
template<class T>
@ -38,7 +117,7 @@ namespace mpi
inline void lock_all(int assert = 0);
inline void unlock_all();
inline void fetch_and_op(const T* origin, T* result, int rank, unsigned offset, MPI_Op op);
inline void fetch_and_op(const T* origin, T* result, int rank, unsigned offset, const operation& op);
inline void fetch(T& result, int rank, unsigned offset);
inline void replace(const T& value, int rank, unsigned offset);
@ -52,30 +131,25 @@ namespace mpi
private:
std::vector<T> buffer_;
int rank_;
#ifndef VTKM_DIY_NO_MPI
MPI_Win window_;
#endif
DIY_MPI_Win window_;
};
} // mpi
} // diy
template<class T>
diy::mpi::window<T>::
window(const communicator& comm, unsigned size):
window(const diy::mpi::communicator& comm, unsigned size):
buffer_(size), rank_(comm.rank())
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_create(buffer_.data(), buffer_.size()*sizeof(T), sizeof(T), MPI_INFO_NULL, comm, &window_);
#endif
window_ = detail::win_create(comm, buffer_.data(), static_cast<unsigned>(buffer_.size()*sizeof(T)), static_cast<int>(sizeof(T)));
}
template<class T>
diy::mpi::window<T>::
~window()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_free(&window_);
#endif
detail::win_free(window_);
}
template<class T>
@ -83,15 +157,7 @@ void
diy::mpi::window<T>::
put(const T& x, int rank, unsigned offset)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Put(address(x), count(x), datatype(x),
rank,
offset,
count(x), datatype(x),
window_);
#else
buffer_[offset] = x;
#endif
detail::put(window_, address(x), count(x), datatype_of(x), rank, offset);
}
template<class T>
@ -99,16 +165,7 @@ void
diy::mpi::window<T>::
put(const std::vector<T>& x, int rank, unsigned offset)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Put(address(x), count(x), datatype(x),
rank,
offset,
count(x), datatype(x),
window_);
#else
for (size_t i = 0; i < x.size(); ++i)
buffer_[offset + i] = x[i];
#endif
detail::put(window_, address(x), count(x), datatype_of(x), rank, offset);
}
template<class T>
@ -116,15 +173,7 @@ void
diy::mpi::window<T>::
get(T& x, int rank, unsigned offset)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Get(address(x), count(x), datatype(x),
rank,
offset,
count(x), datatype(x),
window_);
#else
x = buffer_[offset];
#endif
detail::get(window_, address(x), count(x), datatype_of(x), rank, offset);
}
template<class T>
@ -132,16 +181,7 @@ void
diy::mpi::window<T>::
get(std::vector<T>& x, int rank, unsigned offset)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Get(address(x), count(x), datatype(x),
rank,
offset,
count(x), datatype(x),
window_);
#else
for (size_t i = 0; i < x.size(); ++i)
x[i] = buffer_[offset + i];
#endif
detail::get(window_, address(x), count(x), datatype_of(x), rank, offset);
}
template<class T>
@ -149,9 +189,7 @@ void
diy::mpi::window<T>::
fence(int assert)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_fence(assert, window_);
#endif
detail::fence(window_, assert);
}
template<class T>
@ -159,9 +197,7 @@ void
diy::mpi::window<T>::
lock(int lock_type, int rank, int assert)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_lock(lock_type, rank, assert, window_);
#endif
detail::lock(window_, lock_type, rank, assert);
}
template<class T>
@ -169,9 +205,7 @@ void
diy::mpi::window<T>::
unlock(int rank)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_unlock(rank, window_);
#endif
detail::unlock(window_, rank);
}
template<class T>
@ -179,9 +213,7 @@ void
diy::mpi::window<T>::
lock_all(int assert)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_lock_all(assert, window_);
#endif
detail::lock_all(window_, assert);
}
template<class T>
@ -189,20 +221,15 @@ void
diy::mpi::window<T>::
unlock_all()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_unlock_all(window_);
#endif
detail::unlock_all(window_);
}
template<class T>
void
diy::mpi::window<T>::
fetch_and_op(const T* origin, T* result, int rank, unsigned offset, MPI_Op op)
fetch_and_op(const T* origin, T* result, int rank, unsigned offset, const diy::mpi::operation& op)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Fetch_and_op(origin, result, datatype(*origin), rank, offset, op, window_);
#else
DIY_UNSUPPORTED_MPI_CALL(MPI_Fetch_and_op);
#endif
detail::fetch_and_op(window_, origin, result, datatype_of(*origin), rank, offset, op);
}
template<class T>
@ -210,12 +237,7 @@ void
diy::mpi::window<T>::
fetch(T& result, int rank, unsigned offset)
{
#ifndef VTKM_DIY_NO_MPI
T unused;
fetch_and_op(&unused, &result, rank, offset, MPI_NO_OP);
#else
result = buffer_[offset];
#endif
detail::fetch(window_, &result, datatype_of(result), rank, offset);
}
template<class T>
@ -223,12 +245,7 @@ void
diy::mpi::window<T>::
replace(const T& value, int rank, unsigned offset)
{
#ifndef VTKM_DIY_NO_MPI
T unused;
fetch_and_op(&value, &unused, rank, offset, MPI_REPLACE);
#else
buffer_[offset] = value;
#endif
detail::replace(window_, &value, datatype_of(value), rank, offset);
}
template<class T>
@ -236,9 +253,7 @@ void
diy::mpi::window<T>::
sync()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_sync(window_);
#endif
detail::sync(window_);
}
template<class T>
@ -246,9 +261,7 @@ void
diy::mpi::window<T>::
flush(int rank)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_flush(rank, window_);
#endif
detail::flush(window_, rank);
}
template<class T>
@ -256,9 +269,7 @@ void
diy::mpi::window<T>::
flush_all()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_flush_all(window_);
#endif
detail::flush_all(window_);
}
template<class T>
@ -266,9 +277,7 @@ void
diy::mpi::window<T>::
flush_local(int rank)
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_flush_local(rank, window_);
#endif
detail::flush_local(window_, rank);
}
template<class T>
@ -276,7 +285,11 @@ void
diy::mpi::window<T>::
flush_local_all()
{
#ifndef VTKM_DIY_NO_MPI
MPI_Win_flush_local_all(window_);
#endif
detail::flush_local_all(window_);
}
#ifndef VTKMDIY_MPI_AS_LIB
#include "window.cpp"
#endif
#endif // VTKMDIY_MPI_WINODW_HPP

@ -18,6 +18,8 @@ namespace diy
template<class Function, class... Args>
explicit thread(Function&& f, Args&&... args) { f(args...); } // not ideal, since it doesn't support member functions
thread& operator=(thread&&) = default;
void join() {}
static unsigned hardware_concurrency() { return 1; }
@ -31,8 +33,13 @@ namespace diy
struct lock_guard
{
lock_guard(T&) {}
void lock() {}
void unlock() {}
};
template<class T, class U>
using concurrent_map = std::map<T,U>;
namespace this_thread
{
inline unsigned long int get_id() { return 0; }

@ -2,63 +2,74 @@
#define VTKMDIY_PICK_HPP
#include "link.hpp"
#include "constants.h" // for DEPRECATED
namespace diy
{
template<class Bounds, class Point, class OutIter>
void near(const RegularLink<Bounds>& link, const Point& p, float r, OutIter out,
const Bounds& domain);
template<class Bounds, class Point, class OutIter, class Coordinate>
void near(const RegularLink<Bounds>& link, const Point& p, Coordinate r, OutIter out, const Bounds& domain);
template<class Bounds, class Point, class OutIter>
void in(const RegularLink<Bounds>& link, const Point& p, OutIter out, const Bounds& domain);
void in(const RegularLink<Bounds>& link, const Point& p, OutIter out, const Bounds& domain, bool core = true);
template<class Point, class Bounds>
float distance(int dim, const Bounds& bounds, const Point& p);
template<class Point, class Bounds, class Out = double>
Out distance(const Bounds& bounds, const Point& p);
template<class Point, class Bounds, class Out = double>
DEPRECATED("Use distance(const Bounds& bounds, const Point& p) instead.")
Out distance(int dim, const Bounds& bounds, const Point& p);
template<class Bounds, class Out = double>
Out distance(const Bounds& bounds1, const Bounds& bounds2);
template<class Bounds, class Out = double>
DEPRECATED("Use distance(const Bounds& bounds1, const Bounds& bounds2) instead.")
Out distance(int dim, const Bounds& bounds1, const Bounds& bounds2);
template<class Bounds>
inline
float distance(int dim, const Bounds& bounds1, const Bounds& bounds2);
void wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain);
template<class Bounds>
DEPRECATED("Use wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain) instead.")
void wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain, int dim);
}
//! Finds the neighbors within radius r of a target point.
template<class Bounds, class Point, class OutIter>
template<class Bounds, class Point, class OutIter, class Coordinate>
void
diy::
near(const RegularLink<Bounds>& link, //!< neighbors
const Point& p, //!< target point (must be in current block)
float r, //!< target radius (>= 0.0)
Coordinate r, //!< target radius (>= 0.0)
OutIter out, //!< insert iterator for output set of neighbors
const Bounds& domain) //!< global domain bounds
{
Bounds neigh_bounds; // neighbor block bounds
Bounds neigh_bounds {0}; // neighbor block bounds
// for all neighbors of this block
for (int n = 0; n < link.size(); n++)
{
// wrap neighbor bounds, if necessary, otherwise bounds will be unchanged
neigh_bounds = link.bounds(n);
wrap_bounds(neigh_bounds, link.wrap(n), domain, link.dimension());
wrap_bounds(neigh_bounds, link.wrap(n), domain);
if (distance(link.dimension(), neigh_bounds, p) <= r)
if (distance(neigh_bounds, p) <= r)
*out++ = n;
} // for all neighbors
}
//! Find the distance between point `p` and box `bounds`.
template<class Point, class Bounds>
float
template<class Point, class Bounds, class Out>
Out
diy::
distance(int dim, const Bounds& bounds, const Point& p)
distance(const Bounds& bounds, const Point& p)
{
float res = 0;
for (int i = 0; i < dim; ++i)
Out res = 0;
for (int i = 0; i < p.size(); ++i)
{
// avoids all the annoying case logic by finding
// diff = max(bounds.min[i] - p[i], 0, p[i] - bounds.max[i])
float diff = 0, d;
Out diff = 0, d;
d = bounds.min[i] - p[i];
if (d > diff) diff = d;
@ -70,18 +81,68 @@ distance(int dim, const Bounds& bounds, const Point& p)
return sqrt(res);
}
template<class Bounds>
float
// DEPRECATED
//! Find the distance between point `p` and box `bounds`, considering only
//! the first `dim` coordinates.
template<class Point, class Bounds, class Out>
Out
diy::
distance(int dim, const Bounds& bounds, const Point& p)
{
    Out sum_sq = 0;
    for (int axis = 0; axis < dim; ++axis)
    {
        // Per-axis gap is max(bounds.min - p, 0, p - bounds.max): zero when
        // the coordinate lies inside the box along this axis.
        Out gap = 0, side;

        side = bounds.min[axis] - p[axis];
        if (side > gap)
            gap = side;

        side = p[axis] - bounds.max[axis];
        if (side > gap)
            gap = side;

        sum_sq += gap*gap;
    }
    return sqrt(sum_sq);
}
//! Find the distance between two boxes `bounds1` and `bounds2`.
//! Along each axis the gap is zero when the intervals overlap, otherwise it
//! is the separation between the nearer faces; the result is the Euclidean
//! norm of the per-axis gaps. The number of axes is taken from
//! `bounds1.min.size()`.
template<class Bounds, class Out>
Out
diy::
distance(const Bounds& bounds1, const Bounds& bounds2)
{
    Out res = 0;
    for (int i = 0; i < bounds1.min.size(); ++i)      // assume min, max of both bounds have same size
    {
        Out diff = 0;                                 // stays 0 when the intervals overlap on this axis

        Out d1 = bounds1.max[i] - bounds2.min[i];
        Out d2 = bounds2.max[i] - bounds1.min[i];

        if (d1 <= 0)                                  // bounds1 lies entirely below bounds2
            diff = -d1;
        else if (d2 <= 0)                             // bounds2 lies entirely below bounds1
            diff = -d2;

        res += diff*diff;
    }
    return sqrt(res);
}
// DEPRECATED
template<class Bounds, class Out>
Out
diy::
distance(int dim, const Bounds& bounds1, const Bounds& bounds2)
{
float res = 0;
Out res = 0;
for (int i = 0; i < dim; ++i)
{
float diff = 0, d;
Out diff = 0, d;
float d1 = bounds1.max[i] - bounds2.min[i];
float d2 = bounds2.max[i] - bounds1.min[i];
Out d1 = bounds1.max[i] - bounds2.min[i];
Out d2 = bounds2.max[i] - bounds1.min[i];
if (d1 > 0 && d2 > 0)
diff = 0;
@ -102,22 +163,43 @@ diy::
in(const RegularLink<Bounds>& link, //!< neighbors
const Point& p, //!< target point
OutIter out, //!< insert iterator for output set of neighbors
const Bounds& domain) //!< global domain bounds
const Bounds& domain, //!< global domain bounds
bool core) //!< check against core (or bounds, if false)
{
Bounds neigh_bounds; // neighbor block bounds
Bounds neigh_bounds {0}; // neighbor block bounds
// for all neighbors of this block
for (int n = 0; n < link.size(); n++)
{
// wrap neighbor bounds, if necessary, otherwise bounds will be unchanged
neigh_bounds = link.bounds(n);
wrap_bounds(neigh_bounds, link.wrap(n), domain, link.dimension());
if (core)
neigh_bounds = link.core(n);
else
neigh_bounds = link.bounds(n);
if (distance(link.dimension(), neigh_bounds, p) == 0)
// wrap neighbor bounds, if necessary, otherwise bounds will be unchanged
wrap_bounds(neigh_bounds, link.wrap(n), domain);
if (distance(neigh_bounds, p) == 0)
*out++ = n;
} // for all neighbors
}
// wraps block bounds
// wrap dir is the wrapping direction from original block to wrapped neighbor block
// overall domain bounds are also needed; the number of dimensions is taken
// from bounds.min.size() rather than passed in
// Each of bounds.min[i] and bounds.max[i] is shifted by wrap_dir[i] times the
// domain extent (domain.max[i] - domain.min[i]) along that axis.
template<class Bounds>
void
diy::
wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain)
{
for (int i = 0; i < bounds.min.size(); ++i) // assume min, max of bounds, domain have same size
{
bounds.min[i] += wrap_dir[i] * (domain.max[i] - domain.min[i]);
bounds.max[i] += wrap_dir[i] * (domain.max[i] - domain.min[i]);
}
}
// DEPRECATED
// wraps block bounds
// wrap dir is the wrapping direction from original block to wrapped neighbor block
// overall domain bounds and dimensionality are also needed
@ -133,5 +215,4 @@ wrap_bounds(Bounds& bounds, Direction wrap_dir, const Bounds& domain, int dim)
}
}
#endif

@ -8,6 +8,8 @@
#include <array>
#include "constants.h" // for DEPRECATED
namespace diy
{
@ -53,7 +55,9 @@ class Point: public std::array<Coordinate_, D>
Point& operator*=(Coordinate a) { for (unsigned i = 0; i < D; ++i) (*this)[i] *= a; return *this; }
Point& operator/=(Coordinate a) { for (unsigned i = 0; i < D; ++i) (*this)[i] /= a; return *this; }
Coordinate norm() const { return (*this)*(*this); }
DEPRECATED("Use norm2 instead")
Coordinate norm() const { return norm2(); }
Coordinate norm2() const { return (*this)*(*this); }
std::ostream& operator<<(std::ostream& out) const { out << (*this)[0]; for (unsigned i = 1; i < D; ++i) out << " " << (*this)[i]; return out; }
std::istream& operator>>(std::istream& in);
@ -117,4 +121,4 @@ operator>>(std::istream& in, Point<C,D>& p)
}
#endif // DIY_POINT_HPP
#endif // VTKMDIY_POINT_HPP

@ -10,29 +10,99 @@ namespace diy
template <class T>
struct EnqueueIterator;
Proxy(Master* master__, int gid__):
using IncomingQueues = std::map<int, MemoryBuffer>;
using OutgoingQueues = std::map<BlockID, MemoryBuffer>;
Proxy(Master* master__, int gid__,
IExchangeInfo* iexchange__ = 0):
gid_(gid__),
master_(master__),
incoming_(&master__->incoming(gid__)),
outgoing_(&master__->outgoing(gid__)),
collectives_(&master__->collectives(gid__)) {}
iexchange_(iexchange__),
collectives_(&master__->collectives(gid__))
{
fill_incoming();
// move outgoing_ back into proxy, in case it's a multi-foreach round
if (!iexchange_)
for (auto& x : master_->outgoing(gid_))
{
auto access = x.second.access();
if (!access->empty())
{
outgoing_.emplace(x.first, access->back().move());
access->pop_back();
}
}
}
// delete copy constructor to avoid copying incoming_ and outgoing_ (plus it
// won't work otherwise because MemoryBuffer has a deleted copy
// constructor)
Proxy(const Proxy&) =delete;
Proxy(Proxy&&) =default;
Proxy& operator=(const Proxy&) =delete;
Proxy& operator=(Proxy&&) =default;
// On destruction, hand the queues held by this proxy back to the master.
~Proxy()
{
    auto& master_outgoing = master_->outgoing(gid_);
    auto& master_incoming = master_->incoming(gid_);

    // every outgoing buffer is appended to the back of the master's queue for
    // the same target; under iexchange each one is also counted via inc_work()
    for (auto& queue : outgoing_)
    {
        master_outgoing[queue.first].access()->emplace_back(std::move(queue.second));
        if (iexchange_)
        {
            iexchange_->inc_work();
        }
    }

    // incoming buffers are only returned outside of iexchange
    if (iexchange_)
        return;

    // put incoming_ back at the front of the master's queues, in case it's a
    // multi-foreach round
    for (auto& queue : incoming_)
    {
        master_incoming[queue.first].access()->emplace_front(std::move(queue.second));
    }
}
int gid() const { return gid_; }
// Refill incoming_ from the master: incoming_ is cleared, then for every
// non-empty master queue of this gid the front record is moved into incoming_
// and popped from the master's queue. Under iexchange each record taken is
// reported through dec_work(). Returns true if at least one record was taken.
bool fill_incoming() const
{
    incoming_.clear();

    bool received = false;
    for (auto& queue : master_->incoming(gid_))
    {
        auto records = queue.second.access();
        if (records->empty())
            continue;

        incoming_.emplace(queue.first, records->front().move());
        records->pop_front();
        if (iexchange_)
        {
            iexchange_->dec_work();
        }
        received = true;
    }

    return received;
}
//! Enqueue data whose size can be determined automatically, e.g., an STL vector.
template<class T>
void enqueue(const BlockID& to, //!< target block (gid,proc)
const T& x, //!< data (eg. STL vector)
void (*save)(BinaryBuffer&, const T&) = &::diy::save<T> //!< optional serialization function
void (*save)(BinaryBuffer&, const T&) = &::diy::save //!< optional serialization function
) const
{ OutgoingQueues& out = *outgoing_; save(out[to], x); }
{
save(outgoing_[to], x);
}
//! Enqueue data whose size is given explicitly by the user, e.g., an array.
template<class T>
void enqueue(const BlockID& to, //!< target block (gid,proc)
const T* x, //!< pointer to the data (eg. address of start of vector)
size_t n, //!< size in data elements (eg. ints)
void (*save)(BinaryBuffer&, const T&) = &::diy::save<T> //!< optional serialization function
void (*save)(BinaryBuffer&, const T&) = &::diy::save //!< optional serialization function
) const;
//! Dequeue data whose size can be determined automatically (e.g., STL vector) and that was
@ -41,9 +111,9 @@ namespace diy
template<class T>
void dequeue(int from, //!< target block gid
T& x, //!< data (eg. STL vector)
void (*load)(BinaryBuffer&, T&) = &::diy::load<T> //!< optional serialization function
void (*load)(BinaryBuffer&, T&) = &::diy::load //!< optional serialization function
) const
{ IncomingQueues& in = *incoming_; load(in[from], x); }
{ load(incoming_[from], x); }
//! Dequeue an array of data whose size is given explicitly by the user.
//! In this case, the user needs to allocate the receive buffer prior to calling dequeue.
@ -51,7 +121,7 @@ namespace diy
void dequeue(int from, //!< target block gid
T* x, //!< pointer to the data (eg. address of start of vector)
size_t n, //!< size in data elements (eg. ints)
void (*load)(BinaryBuffer&, T&) = &::diy::load<T> //!< optional serialization function
void (*load)(BinaryBuffer&, T&) = &::diy::load //!< optional serialization function
) const;
//! Dequeue data whose size can be determined automatically (e.g., STL vector) and that was
@ -60,7 +130,7 @@ namespace diy
template<class T>
void dequeue(const BlockID& from, //!< target block (gid,proc)
T& x, //!< data (eg. STL vector)
void (*load)(BinaryBuffer&, T&) = &::diy::load<T> //!< optional serialization function
void (*load)(BinaryBuffer&, T&) = &::diy::load //!< optional serialization function
) const { dequeue(from.gid, x, load); }
//! Dequeue an array of data whose size is given explicitly by the user.
@ -69,20 +139,24 @@ namespace diy
void dequeue(const BlockID& from, //!< target block (gid,proc)
T* x, //!< pointer to the data (eg. address of start of vector)
size_t n, //!< size in data elements (eg. ints)
void (*load)(BinaryBuffer&, T&) = &::diy::load<T> //!< optional serialization function
void (*load)(BinaryBuffer&, T&) = &::diy::load //!< optional serialization function
) const { dequeue(from.gid, x, n, load); }
template<class T>
EnqueueIterator<T> enqueuer(const T& x,
void (*save)(BinaryBuffer&, const T&) = &::diy::save<T>) const
void (*save)(BinaryBuffer&, const T&) = &::diy::save ) const
{ return EnqueueIterator<T>(this, x, save); }
IncomingQueues* incoming() const { return incoming_; }
MemoryBuffer& incoming(int from) const { return (*incoming_)[from]; }
IncomingQueues* incoming() const { return &incoming_; }
MemoryBuffer& incoming(int from) const { return incoming_[from]; }
inline void incoming(std::vector<int>& v) const; // fill v with every gid from which we have a message
OutgoingQueues* outgoing() const { return outgoing_; }
MemoryBuffer& outgoing(const BlockID& to) const { return (*outgoing_)[to]; }
OutgoingQueues* outgoing() const { return &outgoing_; }
MemoryBuffer& outgoing(const BlockID& to) const { return outgoing_[to]; }
inline bool empty_incoming_queues() const;
inline bool empty_outgoing_queues() const;
inline bool empty_queues() const;
/**
* \ingroup Communication
@ -118,12 +192,18 @@ namespace diy
CollectivesList* collectives() const { return collectives_; }
Master* master() const { return master_; }
IExchangeInfo* iexchange() const { return iexchange_; }
private:
int gid_;
Master* master_;
IncomingQueues* incoming_;
OutgoingQueues* outgoing_;
IExchangeInfo* iexchange_;
// TODO: these are marked mutable to not have to undo consts on enqueue/dequeue, in case it breaks things;
// eventually, implement this change
mutable IncomingQueues incoming_;
mutable OutgoingQueues outgoing_;
CollectivesList* collectives_;
};
@ -151,14 +231,12 @@ namespace diy
struct Master::ProxyWithLink: public Master::Proxy
{
ProxyWithLink(const Proxy& proxy,
ProxyWithLink(Proxy&& proxy,
void* block__,
Link* link__,
IExchangeInfo* iexchange__ = 0):
Proxy(proxy),
Link* link__):
Proxy(std::move(proxy)),
block_(block__),
link_(link__),
iexchange_(iexchange__) {}
link_(link__) {}
Link* link() const { return link_; }
void* block() const { return block_; }
@ -166,52 +244,6 @@ namespace diy
private:
void* block_;
Link* link_;
IExchangeInfo* iexchange_; // not used for iexchange presently, but later could trigger some special behavior
public:
template<class T>
void enqueue(const BlockID& to,
const T& x,
void (*save)(BinaryBuffer&, const T&) = &::diy::save<T>) const
{
diy::Master::Proxy::enqueue(to, x, save);
if (iexchange_)
master()->icommunicate(iexchange_);
}
template<class T>
void enqueue(const BlockID& to,
const T* x,
size_t n,
void (*save)(BinaryBuffer&, const T&) = &::diy::save<T>) const
{
diy::Master::Proxy::enqueue(to, x, n, save);
if (iexchange_)
master()->icommunicate(iexchange_);
}
template<class T>
void dequeue(int from,
T& x,
void (*load)(BinaryBuffer&, T&) = &::diy::load<T>) const
{
// TODO: uncomment if necessary, try first without icommunicating on dequeue
// if (iexchange_)
// master()->icommunicate(iexchange_);
diy::Master::Proxy::dequeue(from, x, load);
}
template<class T>
void dequeue(int from,
T* x,
size_t n,
void (*load)(BinaryBuffer&, T&) = &::diy::load<T>) const
{
// TODO: uncomment if necessary, try first without icommunicating on dequeue
// if (iexchange_)
// master()->icommunicate(iexchange_);
diy::Master::Proxy::dequeue(from, x, n, load);
}
};
} // diy namespace
@ -219,10 +251,38 @@ void
diy::Master::Proxy::
incoming(std::vector<int>& v) const
{
for (IncomingQueues::const_iterator it = incoming_->begin(); it != incoming_->end(); ++it)
v.push_back(it->first);
for (auto& x : incoming_)
v.push_back(x.first);
}
bool
diy::Master::Proxy::
empty_incoming_queues() const
{
for (auto& x : *incoming())
if (x.second)
return false;
return true;
}
bool
diy::Master::Proxy::
empty_outgoing_queues() const
{
for (auto& x : *outgoing())
if (x.second.size())
return false;
return true;
}
// Returns true only when both the incoming and the outgoing queues are empty;
// the outgoing queues are not inspected if the incoming ones are non-empty.
bool
diy::Master::Proxy::
empty_queues() const
{
    if (!empty_incoming_queues())
        return false;

    return empty_outgoing_queues();
}
template<class T, class Op>
void
diy::Master::Proxy::
@ -265,8 +325,7 @@ diy::Master::Proxy::
enqueue(const BlockID& to, const T* x, size_t n,
void (*save)(BinaryBuffer&, const T&)) const
{
OutgoingQueues& out = *outgoing_;
BinaryBuffer& bb = out[to];
BinaryBuffer& bb = outgoing_[to];
if (save == (void (*)(BinaryBuffer&, const T&)) &::diy::save<T>)
diy::save(bb, x, n); // optimized for unspecialized types
else
@ -280,8 +339,7 @@ diy::Master::Proxy::
dequeue(int from, T* x, size_t n,
void (*load)(BinaryBuffer&, T&)) const
{
IncomingQueues& in = *incoming_;
BinaryBuffer& bb = in[from];
BinaryBuffer& bb = incoming_[from];
if (load == (void (*)(BinaryBuffer&, T&)) &::diy::load<T>)
diy::load(bb, x, n); // optimized for unspecialized types
else

@ -16,13 +16,13 @@ struct ReduceProxy: public Master::Proxy
{
typedef std::vector<int> GIDVector;
ReduceProxy(const Master::Proxy& proxy, //!< parent proxy
ReduceProxy(Master::Proxy&& proxy, //!< parent proxy
void* block, //!< diy block
unsigned round, //!< current round
const Assigner& assigner, //!< assigner
const GIDVector& incoming_gids, //!< incoming gids in this group
const GIDVector& outgoing_gids): //!< outgoing gids in this group
Master::Proxy(proxy),
Master::Proxy(std::move(proxy)),
block_(block),
round_(round),
assigner_(assigner)
@ -46,13 +46,13 @@ struct ReduceProxy: public Master::Proxy
}
}
ReduceProxy(const Master::Proxy& proxy, //!< parent proxy
ReduceProxy(Master::Proxy&& proxy, //!< parent proxy
void* block, //!< diy block
unsigned round, //!< current round
const Assigner& assigner,
const Link& in_link,
const Link& out_link):
Master::Proxy(proxy),
Master::Proxy(std::move(proxy)),
block_(block),
round_(round),
assigner_(assigner),
@ -170,7 +170,7 @@ namespace detail
{
using Callback = std::function<void(Block*, const ReduceProxy&, const Partners&)>;
ReductionFunctor(unsigned round_, const Callback& reduce_, const Partners& partners_, const Assigner& assigner_):
ReductionFunctor(int round_, const Callback& reduce_, const Partners& partners_, const Assigner& assigner_):
round(round_), reduce(reduce_), partners(partners_), assigner(assigner_) {}
void operator()(Block* b, const Master::ProxyWithLink& cp) const
@ -180,20 +180,20 @@ namespace detail
std::vector<int> incoming_gids, outgoing_gids;
if (round > 0)
partners.incoming(round, cp.gid(), incoming_gids, *cp.master()); // receive from the previous round
if (round < partners.rounds())
if (round < static_cast<int>(partners.rounds()))
partners.outgoing(round, cp.gid(), outgoing_gids, *cp.master()); // send to the next round
ReduceProxy rp(cp, b, round, assigner, incoming_gids, outgoing_gids);
ReduceProxy rp(std::move(const_cast<Master::ProxyWithLink&>(cp)), b, round, assigner, incoming_gids, outgoing_gids);
reduce(b, rp, partners);
// touch the outgoing queues to make sure they exist
Master::OutgoingQueues& outgoing = *cp.outgoing();
if (outgoing.size() < (size_t) rp.out_link().size())
for (int j = 0; j < rp.out_link().size(); ++j)
outgoing[rp.out_link().target(j)]; // touch the outgoing queue, creating it if necessary
Master::Proxy::OutgoingQueues& outgoing = *rp.outgoing();
if (outgoing.size() < static_cast<size_t>(rp.out_link().size()))
for (BlockID target : rp.out_link().neighbors())
outgoing[target]; // touch the outgoing queue, creating it if necessary
}
unsigned round;
int round;
Callback reduce;
Partners partners;
const Assigner& assigner;
@ -213,4 +213,4 @@ namespace detail
} // diy
#endif // DIY_REDUCE_HPP
#endif // VTKMDIY_REDUCE_HPP

@ -13,6 +13,8 @@
#include <unordered_set>
#include <type_traits> // this is used for a safety check for default serialization
#include <cassert>
namespace diy
{
//! A serialization buffer. \ingroup Serialization
@ -30,6 +32,11 @@ namespace diy
MemoryBuffer(size_t position_ = 0):
position(position_) {}
MemoryBuffer(MemoryBuffer&&) =default;
MemoryBuffer(const MemoryBuffer&) =delete;
MemoryBuffer& operator=(MemoryBuffer&&) =default;
MemoryBuffer& operator=(const MemoryBuffer&) =delete;
virtual inline void save_binary(const char* x, size_t count) override; //!< copy `count` bytes from `x` into the buffer
virtual inline void append_binary(const char* x, size_t count) override; //!< append `count` bytes from `x` to end of buffer
virtual inline void load_binary(char* x, size_t count) override; //!< copy `count` bytes into `x` from the buffer
@ -52,7 +59,7 @@ namespace diy
static float growth_multiplier() { return 1.5; }
// simple file IO
void write(const std::string& fn) const { std::ofstream out(fn.c_str()); out.write(&buffer[0], size()); }
// Write the first size() bytes of the buffer to the file named `fn`.
// The stream is opened in binary mode so that the bytes are stored untranslated,
// matching read(), which opens its file with std::ios::binary.
void write(const std::string& fn) const
{
    std::ofstream out(fn.c_str(), std::ios::binary);
    // data() is valid even for an empty buffer, unlike &buffer[0]
    out.write(buffer.data(), static_cast<std::streamsize>(size()));
}
void read(const std::string& fn)
{
std::ifstream in(fn.c_str(), std::ios::binary | std::ios::ate);
@ -99,22 +106,16 @@ namespace diy
// 20150422 == 5.1
// 20141030 == 4.9.2
// See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning.__GLIBCXX__
#if defined(__GLIBCXX__) && \
#if !(defined(__GLIBCXX__) && \
(__GLIBCXX__ < 20150422 || __GLIBCXX__ == 20160726 || __GLIBCXX__ == 20150626 || \
__GLIBCXX__ == 20150623)
#define VTKMDIY_USING_GLIBCXX_4
#endif
#if !defined(VTKMDIY_USING_GLIBCXX_4)
__GLIBCXX__ == 20150623))
//exempt glibcxx-4 variants as they don't have is_trivially_copyable implemented
static_assert(std::is_trivially_copyable<T>::value, "Default serialization works only for trivially copyable types");
#else
# undef VTKMDIY_USING_GLIBCXX_4
#endif
static void save(BinaryBuffer& bb, const T& x) { bb.save_binary((const char*) &x, sizeof(T)); }
static void load(BinaryBuffer& bb, T& x) { bb.load_binary((char*) &x, sizeof(T)); }
static size_t size(const T& x) { return sizeof(T); }
static size_t size(const T&) { return sizeof(T); }
};
//! Saves `x` to `bb` by calling `diy::Serialization<T>::save(bb,x)`.
@ -185,14 +186,16 @@ namespace diy
static void save(BinaryBuffer& bb, const MemoryBuffer& x)
{
diy::save(bb, x.position);
diy::save(bb, &x.buffer[0], x.position);
if (x.position > 0)
diy::save(bb, &x.buffer[0], x.position);
}
static void load(BinaryBuffer& bb, MemoryBuffer& x)
{
diy::load(bb, x.position);
x.buffer.resize(x.position);
diy::load(bb, &x.buffer[0], x.position);
if (x.position > 0)
diy::load(bb, &x.buffer[0], x.position);
}
static size_t size(const MemoryBuffer& x)
@ -219,7 +222,7 @@ namespace diy
{
size_t s;
diy::load(bb, s);
v.resize(s);
v.resize(s, U());
if (s > 0)
diy::load(bb, &v[0], s);
}
@ -242,7 +245,7 @@ namespace diy
{
size_t s;
diy::load(bb, s);
v.resize(s);
v.resize(s, U());
if (s > 0)
diy::load(bb, &v[0], s);
}

@ -4,15 +4,83 @@
#include <chrono>
#include <string>
#include <vector>
#include <unordered_map>
#include "log.hpp" // need this for format
#include "log.hpp"
#if defined(VTKMDIY_USE_CALIPER)
#include <caliper/cali.h>
#include <caliper/common/Variant.h>
#endif
namespace diy
{
namespace stats
{
#if defined(DIY_PROFILE)
// Prints a clock duration to `out` as hours:minutes:seconds.microseconds.
// Hours, minutes and seconds are zero-padded to at least two digits and the
// microseconds to six; the hours field is not wrapped. Returns `out`.
inline
std::ostream&
operator<<(std::ostream& out, const std::chrono::high_resolution_clock::duration& d)
{
    auto total_us      = std::chrono::duration_cast<std::chrono::microseconds>(d).count();
    auto total_seconds = total_us / 1000000;
    auto total_minutes = total_seconds / 60;

    fmt::print(out, "{:02d}:{:02d}:{:02d}.{:06d}",
               total_minutes / 60,
               total_minutes % 60,
               total_seconds % 60,
               total_us % 1000000);

    return out;
}
// Accumulates, per name, the time elapsed between a `<< name` call and the
// following `>> name` call.
struct DurationAccumulator
{
    using Clock    = std::chrono::high_resolution_clock;
    using Time     = Clock::time_point;
    using Duration = Clock::duration;

    // remember the current time as the most recent start of `name`
    void operator<<(std::string name)
    {
        last[name] = Clock::now();
    }

    // add the time elapsed since the most recent start of `name` to its total
    void operator>>(std::string name)
    {
        duration[name] += Clock::now() - last[name];
    }

    // forget all recorded starts and all accumulated totals
    void clear()
    {
        last.clear();
        duration.clear();
    }

    // write one line per name: optional prefix, accumulated duration, name
    void output(std::ostream& out, std::string prefix = "") const
    {
        if (!prefix.empty())
        {
            prefix += " ";
        }

        for (auto& entry : duration)
        {
            out << prefix << entry.second << ' ' << entry.first << '\n';
        }
    }

    std::unordered_map<std::string, Time>     last;       // most recent start per name
    std::unordered_map<std::string, Duration> duration;   // accumulated total per name
};
// Scope guard for a named profiling region: construction streams the name into
// the profiler with `<<`, destruction streams it with `>>` while the guard is
// still active. Guards can be move-constructed but not copied or assigned.
template<class Profiler>
struct ScopedProfile
{
    // open the region
    ScopedProfile(Profiler& prof_, std::string name_):
        prof(prof_),
        name(name_),
        active(true)
    {
        prof << name;
    }

    // take over the region from `other`, which is deactivated so that only one
    // guard closes it
    ScopedProfile(ScopedProfile&& other):
        prof(other.prof),
        name(other.name),
        active(other.active)
    {
        other.active = false;
    }

    // close the region, unless this guard has been moved from
    ~ScopedProfile()
    {
        if (!active)
            return;

        prof >> name;
    }

    ScopedProfile(const ScopedProfile&)            = delete;
    ScopedProfile& operator=(const ScopedProfile&) = delete;
    ScopedProfile& operator=(ScopedProfile&&)      = delete;

    Profiler&   prof;
    std::string name;
    bool        active;
};
#if !defined(VTKMDIY_USE_CALIPER)
#if defined(VTKMDIY_PROFILE)
struct Profiler
{
using Clock = std::chrono::high_resolution_clock;
@ -32,28 +100,7 @@ struct Profiler
};
using EventsVector = std::vector<Event>;
struct Scoped
{
Scoped(Profiler& prof_, std::string name_):
prof(prof_), name(name_), active(true) { prof << name; }
~Scoped() { if (active) prof >> name; }
Scoped(Scoped&& other):
prof(other.prof),
name(other.name),
active(other.active) { other.active = false; }
Scoped&
operator=(Scoped&& other) = delete;
Scoped(const Scoped&) = delete;
Scoped&
operator=(const Scoped&) = delete;
Profiler& prof;
std::string name;
bool active;
};
using Scoped = ScopedProfile<Profiler>;
Profiler() { reset_time(); }
@ -62,10 +109,10 @@ struct Profiler
void operator<<(std::string name) { enter(name); }
void operator>>(std::string name) { exit(name); }
void enter(std::string name) { events.push_back(Event(name, true)); }
void exit(std::string name) { events.push_back(Event(name, false)); }
void enter(std::string name) { events.push_back(Event(name, true)); total << name; }
void exit(std::string name) { events.push_back(Event(name, false)); total >> name; }
void output(std::ostream& out, std::string prefix = "")
void output(std::ostream& out, std::string prefix = "") const
{
if (!prefix.empty())
prefix += " ";
@ -73,44 +120,103 @@ struct Profiler
for (size_t i = 0; i < events.size(); ++i)
{
const Event& e = events[i];
auto time = std::chrono::duration_cast<std::chrono::microseconds>(e.stamp - start).count();
fmt::print(out, "{}{:02d}:{:02d}:{:02d}.{:06d} {}{}\n",
prefix,
time/1000000/60/60,
time/1000000/60 % 60,
time/1000000 % 60,
time % 1000000,
(e.begin ? '<' : '>'),
e.name);
out << prefix << (e.stamp - start) << ' ' << (e.begin ? '<' : '>') << e.name << '\n';
}
out << "# Total times:\n";
total.output(out, "# ");
}
Scoped scoped(std::string name) { return Scoped(*this, name); }
void clear() { events.clear(); }
void clear() { events.clear(); total.clear(); }
const DurationAccumulator& totals() const { return total; }
private:
Time start;
EventsVector events;
Time start;
EventsVector events;
DurationAccumulator total;
};
#else
#else // VTKMDIY_PROFILE
struct Profiler
{
struct Scoped {};
using Scoped = ScopedProfile<Profiler>;
void reset_time() {}
void reset_time() {}
void operator<<(std::string) {}
void operator>>(std::string) {}
void operator<<(std::string name) { enter(name); }
void operator>>(std::string name) { exit(name); }
void enter(const std::string&) {}
void exit(const std::string&) {}
void enter(std::string) {}
void exit(std::string) {}
void output(std::ostream&, std::string = "") {}
void clear() {}
void output(std::ostream& out, std::string = "") const
{
out << "# Total times:\n";
total.output(out, "# ");
}
void clear() { total.clear(); }
Scoped scoped(std::string) { return Scoped(); }
Scoped scoped(std::string name) { return Scoped(*this, name); }
const DurationAccumulator&
totals() const { return total; }
private:
DurationAccumulator total;
};
#endif // VTKMDIY_PROFILE
// Without Caliper, annotations are empty stand-ins: every operation is
// accepted and ignored.
struct Annotation
{
    // the annotation name is discarded
    Annotation(const char*)
    {}

    // accepts a value of any type, ignores it, and returns this annotation so
    // that calls can be chained
    template<typename T>
    Annotation& set(T)
    {
        return *this;
    }

    // a guard over an annotation; does nothing
    struct Guard
    {
        Guard(Annotation&)
        {}
    };
};
// Empty stand-in for a variant value: constructible from a value of any type,
// which is ignored.
struct Variant
{
    template<typename T>
    Variant(T)
    {}
};
#else // VTKMDIY_USE_CALIPER
using Annotation = cali::Annotation;
using Variant = cali::Variant;
// Profiler used when Caliper is enabled: entering and exiting a named region
// is forwarded to CALI_MARK_BEGIN / CALI_MARK_END.
struct Profiler
{
    using Scoped = ScopedProfile<Profiler>;

    void    reset_time()                    {}

    void    operator<<(std::string name)    { enter(name); }
    void    operator>>(std::string name)    { exit(name); }

    void    enter(std::string name)         { CALI_MARK_BEGIN(name.c_str()); }
    void    exit(std::string name)          { CALI_MARK_END(name.c_str()); }

    // this profiler writes nothing itself; the parameters are left unnamed
    // so that they do not trigger unused-parameter warnings
    void    output(std::ostream&, std::string = "") const    {}
    void    clear()                         {}

    Scoped  scoped(std::string name)        { return Scoped(*this, name); }

    // unused
    const DurationAccumulator&
            totals() const                  { return total; }

    private:
        DurationAccumulator total;
};
#endif
}

@ -26,14 +26,14 @@ namespace diy
virtual inline void save_binary(const char* x, size_t count) override { fwrite(x, 1, count, file); head += count; }
virtual inline void append_binary(const char* x, size_t count) override
{
size_t temp_pos = ftell(file);
auto temp_pos = ftell(file);
fseek(file, static_cast<long>(tail), SEEK_END);
fwrite(x, 1, count, file);
tail += count;
fseek(file, temp_pos, SEEK_SET);
}
virtual inline void load_binary(char* x, size_t count) override { auto n = fread(x, 1, count, file); (void) n;}
virtual inline void load_binary_back(char* x, size_t count) override { fseek(file, static_cast<long>(tail), SEEK_END); auto n = fread(x, 1, count, file); tail += count; fseek(file, static_cast<long>(head), SEEK_SET); (void) n;}
virtual inline void load_binary(char* x, size_t count) override { auto n = fread(x, 1, count, file); VTKMDIY_UNUSED(n);}
virtual inline void load_binary_back(char* x, size_t count) override { fseek(file, static_cast<long>(tail), SEEK_END); auto n = fread(x, 1, count, file); tail += count; fseek(file, static_cast<long>(head), SEEK_SET); VTKMDIY_UNUSED(n);}
size_t size() const { return head; }
@ -135,7 +135,8 @@ namespace diy
_read(fh, &bb.buffer[0], static_cast<unsigned int>(fr.size));
#else
int fh = open(fr.name.c_str(), O_RDONLY | O_SYNC, 0600);
auto n = read(fh, &bb.buffer[0], fr.size); (void) n;
auto n = read(fh, &bb.buffer[0], fr.size);
VTKMDIY_UNUSED(n);
#endif
io::utils::close(fh);
remove_file(fr);

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More