From 6d69301b785b7e7f1493bba0cb42ad5248a0bcc3 Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Fri, 3 Mar 2023 16:37:54 -0500 Subject: [PATCH 1/3] DIY: bump new version --- vtkm/thirdparty/diy/update.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vtkm/thirdparty/diy/update.sh b/vtkm/thirdparty/diy/update.sh index 3af40e5cd..5d7706f72 100755 --- a/vtkm/thirdparty/diy/update.sh +++ b/vtkm/thirdparty/diy/update.sh @@ -8,7 +8,7 @@ readonly name="diy" readonly ownership="Diy Upstream " readonly subtree="vtkm/thirdparty/$name/vtkm$name" readonly repo="https://gitlab.kitware.com/third-party/diy2.git" -readonly tag="for/vtk-m-20220914-master-g0f1c387" +readonly tag="for/vtk-m-20230328-g9bea15a1" readonly paths=" cmake include From 928900c63df9a1dc8d84089cf51f62558c6e3581 Mon Sep 17 00:00:00 2001 From: Diy Upstream Date: Tue, 28 Mar 2023 16:35:18 -0400 Subject: [PATCH 2/3] diy 2023-03-28 (6837fb55) Code extracted from: https://gitlab.kitware.com/third-party/diy2.git at commit 6837fb55f24a9a38dfb2b6a481cc4de5f7ac455d (for/vtk-m-20230328-g9bea15a1). 
--- CMakeLists.txt | 42 +- cmake/diy-config.cmake.in | 6 +- include/vtkmdiy/collection.hpp | 8 +- .../vtkmdiy/detail/master/communication.hpp | 31 +- include/vtkmdiy/detail/master/execution.hpp | 2 + include/vtkmdiy/dynamic-point.hpp | 6 +- include/vtkmdiy/io/shared.hpp | 4 +- include/vtkmdiy/io/utils.hpp | 2 + include/vtkmdiy/log.hpp | 4 +- include/vtkmdiy/master.hpp | 131 ++- include/vtkmdiy/mpi/config.hpp | 17 +- include/vtkmdiy/mpi/mpi_cast.hpp | 7 +- include/vtkmdiy/mpi/mpitypes.hpp.in | 31 +- include/vtkmdiy/mpi/no-mpi.hpp | 35 +- include/vtkmdiy/mpi/optional.hpp | 2 +- include/vtkmdiy/mpi/window.cpp | 30 +- include/vtkmdiy/mpi/window.hpp | 45 +- include/vtkmdiy/proxy.hpp | 24 + include/vtkmdiy/reduce.hpp | 2 +- include/vtkmdiy/serialization.hpp | 106 ++- include/vtkmdiy/storage.hpp | 14 +- .../{chobo => itlib}/small_vector.hpp | 859 ++++-------------- include/vtkmdiy/thread.hpp | 3 + include/vtkmdiy/version.hpp | 2 +- 24 files changed, 607 insertions(+), 806 deletions(-) rename include/vtkmdiy/thirdparty/{chobo => itlib}/small_vector.hpp (50%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 46861ebdc..c02928b3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,13 @@ macro (diy_dependent_option variable) endif () endmacro () +set (compiler_supports_sanitizers OFF) +if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR + CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR + CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set (compiler_supports_sanitizers ON) +endif () + diy_option (threads "Build DIY with threading" ON) diy_option (log "Build DIY with logging" OFF) diy_option (profile "Build DIY with profiling" OFF) @@ -44,6 +51,8 @@ diy_dependent_option (BUILD_SHARED_LIBS "Create shared libraries if on" diy_dependent_option (build_diy_nompi_lib "Also build the nompi version of diy::mpi" OFF "mpi;build_diy_mpi_lib" OFF) diy_option (build_examples "Build DIY examples" ON) diy_option (build_tests "Build DIY tests" ON) +diy_option (python "Build Python bindings" OFF) 
+cmake_dependent_option (enable_sanitizers "Build DIY with sanitizer support" OFF "compiler_supports_sanitizers" OFF) # Default to Release if (NOT CMAKE_BUILD_TYPE) @@ -64,9 +73,8 @@ endif () # Logging if (log) - list (APPEND diy_definitions "-DVTKMDIY_USE_SPDLOG") - find_path (SPDLOG_INCLUDE_DIR spdlog/spdlog.h) - list (APPEND diy_include_thirdparty_directories $) + list (APPEND diy_definitions "-DVTKMDIY_USE_SPDLOG") + find_package (spdlog REQUIRED) endif() # Profiling @@ -114,8 +122,12 @@ if (NOT DEFINED diy_export_name) set(diy_export_name "diy_targets") endif() -set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib") -set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib") +if (NOT DEFINED CMAKE_ARCHIVE_OUTPUT_DIRECTORY) + set (CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib") +endif() +if (NOT DEFINED CMAKE_LIBRARY_OUTPUT_DIRECTORY) + set (CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib") +endif() # for diy_developer_flags include(DIYCompilerFlags) @@ -152,6 +164,9 @@ function(add_diy_mpi_library use_mpi) target_include_directories(${lib_name} SYSTEM PRIVATE ${diy_include_directories}) # for mpitypes.hpp target_include_directories(${lib_name} SYSTEM PRIVATE ${diy_include_thirdparty_directories}) target_link_libraries(${lib_name} PRIVATE diy_developer_flags) + if (log) + target_link_libraries(${lib_name} PUBLIC spdlog::spdlog_header_only) + endif () if (use_mpi AND TARGET MPI::MPI_CXX) target_link_libraries(${lib_name} PRIVATE MPI::MPI_CXX) endif() @@ -195,6 +210,9 @@ target_include_directories(${diy_prefix} SYSTEM INTERFACE ${diy_include_thirdpar if (diy_include_directories) target_include_directories(${diy_prefix} SYSTEM INTERFACE ${diy_include_directories}) endif() +if (log) + target_link_libraries(${diy_prefix} INTERFACE spdlog::spdlog_header_only) +endif () target_link_libraries(${diy_prefix} INTERFACE ${diy_libraries}) if (NOT build_diy_mpi_lib) if (mpi) @@ -224,6 +242,16 @@ elseif (${diy_prefix}mpi_nompi IN_LIST 
diy_targets) endif() list(APPEND libraries diy_developer_flags) +# Sanitizers +if (enable_sanitizers) + set(sanitizer "address" CACHE STRING "The sanitizer to use") + + string (APPEND CMAKE_CXX_FLAGS " -fsanitize=${sanitizer}") + string (APPEND CMAKE_C_FLAGS " -fsanitize=${sanitizer}") + string (APPEND CMAKE_EXE_LINKER_FLAGS " -fsanitize=${sanitizer}") + string (APPEND CMAKE_SHARED_LINKER_FLAGS " -fsanitize=${sanitizer}") +endif () + # enable testing and CDash dashboard submission enable_testing () include (CTest) @@ -262,3 +290,7 @@ if (CMAKE_SOURCE_DIR STREQUAL PROJECT_SOURCE_DIR) # Only generate these files wh install(EXPORT ${diy_export_name} NAMESPACE DIY:: DESTINATION "." FILE diy-targets.cmake) install(FILES "${PROJECT_BINARY_DIR}/diy-config.cmake" DESTINATION ".") endif() + +if (python) + add_subdirectory(bindings/python) +endif (python) diff --git a/cmake/diy-config.cmake.in b/cmake/diy-config.cmake.in index 9feb91b65..6f0b07168 100644 --- a/cmake/diy-config.cmake.in +++ b/cmake/diy-config.cmake.in @@ -25,12 +25,10 @@ if (threads) endif() if (log) - find_path(SPDLOG_INCLUDE_DIR "spdlog/spdlog.h") - if (SPDLOG_INCLUDE_DIR STREQUAL "SPDLOG_INCLUDE_DIR-NOTFOUND") + find_package(spdlog ${_diy_find_quietly}) + if (NOT spdlog_FOUND) list(APPEND "${CMAKE_FIND_PACKAGE_NAME}_NOT_FOUND_MESSAGE" "SPDLOG not found") set("${CMAKE_FIND_PACKAGE_NAME}_FOUND" 0) - else() - target_include_directories(DIY::@diy_prefix@ INTERFACE $) endif() endif() diff --git a/include/vtkmdiy/collection.hpp b/include/vtkmdiy/collection.hpp index 206b94b76..e03f061c5 100644 --- a/include/vtkmdiy/collection.hpp +++ b/include/vtkmdiy/collection.hpp @@ -17,10 +17,10 @@ namespace diy typedef std::vector Elements; typedef critical_resource CInt; - typedef void* (*Create)(); - typedef void (*Destroy)(void*); - typedef detail::Save Save; - typedef detail::Load Load; + using Create = std::function; + using Destroy = std::function; + using Save = detail::Save; + using Load = detail::Load; public: 
Collection(Create create__, diff --git a/include/vtkmdiy/detail/master/communication.hpp b/include/vtkmdiy/detail/master/communication.hpp index c9133ed6f..1f6b0800f 100644 --- a/include/vtkmdiy/detail/master/communication.hpp +++ b/include/vtkmdiy/detail/master/communication.hpp @@ -5,11 +5,13 @@ namespace diy int from, to; int nparts; int round; + int nblobs; }; struct Master::InFlightSend { std::shared_ptr message; + BinaryBlob blob; mpi::request request; MessageInfo info; // for debug purposes @@ -18,12 +20,18 @@ namespace diy struct Master::InFlightRecv { MemoryBuffer message; - MessageInfo info { -1, -1, -1, -1 }; + MessageInfo info { -1, -1, -1, -1, -1 }; bool done = false; + MemoryManagement mem; inline bool recv(mpi::communicator& comm, const mpi::status& status); inline void place(IncomingRound* in, bool unload, ExternalStorage* storage, IExchangeInfo* iexchange); - void reset() { *this = InFlightRecv(); } + void reset() + { + MemoryManagement mem_ = mem; + *this = InFlightRecv(); + mem = mem_; + } }; struct Master::InFlightRecvsMap: public std::map @@ -111,7 +119,7 @@ recv(mpi::communicator& comm, const mpi::status& status) result = true; } - else + else if (info.nparts > 0) { size_t start_idx = message.buffer.size(); size_t count = status.count(); @@ -124,9 +132,24 @@ recv(mpi::communicator& comm, const mpi::status& status) comm.recv(status.source(), status.tag(), window); info.nparts--; + } else if (info.nblobs > 0) + { + size_t count = status.count(); + detail::VectorWindow window; + + char* buffer = mem.allocate(info.to, count); + + window.begin = buffer; + window.count = count; + + comm.recv(status.source(), status.tag(), window); + + message.save_binary_blob(buffer, count, mem.deallocate); + + info.nblobs--; } - if (info.nparts == 0) + if (info.nparts == 0 && info.nblobs == 0) done = true; return result; diff --git a/include/vtkmdiy/detail/master/execution.hpp b/include/vtkmdiy/detail/master/execution.hpp index 4a382a562..85750e9aa 100644 --- 
a/include/vtkmdiy/detail/master/execution.hpp +++ b/include/vtkmdiy/detail/master/execution.hpp @@ -1,3 +1,5 @@ +#include + struct diy::Master::ProcessBlock { ProcessBlock(Master& master_, diff --git a/include/vtkmdiy/dynamic-point.hpp b/include/vtkmdiy/dynamic-point.hpp index 21a9f6367..4c24551ad 100644 --- a/include/vtkmdiy/dynamic-point.hpp +++ b/include/vtkmdiy/dynamic-point.hpp @@ -7,17 +7,17 @@ #include #include "constants.h" -#include "thirdparty/chobo/small_vector.hpp" +#include "thirdparty/itlib/small_vector.hpp" namespace diy { template -class DynamicPoint: public chobo::small_vector +class DynamicPoint: public itlib::small_vector { public: using Coordinate = Coordinate_; - using Parent = chobo::small_vector; + using Parent = itlib::small_vector; template struct rebind { typedef DynamicPoint type; }; diff --git a/include/vtkmdiy/io/shared.hpp b/include/vtkmdiy/io/shared.hpp index c143b241f..4c2e34444 100644 --- a/include/vtkmdiy/io/shared.hpp +++ b/include/vtkmdiy/io/shared.hpp @@ -30,9 +30,9 @@ class SharedOutFile: public std::ostringstream diy::mpi::gather(world_, contents, all_contents, root_); // write the file serially - std::ofstream out(filename_); + std::ofstream fout(filename_); for (auto& cntnts : all_contents) - out.write(cntnts.data(), cntnts.size()); + fout.write(cntnts.data(), cntnts.size()); } else diy::mpi::gather(world_, contents, root_); } diff --git a/include/vtkmdiy/io/utils.hpp b/include/vtkmdiy/io/utils.hpp index b096289c6..1474e6057 100644 --- a/include/vtkmdiy/io/utils.hpp +++ b/include/vtkmdiy/io/utils.hpp @@ -5,6 +5,8 @@ #include #include #include +#define NOMINMAX +#include #else #include // mkstemp() on Mac #include diff --git a/include/vtkmdiy/log.hpp b/include/vtkmdiy/log.hpp index 292851ccc..57e017c31 100644 --- a/include/vtkmdiy/log.hpp +++ b/include/vtkmdiy/log.hpp @@ -55,8 +55,8 @@ set_logger(Args...) 
#include #include -#include -#include +#include +#include namespace diy { diff --git a/include/vtkmdiy/master.hpp b/include/vtkmdiy/master.hpp index ccca263f9..bf53df591 100644 --- a/include/vtkmdiy/master.hpp +++ b/include/vtkmdiy/master.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include "link.hpp" #include "collection.hpp" @@ -28,6 +29,23 @@ namespace diy { + + struct MemoryManagement + { + using Allocate = std::function; + using Deallocate = BinaryBlob::Deleter; + using MemCopy = std::function; + + MemoryManagement() = default; + MemoryManagement(Allocate allocate_, Deallocate deallocate_, MemCopy copy_): + allocate(allocate_), deallocate(deallocate_), copy(copy_) {} + + Allocate allocate = [](int /* gid */, size_t n) { return new char[n]; }; + Deallocate deallocate = [](const char* p) { delete[] p; }; + MemCopy copy = [](char* dest, const char* src, size_t count) { std::memcpy(dest, src, count); }; + }; + + // Stores and manages blocks; initiates serialization and communication when necessary. // // Provides a foreach function, which is meant as the main entry point. @@ -126,6 +144,8 @@ namespace diy void unload(ExternalStorage* storage) { size_ = buffer_.size(); external_ = storage->put(buffer_); } void load(ExternalStorage* storage) { storage->get(external_, buffer_); external_ = -1; } + MemoryBuffer& buffer() { return buffer_; } + private: size_t size_; int external_; @@ -147,7 +167,6 @@ namespace diy }; typedef std::map IncomingRoundMap; - public: /** * \ingroup Initialization @@ -173,6 +192,7 @@ namespace diy inline void destroy(int i) { if (blocks_.own()) blocks_.destroy(i); } inline int add(int gid, void* b, Link* l); //!< add a block + inline int add(int gid, void* b, const Link& l){ return add(gid, b, l.clone()); } inline void* release(int i); //!< release ownership of the block //!< return the `i`-th block @@ -213,17 +233,17 @@ namespace diy bool local(int gid__) const { return lids_.find(gid__) != lids_.end(); } //! 
exchange the queues between all the blocks (collective operation) - inline void exchange(bool remote = false); + inline void exchange(bool remote = false, MemoryManagement mem = MemoryManagement()); //! nonblocking exchange of the queues between all the blocks template - void iexchange_(const ICallback& f); + void iexchange_(const ICallback& f, MemoryManagement mem); template - void iexchange(const F& f) + void iexchange(const F& f, MemoryManagement mem = MemoryManagement()) { using Block = typename detail::block_traits::type; - iexchange_(f); + iexchange_(f, mem); } inline void process_collectives(); @@ -283,29 +303,30 @@ namespace diy public: // Communicator functionality - inline void flush(bool remote = false); // makes sure all the serialized queues migrate to their target processors + inline void flush(bool remote, MemoryManagement mem = MemoryManagement()); // makes sure all the serialized queues migrate to their target processors private: // Communicator functionality - inline void comm_exchange(GidSendOrder& gid_order, IExchangeInfo* iex = 0); - inline void rcomm_exchange(); // possibly called in between block computations + inline void comm_exchange(GidSendOrder& gid_order, MemoryManagement mem, IExchangeInfo* iex = 0); + inline void rcomm_exchange(MemoryManagement mem); // possibly called in between block computations inline bool nudge(IExchangeInfo* iex = 0); - inline void send_queue(int from_gid, int to_gid, int to_proc, QueueRecord& qr, bool remote, IExchangeInfo* iex); + inline void send_queue(int from_gid, int to_gid, int to_proc, QueueRecord& qr, bool remote, MemoryManagement mem, IExchangeInfo* iex); inline void send_outgoing_queues(GidSendOrder& gid_order, bool remote, + MemoryManagement mem, IExchangeInfo* iex = 0); - inline void check_incoming_queues(IExchangeInfo* iex = 0); + inline void check_incoming_queues(MemoryManagement mem, IExchangeInfo* iex = 0); inline GidSendOrder order_gids(); inline void touch_queues(); - inline void 
send_same_rank(int from, int to, QueueRecord& qr, IExchangeInfo* iex); + inline void send_same_rank(int from, int to, QueueRecord& qr, MemoryManagement mem, IExchangeInfo* iex); inline void send_different_rank(int from, int to, int proc, QueueRecord& qr, bool remote, IExchangeInfo* iex); inline InFlightRecv& inflight_recv(int proc); inline InFlightSendsList& inflight_sends(); // iexchange commmunication - inline void icommunicate(IExchangeInfo* iex); // async communication + inline void icommunicate(IExchangeInfo* iex, MemoryManagement mem); // async communication struct tags { enum { queue, @@ -607,7 +628,7 @@ foreach_(const Callback& f, const Skip& skip) void diy::Master:: -exchange(bool remote) +exchange(bool remote, MemoryManagement mem) { auto scoped = prof.scoped("exchange"); VTKMDIY_UNUSED(scoped); @@ -625,7 +646,7 @@ exchange(bool remote) if (!remote) touch_queues(); - flush(remote); + flush(remote, mem); log->debug("Finished exchange"); } @@ -658,7 +679,7 @@ touch_queues() template void diy::Master:: -iexchange_(const ICallback& f) +iexchange_(const ICallback& f, MemoryManagement mem) { auto scoped = prof.scoped("iexchange"); VTKMDIY_UNUSED(scoped); @@ -685,11 +706,11 @@ iexchange_(const ICallback& f) thread comm_thread; if (threads() > 1) - comm_thread = thread([this,&iex]() + comm_thread = thread([this,&iex,mem]() { while(!iex.all_done()) { - icommunicate(&iex); + icommunicate(&iex, mem); iex.control(); //std::this_thread::sleep_for(std::chrono::microseconds(1)); } @@ -713,7 +734,7 @@ iexchange_(const ICallback& f) stats::Annotation::Guard g( stats::Annotation("diy.block").set(gid) ); if (threads() == 1) - icommunicate(&iex); + icommunicate(&iex, mem); bool done = done_result[gid]; if (!done || !empty_incoming(gid)) { @@ -762,17 +783,17 @@ iexchange_(const ICallback& f) /* Communicator */ void diy::Master:: -comm_exchange(GidSendOrder& gid_order, IExchangeInfo* iex) +comm_exchange(GidSendOrder& gid_order, MemoryManagement mem, IExchangeInfo* iex) { auto 
scoped = prof.scoped("comm-exchange"); VTKMDIY_UNUSED(scoped); - send_outgoing_queues(gid_order, false, iex); + send_outgoing_queues(gid_order, false, mem, iex); while(nudge(iex)) // kick requests ; - check_incoming_queues(iex); + check_incoming_queues(mem, iex); } /* Remote communicator */ @@ -803,7 +824,7 @@ comm_exchange(GidSendOrder& gid_order, IExchangeInfo* iex) // void diy::Master:: -rcomm_exchange() +rcomm_exchange(MemoryManagement mem) { bool done = false; bool ibarr_act = false; @@ -814,12 +835,12 @@ rcomm_exchange() while (!done) { - send_outgoing_queues(gid_order, true, 0); + send_outgoing_queues(gid_order, true, mem, 0); // kick requests nudge(); - check_incoming_queues(); + check_incoming_queues(mem); if (ibarr_act) { if (ibarr_req.test()) @@ -877,7 +898,7 @@ order_gids() // iexchange communicator void diy::Master:: -icommunicate(IExchangeInfo* iex) +icommunicate(IExchangeInfo* iex, MemoryManagement mem) { auto scoped = prof.scoped("icommunicate"); VTKMDIY_UNUSED(scoped); @@ -887,7 +908,7 @@ icommunicate(IExchangeInfo* iex) auto gid_order = order_gids(); // exchange - comm_exchange(gid_order, iex); + comm_exchange(gid_order, mem, iex); // cleanup @@ -906,6 +927,7 @@ send_queue(int from_gid, int to_proc, QueueRecord& qr, bool remote, + MemoryManagement mem, IExchangeInfo* iex) { stats::Annotation::Guard gb( stats::Annotation("diy.block").set(from_gid) ); @@ -917,7 +939,7 @@ send_queue(int from_gid, log->debug("[{}] Sending queue: {} <- {} of size {}, iexchange = {}", comm_.rank(), to_gid, from_gid, qr.size(), iex ? 
1 : 0); if (to_proc == comm_.rank()) // sending to same rank, simply swap buffers - send_same_rank(from_gid, to_gid, qr, iex); + send_same_rank(from_gid, to_gid, qr, mem, iex); else // sending to an actual message to a different rank send_different_rank(from_gid, to_gid, to_proc, qr, remote, iex); } @@ -926,6 +948,7 @@ void diy::Master:: send_outgoing_queues(GidSendOrder& gid_order, bool remote, // TODO: are remote and iexchange mutually exclusive? If so, use single enum? + MemoryManagement mem, IExchangeInfo* iex) { auto scoped = prof.scoped("send-outgoing-queues"); @@ -950,7 +973,7 @@ send_outgoing_queues(GidSendOrder& gid_order, access.unlock(); // others can push on this queue, while we are working assert(!qr.external()); log->debug("Processing queue: {} <- {} of size {}", to_gid, from, qr.size()); - send_queue(from, to_gid, to_proc, qr, remote, iex); + send_queue(from, to_gid, to_proc, qr, remote, mem, iex); access.lock(); } } @@ -978,7 +1001,7 @@ send_outgoing_queues(GidSendOrder& gid_order, // NB: send only front auto& qr = access->front(); log->debug("Processing queue: {} <- {} of size {}", to_gid, from_gid, qr.size()); - send_queue(from_gid, to_gid, to_proc, qr, remote, iex); + send_queue(from_gid, to_gid, to_proc, qr, remote, mem, iex); access->pop_front(); } } @@ -987,7 +1010,7 @@ send_outgoing_queues(GidSendOrder& gid_order, void diy::Master:: -send_same_rank(int from, int to, QueueRecord& qr, IExchangeInfo*) +send_same_rank(int from, int to, QueueRecord& qr, MemoryManagement mem, IExchangeInfo*) { auto scoped = prof.scoped("send-same-rank"); @@ -997,9 +1020,24 @@ send_same_rank(int from, int to, QueueRecord& qr, IExchangeInfo*) auto access_incoming = current_incoming.map[to][from].access(); + // save blobs to copy them explicitly + std::vector blobs; + qr.buffer().blobs.swap(blobs); + qr.buffer().blob_position = 0; + access_incoming->emplace_back(std::move(qr)); QueueRecord& in_qr = access_incoming->back(); + // copy blobs explicitly; we cannot just 
move them in place, since we don't + // own their memory and must guarantee that it's safe to free, once + // exchange() is done + for (BinaryBlob& blob : blobs) + { + char* p = mem.allocate(to, blob.size); + mem.copy(p, blob.pointer.get(), blob.size); + in_qr.buffer().save_binary_blob(p, blob.size, mem.deallocate); + } + if (!in_qr.external()) { in_qr.reset(); @@ -1029,7 +1067,7 @@ send_different_rank(int from, int to, int proc, QueueRecord& qr, bool remote, IE // sending to a different rank std::shared_ptr buffer = std::make_shared(qr.move()); - MessageInfo info{from, to, 1, exchange_round_}; + MessageInfo info{from, to, 1, exchange_round_, static_cast(buffer->nblobs())}; // size fits in one message if (Serialization::size(*buffer) + Serialization::size(info) <= MAX_MPI_MESSAGE_COUNT) { @@ -1103,11 +1141,33 @@ send_different_rank(int from, int to, int proc, QueueRecord& qr, bool remote, IE inflight_send.message = buffer; } } // large message broken into pieces + + // send binary blobs + for (size_t i = 0; i < buffer->nblobs(); ++i) + { + auto blob = buffer->load_binary_blob(); + assert(blob.size < MAX_MPI_MESSAGE_COUNT); // for now assume blobs are small enough that we don't need to break them into multiple parts + + inflight_sends().emplace_back(); + auto& inflight_send = inflight_sends().back(); + + inflight_send.info = info; + + detail::VectorWindow window; + window.begin = const_cast(blob.pointer.get()); + window.count = blob.size; + + if (remote || iex) + inflight_send.request = comm_.issend(proc, tags::queue, window); + else + inflight_send.request = comm_.isend(proc, tags::queue, window); + inflight_send.blob = std::move(blob); + } } void diy::Master:: -check_incoming_queues(IExchangeInfo* iex) +check_incoming_queues(MemoryManagement mem, IExchangeInfo* iex) { auto scoped = prof.scoped("check-incoming-queues"); VTKMDIY_UNUSED(scoped); @@ -1116,6 +1176,7 @@ check_incoming_queues(IExchangeInfo* iex) while (ostatus) { InFlightRecv& ir = 
inflight_recv(ostatus->source()); + ir.mem = mem; if (iex) iex->inc_work(); // increment work before sender's issend request can complete (so we are now responsible for the queue) @@ -1141,7 +1202,7 @@ check_incoming_queues(IExchangeInfo* iex) void diy::Master:: -flush(bool remote) +flush(bool remote, MemoryManagement mem) { #ifdef VTKMDIY_DEBUG time_type start = get_time(); @@ -1155,13 +1216,13 @@ flush(bool remote) if (remote) - rcomm_exchange(); + rcomm_exchange(mem); else { auto gid_order = order_gids(); do { - comm_exchange(gid_order); + comm_exchange(gid_order, mem); #ifdef VTKMDIY_DEBUG time_type cur = get_time(); diff --git a/include/vtkmdiy/mpi/config.hpp b/include/vtkmdiy/mpi/config.hpp index b8a6643af..13fa54c81 100644 --- a/include/vtkmdiy/mpi/config.hpp +++ b/include/vtkmdiy/mpi/config.hpp @@ -1,6 +1,8 @@ #ifndef VTKMDIY_MPI_CONFIG_HPP #define VTKMDIY_MPI_CONFIG_HPP +#include + /// We want to allow the use of `diy::mpi` in either header-only or library mode. /// VTKMDIY_MPI_AS_LIB is defined when using library mode. /// This file contains some configuration macros. 
To maintain backwards compatibility @@ -49,13 +51,26 @@ struct DIY_##mpitype { \ mpitype data; \ }; +#define DEFINE_DIY_MPI_TYPE_MOVE(mpitype) \ +struct DIY_##mpitype { \ + DIY_##mpitype() = default; \ + DIY_##mpitype(const mpitype&) = delete; \ + DIY_##mpitype(mpitype&& obj) : data(std::move(obj)) {} \ + DIY_##mpitype& operator=(const mpitype&) = delete; \ + DIY_##mpitype& operator=(mpitype&& obj) { data = std::move(obj); return *this; } \ + operator const mpitype&() const { return data; } \ + void reset() { data = mpitype(); } \ +private: \ + mpitype data; \ +}; + DEFINE_DIY_MPI_TYPE(MPI_Comm) DEFINE_DIY_MPI_TYPE(MPI_Datatype) DEFINE_DIY_MPI_TYPE(MPI_Status) DEFINE_DIY_MPI_TYPE(MPI_Request) DEFINE_DIY_MPI_TYPE(MPI_Op) DEFINE_DIY_MPI_TYPE(MPI_File) -DEFINE_DIY_MPI_TYPE(MPI_Win) +DEFINE_DIY_MPI_TYPE_MOVE(MPI_Win) #undef DEFINE_DIY_MPI_TYPE diff --git a/include/vtkmdiy/mpi/mpi_cast.hpp b/include/vtkmdiy/mpi/mpi_cast.hpp index 960742ae1..6802a2d23 100644 --- a/include/vtkmdiy/mpi/mpi_cast.hpp +++ b/include/vtkmdiy/mpi/mpi_cast.hpp @@ -18,13 +18,18 @@ inline mpitype& mpi_cast(DIY_##mpitype& obj) { return *reinterpret_cast(&obj); } \ inline DIY_##mpitype make_DIY_##mpitype(const mpitype& obj) { DIY_##mpitype ret; mpi_cast(ret) = obj; return ret; } +#define DEFINE_MPI_CAST_MOVE(mpitype) \ +inline mpitype& mpi_cast(DIY_##mpitype& obj) { return *reinterpret_cast(&obj); } \ +inline const mpitype& mpi_cast(const DIY_##mpitype& obj) { return *reinterpret_cast(&obj); } \ +inline DIY_##mpitype make_DIY_##mpitype(mpitype&& obj) { DIY_##mpitype ret = std::move(obj); return ret; } + DEFINE_MPI_CAST(MPI_Comm) DEFINE_MPI_CAST(MPI_Datatype) DEFINE_MPI_CAST(MPI_Status) DEFINE_MPI_CAST(MPI_Request) DEFINE_MPI_CAST(MPI_Op) DEFINE_MPI_CAST(MPI_File) -DEFINE_MPI_CAST(MPI_Win) +DEFINE_MPI_CAST_MOVE(MPI_Win) #undef DEFINE_MPI_CAST diff --git a/include/vtkmdiy/mpi/mpitypes.hpp.in b/include/vtkmdiy/mpi/mpitypes.hpp.in index bea375fd8..aa467b448 100644 --- 
a/include/vtkmdiy/mpi/mpitypes.hpp.in +++ b/include/vtkmdiy/mpi/mpitypes.hpp.in @@ -1,6 +1,8 @@ #ifndef VTKMDIY_MPI_MPITYPES_H #define VTKMDIY_MPI_MPITYPES_H +#include + #cmakedefine TYPESIZE_MPI_Comm @TYPESIZE_MPI_Comm@ #cmakedefine TYPESIZE_MPI_Datatype @TYPESIZE_MPI_Datatype@ #cmakedefine TYPESIZE_MPI_Status @TYPESIZE_MPI_Status@ @@ -18,6 +20,7 @@ namespace mpi # define ASSERT_MPI_TYPE_SIZE(mpitype) static_assert(sizeof(mpitype) <= sizeof(DIY_##mpitype), ""); #else # define ASSERT_MPI_TYPE_SIZE(mpitype) +struct MPI_Win; #endif #define DEFINE_DIY_MPI_TYPE(mpitype) \ @@ -26,15 +29,41 @@ struct DIY_##mpitype { \ }; \ ASSERT_MPI_TYPE_SIZE(mpitype) +#define DEFINE_DIY_MPI_TYPE_MOVE(mpitype) \ + struct DIY_##mpitype \ + { \ + DIY_##mpitype() = default; \ + DIY_##mpitype(const mpitype&) = delete; \ + DIY_##mpitype& operator=(const mpitype&) = delete; \ + DIY_##mpitype(mpitype&& obj) \ + { \ + std::memcpy(data, &obj, TYPESIZE_##mpitype); \ + std::memset(&obj, 0, TYPESIZE_##mpitype); \ + } \ + DIY_##mpitype& operator=(mpitype&& obj) \ + { \ + std::memcpy(data, &obj, TYPESIZE_##mpitype); \ + std::memset(&obj, 0, TYPESIZE_##mpitype); \ + return *this; \ + } \ + operator const mpitype&() const { return *reinterpret_cast(data); } \ + void reset() { std::memset(data, 0, TYPESIZE_##mpitype); } \ + \ + private: \ + char* data[TYPESIZE_##mpitype]; \ + }; \ + ASSERT_MPI_TYPE_SIZE(mpitype); + DEFINE_DIY_MPI_TYPE(MPI_Comm) DEFINE_DIY_MPI_TYPE(MPI_Datatype) DEFINE_DIY_MPI_TYPE(MPI_Status) DEFINE_DIY_MPI_TYPE(MPI_Request) DEFINE_DIY_MPI_TYPE(MPI_Op) DEFINE_DIY_MPI_TYPE(MPI_File) -DEFINE_DIY_MPI_TYPE(MPI_Win) +DEFINE_DIY_MPI_TYPE_MOVE(MPI_Win) #undef DEFINE_DIY_MPI_TYPE +#undef DEFINE_DIY_MPI_TYPE_MOVE #undef ASSERT_MPI_TYPE_SIZE } diff --git a/include/vtkmdiy/mpi/no-mpi.hpp b/include/vtkmdiy/mpi/no-mpi.hpp index 8e7af241b..6809281ea 100644 --- a/include/vtkmdiy/mpi/no-mpi.hpp +++ b/include/vtkmdiy/mpi/no-mpi.hpp @@ -1,6 +1,7 @@ #ifndef VTKMDIY_MPI_NO_MPI_HPP #define 
VTKMDIY_MPI_NO_MPI_HPP +#include // std::assert #include // std::runtime_error @@ -75,7 +76,39 @@ static const int MPI_MODE_APPEND = 128; static const int MPI_MODE_SEQUENTIAL = 256; /* define window type */ -using MPI_Win = void*; +struct MPI_Win { + MPI_Win(): data_(0) {} + MPI_Win(void* data, bool owned = false): data_(uintptr_t(data) | (owned ? 0x1 : 0x0)) + { + // We assume that pointers have at least some higher-byte alignment. + assert(!(uintptr_t(data) & 0x1)); + } + void* data() const { return (void*)(data_ & ~0x1); } + bool owned() const { return data_ & 0x1; } + + // We cannot copy owned windows. + MPI_Win(MPI_Win const&) = delete; + MPI_Win& operator=(MPI_Win const&) = delete; + + // We cannot move owned windows (we don't know how to delete them in general). + MPI_Win(MPI_Win&& rhs): data_(rhs.data_) + { + rhs.data_ = 0; + } + MPI_Win& operator=(MPI_Win&& rhs) + { + if (this == &rhs) + return *this; + + data_ = rhs.data_; + rhs.data_ = 0; + + return *this; + } +private: + uintptr_t data_; +}; +#define MPI_WIN_NULL MPI_Win() /* window fence assertions */ static const int MPI_MODE_NOSTORE = 1; diff --git a/include/vtkmdiy/mpi/optional.hpp b/include/vtkmdiy/mpi/optional.hpp index aee4d269a..80eb63094 100644 --- a/include/vtkmdiy/mpi/optional.hpp +++ b/include/vtkmdiy/mpi/optional.hpp @@ -37,8 +37,8 @@ namespace mpi const void* address() const { return buf_; } private: + alignas(T) char buf_[sizeof(T)]; bool init_; - char buf_[sizeof(T)]; }; } } diff --git a/include/vtkmdiy/mpi/window.cpp b/include/vtkmdiy/mpi/window.cpp index 12b97bf7f..2805bd38d 100644 --- a/include/vtkmdiy/mpi/window.cpp +++ b/include/vtkmdiy/mpi/window.cpp @@ -22,6 +22,21 @@ EXPORT_MACRO const int nocheck = MPI_MODE_NOCHECK; namespace detail { +DIY_MPI_Win win_allocate(const communicator& comm, void** base, unsigned size, int disp) +{ +#if VTKMDIY_HAS_MPI + DIY_MPI_Win win; + MPI_Win_allocate(size, disp, MPI_INFO_NULL, mpi_cast(comm.handle()), base, &mpi_cast(win)); + return win; +#else 
+ (void)comm; (void)disp; + *base = malloc(size); + auto mpi_win = MPI_Win(*base, true); + auto win = make_DIY_MPI_Win(std::move(mpi_win)); + return win; +#endif +} + DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int disp) { #if VTKMDIY_HAS_MPI @@ -30,7 +45,8 @@ DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int return win; #else (void)comm; (void)size; (void)disp; - auto win = make_DIY_MPI_Win(base); + auto mpi_win = MPI_Win(base); + auto win = make_DIY_MPI_Win(std::move(mpi_win)); return win; #endif } @@ -40,7 +56,9 @@ void win_free(DIY_MPI_Win& win) #if VTKMDIY_HAS_MPI MPI_Win_free(&mpi_cast(win)); #else - (void)win; + auto& mpi_win = mpi_cast(win); + if (mpi_win.owned()) + free(mpi_win.data()); #endif } @@ -49,7 +67,7 @@ void put(const DIY_MPI_Win& win, const void* data, int count, const datatype& ty #if VTKMDIY_HAS_MPI MPI_Put(data, count, mpi_cast(type.handle), rank, offset, count, mpi_cast(type.handle), mpi_cast(win)); #else - void* buffer = mpi_cast(win); + void* buffer = mpi_cast(win).data(); size_t size = mpi_cast(type.handle); std::copy_n(static_cast(data), size * static_cast(count), @@ -63,7 +81,7 @@ void get(const DIY_MPI_Win& win, void* data, int count, const datatype& type, in #if VTKMDIY_HAS_MPI MPI_Get(data, count, mpi_cast(type.handle), rank, offset, count, mpi_cast(type.handle), mpi_cast(win)); #else - const void* buffer = mpi_cast(win); + const void* buffer = mpi_cast(win).data(); size_t size = mpi_cast(type.handle); std::copy_n(static_cast(buffer) + (offset * size), size * static_cast(count), @@ -136,7 +154,7 @@ void fetch(const DIY_MPI_Win& win, void* result, const datatype& type, int rank, MPI_Fetch_and_op(nullptr, result, mpi_cast(type.handle), rank, offset, MPI_NO_OP, mpi_cast(win)); #else (void) rank; - const void* buffer = mpi_cast(win); + const void* buffer = mpi_cast(win).data(); size_t size = mpi_cast(type.handle); std::copy_n(static_cast(buffer) + (offset * size), size, @@ 
-150,7 +168,7 @@ void replace(const DIY_MPI_Win& win, const void* value, const datatype& type, in MPI_Fetch_and_op(value, nullptr, mpi_cast(type.handle), rank, offset, MPI_REPLACE, mpi_cast(win)); #else (void) rank; - void* buffer = mpi_cast(win); + void* buffer = mpi_cast(win).data(); size_t size = mpi_cast(type.handle); std::copy_n(static_cast(value), size, diff --git a/include/vtkmdiy/mpi/window.hpp b/include/vtkmdiy/mpi/window.hpp index 730d7c439..041fe2d44 100644 --- a/include/vtkmdiy/mpi/window.hpp +++ b/include/vtkmdiy/mpi/window.hpp @@ -22,6 +22,9 @@ VTKMDIY_MPI_EXPORT extern const int nocheck; namespace detail { +VTKMDIY_MPI_EXPORT_FUNCTION +DIY_MPI_Win win_allocate(const communicator& comm, void** base, unsigned size, int disp); + VTKMDIY_MPI_EXPORT_FUNCTION DIY_MPI_Win win_create(const communicator& comm, void* base, unsigned size, int disp); @@ -96,8 +99,8 @@ void flush_local_all(const DIY_MPI_Win& win); inline ~window(); // moving is Ok - window(window&&) = default; - window& operator=(window&&) = default; + inline window(window&&); + inline window& operator=(window&&); // cannot copy because of the buffer_ window(const window&) = delete; @@ -129,7 +132,7 @@ void flush_local_all(const DIY_MPI_Win& win); inline void flush_local_all(); private: - std::vector buffer_; + void* buffer_; int rank_; DIY_MPI_Win window_; }; @@ -140,16 +143,46 @@ void flush_local_all(const DIY_MPI_Win& win); template diy::mpi::window:: window(const diy::mpi::communicator& comm, unsigned size): - buffer_(size), rank_(comm.rank()) + buffer_(nullptr), rank_(comm.rank()) { - window_ = detail::win_create(comm, buffer_.data(), static_cast(buffer_.size()*sizeof(T)), static_cast(sizeof(T))); + window_ = detail::win_allocate(comm, &buffer_, static_cast(size*sizeof(T)), static_cast(sizeof(T))); } template diy::mpi::window:: ~window() { - detail::win_free(window_); + if (buffer_) + detail::win_free(window_); +} + +template +diy::mpi::window:: +window(window&& rhs): + buffer_(rhs.buffer_), 
rank_(rhs.rank_), window_(std::move(rhs.window_)) +{ + rhs.buffer_ = nullptr; + rhs.window_.reset(); +} + +template +diy::mpi::window& +diy::mpi::window:: +operator=(window&& rhs) +{ + if (this == &rhs) + return *this; + + if (buffer_) + detail::win_free(window_); + + buffer_ = rhs.buffer_; + rhs.buffer_ = nullptr; + rank_ = rhs.rank_; + window_ = std::move(rhs.window_); + rhs.window_.reset(); + + return *this; } template diff --git a/include/vtkmdiy/proxy.hpp b/include/vtkmdiy/proxy.hpp index 4de89d1cf..f843352d6 100644 --- a/include/vtkmdiy/proxy.hpp +++ b/include/vtkmdiy/proxy.hpp @@ -105,6 +105,12 @@ namespace diy void (*save)(BinaryBuffer&, const T&) = &::diy::save //!< optional serialization function ) const; + void inline enqueue_blob + (const BlockID& to, //!< target block (gid,proc) + const char* x, //!< pointer to the data + size_t n //!< size in data elements (eg. ints) + ) const; + //! Dequeue data whose size can be determined automatically (e.g., STL vector) and that was //! previously enqueued so that diy knows its size when it is received. //! In this case, diy will allocate the receive buffer; the user does not need to do so. 
@@ -142,6 +148,9 @@ namespace diy void (*load)(BinaryBuffer&, T&) = &::diy::load //!< optional serialization function ) const { dequeue(from.gid, x, n, load); } + BinaryBlob inline dequeue_blob + (int from) const; + template EnqueueIterator enqueuer(const T& x, void (*save)(BinaryBuffer&, const T&) = &::diy::save ) const @@ -347,5 +356,20 @@ dequeue(int from, T* x, size_t n, load(bb, x[i]); } +void +diy::Master::Proxy:: +enqueue_blob(const BlockID& to, const char* x, size_t n) const +{ + BinaryBuffer& bb = outgoing_[to]; + bb.save_binary_blob(x,n); +} + +diy::BinaryBlob +diy::Master::Proxy:: +dequeue_blob(int from) const +{ + BinaryBuffer& bb = incoming_[from]; + return bb.load_binary_blob(); +} #endif diff --git a/include/vtkmdiy/reduce.hpp b/include/vtkmdiy/reduce.hpp index 8f106ed49..b261ad5f8 100644 --- a/include/vtkmdiy/reduce.hpp +++ b/include/vtkmdiy/reduce.hpp @@ -138,7 +138,7 @@ void reduce(Master& master, //!< master object } } master.set_expected(expected); - master.flush(); + master.flush(false); } // final round log->debug("Round {}", round); diff --git a/include/vtkmdiy/serialization.hpp b/include/vtkmdiy/serialization.hpp index 992517608..0492fc32a 100644 --- a/include/vtkmdiy/serialization.hpp +++ b/include/vtkmdiy/serialization.hpp @@ -1,22 +1,30 @@ #ifndef VTKMDIY_SERIALIZATION_HPP #define VTKMDIY_SERIALIZATION_HPP -#include -#include +#include +#include +#include #include +#include #include #include -#include - #include +#include // this is used for a safety check for default serialization #include #include -#include // this is used for a safety check for default serialization - -#include +#include +#include namespace diy { + struct BinaryBlob + { + using Deleter = std::function; + using Pointer = std::unique_ptr; + Pointer pointer; + size_t size; + }; + //! A serialization buffer. 
\ingroup Serialization struct BinaryBuffer { @@ -25,10 +33,18 @@ namespace diy virtual inline void append_binary(const char* x, size_t count) =0; //!< append `count` bytes from `x` to end of buffer virtual void load_binary(char* x, size_t count) =0; //!< copy `count` bytes into `x` from the buffer virtual void load_binary_back(char* x, size_t count) =0; //!< copy `count` bytes into `x` from the back of the buffer + virtual char* grow(size_t count) =0; //!< allocate enough space for `count` bytes and return the pointer to the beginning + virtual char* advance(size_t count) =0; //!< advance buffer position by `count` bytes and return the pointer to the beginning + + virtual void save_binary_blob(const char*, size_t) =0; + virtual void save_binary_blob(const char*, size_t, BinaryBlob::Deleter) = 0; + virtual BinaryBlob load_binary_blob() =0; }; struct MemoryBuffer: public BinaryBuffer { + using Blob = BinaryBlob; + MemoryBuffer(size_t position_ = 0): position(position_) {} @@ -41,6 +57,13 @@ namespace diy virtual inline void append_binary(const char* x, size_t count) override; //!< append `count` bytes from `x` to end of buffer virtual inline void load_binary(char* x, size_t count) override; //!< copy `count` bytes into `x` from the buffer virtual inline void load_binary_back(char* x, size_t count) override; //!< copy `count` bytes into `x` from the back of the buffer + virtual inline char* grow(size_t count) override; //!< allocate enough space for `count` bytes and return the pointer to the beginning + virtual inline char* advance(size_t count) override; //!< advance buffer position by `count` bytes and return the pointer to the beginning + + virtual inline void save_binary_blob(const char* x, size_t count) override; + virtual inline void save_binary_blob(const char* x, size_t count, Blob::Deleter deleter) override; + virtual inline Blob load_binary_blob() override; + size_t nblobs() const { return blobs.size(); } void clear() { buffer.clear(); reset(); } void 
wipe() { std::vector().swap(buffer); reset(); } @@ -71,6 +94,9 @@ namespace diy size_t position; std::vector buffer; + + size_t blob_position = 0; + std::vector blobs; }; namespace detail @@ -140,7 +166,7 @@ namespace diy template void load_back(BinaryBuffer& bb, T& x) { bb.load_binary_back((char*) &x, sizeof(T)); } - //@} + //!@} namespace detail @@ -444,17 +470,7 @@ void diy::MemoryBuffer:: save_binary(const char* x, size_t count) { - if (position + count > buffer.capacity()) - { - double newsize = static_cast(position + count) * growth_multiplier(); // if we have to grow, grow geometrically - buffer.reserve(static_cast(newsize)); - } - - if (position + count > buffer.size()) - buffer.resize(position + count); - - std::copy_n(x, count, &buffer[position]); - position += count; + std::copy_n(x, count, grow(count)); } void @@ -509,6 +525,58 @@ load_binary_back(char* x, size_t count) buffer.resize(buffer.size() - count); } +char* +diy::MemoryBuffer:: +grow(size_t count) +{ + if (position + count > buffer.capacity()) + { + double newsize = static_cast(position + count) * growth_multiplier(); // if we have to grow, grow geometrically + buffer.reserve(static_cast(newsize)); + } + + if (position + count > buffer.size()) + buffer.resize(position + count); + + char* destination = &buffer[position]; + + position += count; + + return destination; +} + +char* +diy::MemoryBuffer:: +advance(size_t count) +{ + char* origin = &buffer[position]; + position += count; + return origin; +} + + +void +diy::MemoryBuffer:: +save_binary_blob(const char* x, size_t count) +{ + // empty deleter means we don't take ownership + save_binary_blob(x, count, [](const char[]) {}); +} + +void +diy::MemoryBuffer:: +save_binary_blob(const char* x, size_t count, Blob::Deleter deleter) +{ + blobs.emplace_back(Blob { Blob::Pointer {x, deleter}, count }); +} + +diy::MemoryBuffer::Blob +diy::MemoryBuffer:: +load_binary_blob() +{ + return std::move(blobs[blob_position++]); +} + void diy::MemoryBuffer:: 
copy(MemoryBuffer& from, MemoryBuffer& to) diff --git a/include/vtkmdiy/storage.hpp b/include/vtkmdiy/storage.hpp index cb541f546..f0e5bc984 100644 --- a/include/vtkmdiy/storage.hpp +++ b/include/vtkmdiy/storage.hpp @@ -15,8 +15,8 @@ namespace diy { namespace detail { - typedef void (*Save)(const void*, BinaryBuffer& buf); - typedef void (*Load)(void*, BinaryBuffer& buf); + using Save = std::function; + using Load = std::function; struct FileBuffer: public BinaryBuffer { @@ -34,6 +34,16 @@ namespace diy } virtual inline void load_binary(char* x, size_t count) override { auto n = fread(x, 1, count, file); VTKMDIY_UNUSED(n);} virtual inline void load_binary_back(char* x, size_t count) override { fseek(file, static_cast(tail), SEEK_END); auto n = fread(x, 1, count, file); tail += count; fseek(file, static_cast(head), SEEK_SET); VTKMDIY_UNUSED(n);} + virtual inline char* grow(size_t) override { throw std::runtime_error("Cannot grow a FileBuffer"); } + virtual inline char* advance(size_t) override { throw std::runtime_error("Cannot advance a FileBuffer"); } + + // TODO: for now, we just throw, but obviously it should be possile to store binary blobs in a file; might want to fall back + using Blob = BinaryBlob; + virtual inline void save_binary_blob(const char*, size_t) override { throw std::runtime_error("Cannot save binary blobs in a FileBuffer"); } + + virtual inline void save_binary_blob(const char*, size_t, Blob::Deleter) override { throw std::runtime_error("Cannot save binary blobs in a FileBuffer"); } + + virtual inline Blob load_binary_blob() override { throw std::runtime_error("Cannot load binary blobs from a FileBuffer"); } size_t size() const { return head; } diff --git a/include/vtkmdiy/thirdparty/chobo/small_vector.hpp b/include/vtkmdiy/thirdparty/itlib/small_vector.hpp similarity index 50% rename from include/vtkmdiy/thirdparty/chobo/small_vector.hpp rename to include/vtkmdiy/thirdparty/itlib/small_vector.hpp index 784534bbe..37386cbc6 100644 --- 
a/include/vtkmdiy/thirdparty/chobo/small_vector.hpp +++ b/include/vtkmdiy/thirdparty/itlib/small_vector.hpp @@ -1,9 +1,11 @@ -// chobo-small-vector v1.02 +// itlib-small-vector v1.04 // // std::vector-like class with a static buffer for initial capacity // +// SPDX-License-Identifier: MIT // MIT License: // Copyright(c) 2016-2018 Chobolabs Inc. +// Copyright(c) 2020-2022 Borislav Stanimirov // // Permission is hereby granted, free of charge, to any person obtaining // a copy of this software and associated documentation files(the @@ -27,19 +29,20 @@ // // VERSION HISTORY // -// 1.02 (2018-04-24) Class inehrits from its allocator to make use of the -// empty base class optimization. -// emplace_back returns a reference to the inserted element -// as per the c++17 standard. -// 1.01 (2017-04-02) Fixed compilation error on (count, value) constructor and -// assign, and insert methods when count or value is 0 -// 1.00 (2016-11-08) First public release +// 1.04 (2022-04-14) Noxcept move construct and assign +// 1.03 (2021-10-05) Use allocator member instead of inheriting from allocator +// Allow compare with small_vector of different static_size +// Don't rely on operator!= from T. Use operator== instead +// 1.02 (2021-09-15) Bugfix! Fixed bad deallocation when reverting to +// static size on resize() +// 1.01 (2021-08-05) Bugfix! Fixed return value of erase +// 1.00 (2020-10-14) Rebranded release from chobo-small-vector // // // DOCUMENTATION // // Simply include this file wherever you need. -// It defines the class chobo::small_vector, which is a drop-in replacement of +// It defines the class itlib::small_vector, which is a drop-in replacement of // std::vector, but with an initial capacity as a template argument. 
// It gives you the benefits of using std::vector, at the cost of having a statically // allocated buffer for the initial capacity, which gives you cache-local data @@ -58,7 +61,7 @@ // // Example: // -// chobo::small_vector myvec; // a small_vector of size 0, initial capacity 4, and revert size 4 (smaller than 5) +// itlib::small_vector myvec; // a small_vector of size 0, initial capacity 4, and revert size 4 (smaller than 5) // myvec.resize(2); // vector is {0,0} in static buffer // myvec[1] = 11; // vector is {0,11} in static buffer // myvec.push_back(7); // vector is {0,11,7} in static buffer @@ -70,7 +73,7 @@ // // Reference: // -// chobo::small_vector is fully compatible with std::vector with +// itlib::small_vector is fully compatible with std::vector with // the following exceptions: // * when reducing the size with erase or resize the new size may fall below // RevertToStaticSize (if it is not 0). In such a case the vector will @@ -104,18 +107,18 @@ // called with an iterator that doesn't belong to the vector's current range. // For example: vec.erase(vec.end() + 1); // -// This is set by defining CHOBO_SMALL_VECTOR_ERROR_HANDLING to one of the +// This is set by defining ITLIB_SMALL_VECTOR_ERROR_HANDLING to one of the // following values: -// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_NONE - no error handling. Crashes WILL +// * ITLIB_SMALL_VECTOR_ERROR_HANDLING_NONE - no error handling. Crashes WILL // ensue if the error is triggered. -// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW - std::out_of_range is thrown. -// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT - asserions are triggered. -// * CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW - combines assert and +// * ITLIB_SMALL_VECTOR_ERROR_HANDLING_THROW - std::out_of_range is thrown. +// * ITLIB_SMALL_VECTOR_ERROR_HANDLING_ASSERT - asserions are triggered. 
+// * ITLIB_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW - combines assert and // throw to catch errors more easily in debug mode // // To set this setting by editing the file change the line: // ``` -// # define CHOBO_SMALL_VECTOR_ERROR_HANDLING CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW +// # define ITLIB_SMALL_VECTOR_ERROR_HANDLING ITLIB_SMALL_VECTOR_ERROR_HANDLING_THROW // ``` // to the default setting of your choice // @@ -124,15 +127,14 @@ // By default bounds checks are made in debug mode (via an asser) when accessing // elements (with `at` or `[]`). Iterators are not checked (yet...) // -// To disable them, you can define CHOBO_SMALL_VECTOR_NO_DEBUG_BOUNDS_CHECK +// To disable them, you can define ITLIB_SMALL_VECTOR_NO_DEBUG_BOUNDS_CHECK // before including the header. // // // TESTS // -// The tests are included in the header file and use doctest (https://github.com/onqtam/doctest). -// To run them, define CHOBO_SMALL_VECTOR_TEST_WITH_DOCTEST before including -// the header in a file which has doctest.h already included. 
+// You can find unit tests for small_vector in its official repo: +// https://github.com/iboB/itlib/blob/master/test/ // #pragma once @@ -140,58 +142,59 @@ #include #include -#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_NONE 0 -#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW 1 -#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT 2 -#define CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW 3 +#define ITLIB_SMALL_VECTOR_ERROR_HANDLING_NONE 0 +#define ITLIB_SMALL_VECTOR_ERROR_HANDLING_THROW 1 +#define ITLIB_SMALL_VECTOR_ERROR_HANDLING_ASSERT 2 +#define ITLIB_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW 3 -#if !defined(CHOBO_SMALL_VECTOR_ERROR_HANDLING) -# define CHOBO_SMALL_VECTOR_ERROR_HANDLING CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW +#if !defined(ITLIB_SMALL_VECTOR_ERROR_HANDLING) +# define ITLIB_SMALL_VECTOR_ERROR_HANDLING ITLIB_SMALL_VECTOR_ERROR_HANDLING_THROW #endif -#if CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_NONE -# define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) -#elif CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_THROW +#if ITLIB_SMALL_VECTOR_ERROR_HANDLING == ITLIB_SMALL_VECTOR_ERROR_HANDLING_NONE +# define I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) +#elif ITLIB_SMALL_VECTOR_ERROR_HANDLING == ITLIB_SMALL_VECTOR_ERROR_HANDLING_THROW # include -# define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) if (cond) throw std::out_of_range("chobo::small_vector out of range") -#elif CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT +# define I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(cond) if (cond) throw std::out_of_range("itlib::small_vector out of range") +#elif ITLIB_SMALL_VECTOR_ERROR_HANDLING == ITLIB_SMALL_VECTOR_ERROR_HANDLING_ASSERT # include -# define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond, rescue_return) assert(!(cond) && "chobo::small_vector out of range") -#elif CHOBO_SMALL_VECTOR_ERROR_HANDLING == CHOBO_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW +# define 
I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(cond, rescue_return) assert(!(cond) && "itlib::small_vector out of range") +#elif ITLIB_SMALL_VECTOR_ERROR_HANDLING == ITLIB_SMALL_VECTOR_ERROR_HANDLING_ASSERT_AND_THROW # include # include -# define _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(cond, rescue_return) \ - do { if (cond) { assert(false && "chobo::small_vector out of range"); throw std::out_of_range("chobo::small_vector out of range"); } } while(false) +# define I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(cond, rescue_return) \ + do { if (cond) { assert(false && "itlib::small_vector out of range"); throw std::out_of_range("itlib::small_vector out of range"); } } while(false) #else -#error "Unknown CHOBO_SMALL_VECTOR_ERRROR_HANDLING" +#error "Unknown ITLIB_SMALL_VECTOR_ERRROR_HANDLING" #endif -#if defined(CHOBO_SMALL_VECTOR_NO_DEBUG_BOUNDS_CHECK) -# define _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i) +#if defined(ITLIB_SMALL_VECTOR_NO_DEBUG_BOUNDS_CHECK) +# define I_ITLIB_SMALL_VECTOR_BOUNDS_CHECK(i) #else # include -# define _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i) assert((i) < this->size()) +# define I_ITLIB_SMALL_VECTOR_BOUNDS_CHECK(i) assert((i) < this->size()) #endif -namespace chobo +namespace itlib { template> -struct small_vector: Alloc +struct small_vector { - static_assert(RevertToStaticSize <= StaticCapacity + 1, "chobo::small_vector: the revert-to-static size shouldn't exceed the static capacity by more than one"); + static_assert(RevertToStaticSize <= StaticCapacity + 1, "itlib::small_vector: the revert-to-static size shouldn't exceed the static capacity by more than one"); + using atraits = std::allocator_traits; public: using allocator_type = Alloc; - using value_type = typename Alloc::value_type; - using size_type = typename Alloc::size_type; - using difference_type = typename Alloc::difference_type; - using reference = typename Alloc::reference; - using const_reference = typename Alloc::const_reference; - using pointer = typename Alloc::pointer; - using const_pointer = typename 
Alloc::const_pointer; + using value_type = typename atraits::value_type; + using size_type = typename atraits::size_type; + using difference_type = typename atraits::difference_type; + using reference = T&; + using const_reference = const T&; + using pointer = typename atraits::pointer; + using const_pointer = typename atraits::const_pointer; using iterator = pointer; using const_iterator = const_pointer; using reverse_iterator = std::reverse_iterator; @@ -205,7 +208,7 @@ public: {} small_vector(const Alloc& alloc) - : Alloc(alloc) + : m_alloc(alloc) , m_capacity(StaticCapacity) , m_dynamic_capacity(0) , m_dynamic_data(nullptr) @@ -239,18 +242,18 @@ public: } small_vector(const small_vector& v) - : small_vector(v, std::allocator_traits::select_on_container_copy_construction(v.get_allocator())) + : small_vector(v, atraits::select_on_container_copy_construction(v.get_allocator())) {} small_vector(const small_vector& v, const Alloc& alloc) - : Alloc(alloc) + : m_alloc(alloc) , m_dynamic_capacity(0) , m_dynamic_data(nullptr) { if (v.size() > StaticCapacity) { m_dynamic_capacity = v.size(); - m_begin = m_end = m_dynamic_data = get_alloc().allocate(m_dynamic_capacity); + m_begin = m_end = m_dynamic_data = atraits::allocate(get_alloc(), m_dynamic_capacity); m_capacity = v.size(); } else @@ -261,13 +264,13 @@ public: for (auto p = v.m_begin; p != v.m_end; ++p) { - get_alloc().construct(m_end, *p); + atraits::construct(get_alloc(), m_end, *p); ++m_end; } } - small_vector(small_vector&& v) - : Alloc(std::move(v.get_alloc())) + small_vector(small_vector&& v) noexcept + : m_alloc(std::move(v.get_alloc())) , m_capacity(v.m_capacity) , m_dynamic_capacity(v.m_dynamic_capacity) , m_dynamic_data(v.m_dynamic_data) @@ -277,7 +280,7 @@ public: m_begin = m_end = static_begin_ptr(); for (auto p = v.m_begin; p != v.m_end; ++p) { - get_alloc().construct(m_end, std::move(*p)); + atraits::construct(get_alloc(), m_end, std::move(*p)); ++m_end; } @@ -301,7 +304,7 @@ public: if 
(m_dynamic_data) { - get_alloc().deallocate(m_dynamic_data, m_dynamic_capacity); + atraits::deallocate(get_alloc(), m_dynamic_data, m_dynamic_capacity); } } @@ -319,7 +322,7 @@ public: for (auto p = v.m_begin; p != v.m_end; ++p) { - get_alloc().construct(m_end, *p); + atraits::construct(get_alloc(), m_end, *p); ++m_end; } @@ -328,7 +331,7 @@ public: return *this; } - small_vector& operator=(small_vector&& v) + small_vector& operator=(small_vector&& v) noexcept { clear(); @@ -342,7 +345,7 @@ public: m_begin = m_end = static_begin_ptr(); for (auto p = v.m_begin; p != v.m_end; ++p) { - get_alloc().construct(m_end, std::move(*p)); + atraits::construct(get_alloc(), m_end, std::move(*p)); ++m_end; } @@ -388,13 +391,13 @@ public: const_reference at(size_type i) const { - _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i); + I_ITLIB_SMALL_VECTOR_BOUNDS_CHECK(i); return *(m_begin + i); } reference at(size_type i) { - _CHOBO_SMALL_VECTOR_BOUNDS_CHECK(i); + I_ITLIB_SMALL_VECTOR_BOUNDS_CHECK(i); return *(m_begin + i); } @@ -512,7 +515,7 @@ public: size_t max_size() const noexcept { - return get_alloc().max_size(); + return atraits::max_size(); } void reserve(size_type new_cap) @@ -534,19 +537,19 @@ public: // now we need to transfer the existing elements into the new buffer for (size_type i = 0; i < s; ++i) { - get_alloc().construct(new_buf + i, std::move(*(m_begin + i))); + atraits::construct(get_alloc(), new_buf + i, std::move(*(m_begin + i))); } // free old elements for (size_type i = 0; i < s; ++i) { - get_alloc().destroy(m_begin + i); + atraits::destroy(get_alloc(), m_begin + i); } if (m_begin != static_begin_ptr()) { // we've moved from dyn to dyn memory, so deallocate the old one - get_alloc().deallocate(m_begin, m_capacity); + atraits::deallocate(get_alloc(), m_begin, m_capacity); } m_begin = new_buf; @@ -577,18 +580,18 @@ public: else { // alloc new smaller buffer - m_begin = m_end = get_alloc().allocate(s); + m_begin = m_end = atraits::allocate(get_alloc(), s); m_capacity = s; } 
for (auto p = m_dynamic_data; p != old_end; ++p) { - get_alloc().construct(m_end, std::move(*p)); + atraits::construct(get_alloc(), m_end, std::move(*p)); ++m_end; - get_alloc().destroy(p); + atraits::destroy(get_alloc(), p); } - get_alloc().deallocate(m_dynamic_data, m_dynamic_capacity); + atraits::deallocate(get_alloc(), m_dynamic_data, m_dynamic_capacity); m_dynamic_data = nullptr; m_dynamic_capacity = 0; } @@ -605,9 +608,9 @@ public: m_capacity = StaticCapacity; for (auto p = m_dynamic_data; p != old_end; ++p) { - get_alloc().construct(m_end, std::move(*p)); + atraits::construct(get_alloc(), m_end, std::move(*p)); ++m_end; - get_alloc().destroy(p); + atraits::destroy(get_alloc(), p); } } @@ -616,7 +619,7 @@ public: { for (auto p = m_begin; p != m_end; ++p) { - get_alloc().destroy(p); + atraits::destroy(get_alloc(), p); } if (RevertToStaticSize > 0) @@ -633,14 +636,14 @@ public: iterator insert(const_iterator position, const value_type& val) { auto pos = grow_at(position, 1); - get_alloc().construct(pos, val); + atraits::construct(get_alloc(), pos, val); return pos; } iterator insert(const_iterator position, value_type&& val) { auto pos = grow_at(position, 1); - get_alloc().construct(pos, std::move(val)); + atraits::construct(get_alloc(), pos, std::move(val)); return pos; } @@ -649,7 +652,7 @@ public: auto pos = grow_at(position, count); for (size_type i = 0; i < count; ++i) { - get_alloc().construct(pos + i, val); + atraits::construct(get_alloc(), pos + i, val); } return pos; } @@ -662,7 +665,7 @@ public: auto np = pos; for (auto p = first; p != last; ++p, ++np) { - get_alloc().construct(np, *p); + atraits::construct(get_alloc(), np, *p); } return pos; } @@ -673,7 +676,7 @@ public: size_type i = 0; for (auto& elem : ilist) { - get_alloc().construct(pos + i, elem); + atraits::construct(get_alloc(), pos + i, elem); ++i; } return pos; @@ -683,7 +686,7 @@ public: iterator emplace(const_iterator position, Args&&... 
args) { auto pos = grow_at(position, 1); - get_alloc().construct(pos, std::forward(args)...); + atraits::construct(get_alloc(), pos, std::forward(args)...); return pos; } @@ -694,27 +697,27 @@ public: iterator erase(const_iterator first, const_iterator last) { - _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(first > last); + I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(first > last); return shrink_at(first, last - first); } void push_back(const_reference val) { auto pos = grow_at(m_end, 1); - get_alloc().construct(pos, val); + atraits::construct(get_alloc(), pos, val); } void push_back(T&& val) { auto pos = grow_at(m_end, 1); - get_alloc().construct(pos, std::move(val)); + atraits::construct(get_alloc(), pos, std::move(val)); } template reference emplace_back(Args&&... args) { auto pos = grow_at(m_end, 1); - get_alloc().construct(pos, std::forward(args)...); + atraits::construct(get_alloc(), pos, std::forward(args)...); return *pos; } @@ -735,12 +738,12 @@ public: while (m_end > new_end) { - get_alloc().destroy(--m_end); + atraits::destroy(get_alloc(), --m_end); } while (new_end > m_end) { - get_alloc().construct(m_end++, v); + atraits::construct(get_alloc(), m_end++, v); } } else @@ -752,25 +755,19 @@ public: for (size_type i = 0; i < num_transfer; ++i) { - get_alloc().construct(new_buf + i, std::move(*(m_begin + i))); + atraits::construct(get_alloc(), new_buf + i, std::move(*(m_begin + i))); } // free obsoletes for (size_type i = 0; i < s; ++i) { - get_alloc().destroy(m_begin + i); + atraits::destroy(get_alloc(), m_begin + i); } // construct new elements for (size_type i = num_transfer; i < n; ++i) { - get_alloc().construct(new_buf + i, v); - } - - if (m_begin != static_begin_ptr()) - { - // we've moved from dyn to dyn memory, so deallocate the old one - get_alloc().deallocate(m_begin, m_capacity); + atraits::construct(get_alloc(), new_buf + i, v); } if (new_buf == static_begin_ptr()) @@ -779,6 +776,11 @@ public: } else { + if (m_begin != static_begin_ptr()) + { + // we've moved 
from dyn to dyn memory, so deallocate the old one + atraits::deallocate(get_alloc(), m_begin, m_capacity); + } m_capacity = m_dynamic_capacity; } @@ -799,12 +801,12 @@ public: while (m_end > new_end) { - get_alloc().destroy(--m_end); + atraits::destroy(get_alloc(), --m_end); } while (new_end > m_end) { - get_alloc().construct(m_end++); + atraits::construct(get_alloc(), m_end++); } } else @@ -816,25 +818,19 @@ public: for (size_type i = 0; i < num_transfer; ++i) { - get_alloc().construct(new_buf + i, std::move(*(m_begin + i))); + atraits::construct(get_alloc(), new_buf + i, std::move(*(m_begin + i))); } // free obsoletes - for (size_type i = 0; i < n; ++i) + for (size_type i = 0; i < s; ++i) { - get_alloc().destroy(m_begin + i); + atraits::destroy(get_alloc(), m_begin + i); } // construct new elements - for (size_type i = num_transfer; i < s; ++i) + for (size_type i = num_transfer; i < n; ++i) { - get_alloc().construct(new_buf + i); - } - - if (m_begin != static_begin_ptr()) - { - // we've moved from dyn to dyn memory, so deallocate the old one - get_alloc().deallocate(m_begin, m_capacity); + atraits::construct(get_alloc(), new_buf + i); } if (new_buf == static_begin_ptr()) @@ -843,6 +839,11 @@ public: } else { + if (m_begin != static_begin_ptr()) + { + // we've moved from dyn to dyn memory, so deallocate the old one + atraits::deallocate(get_alloc(), m_begin, m_capacity); + } m_capacity = m_dynamic_capacity; } @@ -864,7 +865,7 @@ private: { auto position = const_cast(cp); - _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(position < m_begin || position > m_end); + I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(position < m_begin || position > m_end); const auto s = size(); auto new_buf = choose_data(s + num); @@ -877,8 +878,8 @@ private: for (auto p = m_end - num - 1; p >= position; --p) { - get_alloc().construct(p + num, std::move(*p)); - get_alloc().destroy(p); + atraits::construct(get_alloc(), p + num, std::move(*p)); + atraits::destroy(get_alloc(), p); } return position; @@ -894,25 
+895,25 @@ private: for (; np != position; ++p, ++np) { - get_alloc().construct(np, std::move(*p)); + atraits::construct(get_alloc(), np, std::move(*p)); } np += num; for (; p != m_end; ++p, ++np) { - get_alloc().construct(np, std::move(*p)); + atraits::construct(get_alloc(), np, std::move(*p)); } // destroy old for (p = m_begin; p != m_end; ++p) { - get_alloc().destroy(p); + atraits::destroy(get_alloc(), p); } if (m_begin != static_begin_ptr()) { // we've moved from dyn to dyn memory, so deallocate the old one - get_alloc().deallocate(m_begin, m_capacity); + atraits::deallocate(get_alloc(), m_begin, m_capacity); } m_capacity = m_dynamic_capacity; @@ -928,7 +929,7 @@ private: { auto position = const_cast(cp); - _CHOBO_SMALL_VECTOR_OUT_OF_RANGE_IF(position < m_begin || position > m_end || position + num > m_end); + I_ITLIB_SMALL_VECTOR_OUT_OF_RANGE_IF(position < m_begin || position > m_end || position + num > m_end); const auto s = size(); if (s - num == 0) @@ -945,13 +946,13 @@ private: for (auto p = position, np = position + num; np != m_end; ++p, ++np) { - get_alloc().destroy(p); - get_alloc().construct(p, std::move(*np)); + atraits::destroy(get_alloc(), p); + atraits::construct(get_alloc(), p, std::move(*np)); } for (auto p = m_end - num; p != m_end; ++p) { - get_alloc().destroy(p); + atraits::destroy(get_alloc(), p); } m_end -= num; @@ -967,19 +968,19 @@ private: auto p = m_begin, np = new_buf; for (; p != position; ++p, ++np) { - get_alloc().construct(np, std::move(*p)); - get_alloc().destroy(p); + atraits::construct(get_alloc(), np, std::move(*p)); + atraits::destroy(get_alloc(), p); } for (; p != position + num; ++p) { - get_alloc().destroy(p); + atraits::destroy(get_alloc(), p); } for (; np != new_buf + s - num; ++p, ++np) { - get_alloc().construct(np, std::move(*p)); - get_alloc().destroy(p); + atraits::construct(get_alloc(), np, std::move(*p)); + atraits::destroy(get_alloc(), p); } position = new_buf + (position - m_begin); @@ -987,7 +988,7 @@ private: 
m_end = np; } - return ++position; + return position; } void assign_impl(size_type count, const T& value) @@ -998,7 +999,7 @@ private: m_begin = m_end = choose_data(count); for (size_type i = 0; i < count; ++i) { - get_alloc().construct(m_end, value); + atraits::construct(get_alloc(), m_end, value); ++m_end; } @@ -1014,7 +1015,7 @@ private: m_begin = m_end = choose_data(last - first); for (auto p = first; p != last; ++p) { - get_alloc().construct(m_end, *p); + atraits::construct(get_alloc(), m_end, *p); ++m_end; } @@ -1029,7 +1030,7 @@ private: m_begin = m_end = choose_data(ilist.size()); for (auto& elem : ilist) { - get_alloc().construct(m_end, elem); + atraits::construct(get_alloc(), m_end, elem); ++m_end; } @@ -1064,7 +1065,7 @@ private: m_dynamic_capacity /= 2; } - m_dynamic_data = get_alloc().allocate(m_dynamic_capacity); + m_dynamic_data = atraits::allocate(get_alloc(), m_dynamic_capacity); return m_dynamic_data; } else if (desired_capacity < RevertToStaticSize) @@ -1093,11 +1094,11 @@ private: // we don't have anything to destroy, so we can also deallocate the buffer if (m_dynamic_data) { - get_alloc().deallocate(m_dynamic_data, m_dynamic_capacity); + atraits::deallocate(get_alloc(), m_dynamic_data, m_dynamic_capacity); } m_dynamic_capacity = desired_capacity; - m_dynamic_data = get_alloc().allocate(m_dynamic_capacity); + m_dynamic_data = atraits::allocate(get_alloc(), m_dynamic_capacity); } return m_dynamic_data; @@ -1110,8 +1111,10 @@ private: } } - allocator_type& get_alloc() { return static_cast(*this); } - const allocator_type& get_alloc() const { return static_cast(*this); } + allocator_type& get_alloc() { return m_alloc; } + const allocator_type& get_alloc() const { return m_alloc; } + + allocator_type m_alloc; pointer m_begin; pointer m_end; @@ -1123,9 +1126,12 @@ private: pointer m_dynamic_data; }; -template -bool operator==(const small_vector& a, - const small_vector& b) +template +bool operator==(const small_vector& a, + const small_vector& b) { 
if (a.size() != b.size()) { @@ -1134,583 +1140,22 @@ bool operator==(const small_vector for (size_t i = 0; i < a.size(); ++i) { - if (a[i] != b[i]) + if (!(a[i] == b[i])) return false; } return true; } -template -bool operator!=(const small_vector& a, - const small_vector& b) +template +bool operator!=(const small_vector& a, + const small_vector& b) + { - if (a.size() != b.size()) - { - return true; - } - - for (size_t i = 0; i < a.size(); ++i) - { - if (a[i] != b[i]) - return true; - } - - return false; + return !operator==(a, b); } } - - -#if defined(CHOBO_SMALL_VECTOR_TEST_WITH_DOCTEST) - -#include -#include - -namespace chobo_small_vector_test -{ - -size_t allocations = 0; -size_t deallocations = 0; -size_t allocated_bytes = 0; -size_t deallocated_bytes = 0; -size_t constructions = 0; -size_t destructions = 0; - -template -class counting_allocator : public std::allocator -{ -public: - typedef std::allocator super; - - T* allocate(size_t n, std::allocator::const_pointer hint = 0) - { - ++allocations; - allocated_bytes += n * sizeof(T); - return super::allocate(n, hint); - } - - void deallocate(T* p, size_t n) - { - ++deallocations; - deallocated_bytes += n * sizeof(T); - return super::deallocate(p, n); - } - - template< class U, class... Args > - void construct(U* p, Args&&... 
args) - { - ++constructions; - return super::construct(p, std::forward(args)...); - } - - template< class U > - void destroy(U* p) - { - ++destructions; - return super::destroy(p); - } -}; -} - -TEST_CASE("[small_vector] static") -{ - using namespace chobo; - using namespace chobo_small_vector_test; - using namespace std; - - static_assert(sizeof(small_vector) - sizeof(small_vector) == sizeof(void*) * 7, "small_vector needs to have a static buffer"); - { - small_vector> ivec; - CHECK(ivec.size() == 0); - CHECK(ivec.capacity() == 10); - CHECK(ivec.begin() == ivec.end()); - CHECK(ivec.cbegin() == ivec.cend()); - CHECK(ivec.empty()); - - auto d = ivec.data(); - ivec.reserve(9); - CHECK(ivec.capacity() == 10); - CHECK(d == ivec.data()); - - ivec.resize(2, 8); - CHECK(ivec.size() == 2); - CHECK(ivec.front() == 8); - CHECK(ivec.back() == 8); - CHECK(d == ivec.data()); - - ivec.clear(); - CHECK(ivec.size() == 0); - CHECK(ivec.capacity() == 10); - CHECK(ivec.begin() == ivec.end()); - CHECK(ivec.cbegin() == ivec.cend()); - CHECK(ivec.empty()); - CHECK(d == ivec.data()); - - ivec.push_back(5); - CHECK(ivec.size() == 1); - CHECK(ivec[0] == 5); - auto it = ivec.begin(); - CHECK(it == ivec.data()); - CHECK(it == ivec.cbegin()); - CHECK(*it == 5); - ++it; - CHECK(it == ivec.end()); - CHECK(it == ivec.cend()); - - auto& back = ivec.emplace_back(3); - CHECK(ivec.size() == 2); - auto rit = ivec.rbegin(); - CHECK(*rit == 3); - ++rit; - *rit = 12; - ++rit; - CHECK(rit == ivec.rend()); - CHECK(rit == ivec.crend()); - CHECK(ivec.front() == 12); - CHECK(ivec.back() == 3); - CHECK(back == 3); - CHECK(&back == &ivec.back()); - - ivec.insert(ivec.begin(), 53); - ivec.insert(ivec.begin() + 2, 90); - ivec.insert(ivec.begin() + 4, 17); - ivec.insert(ivec.end(), 6); - ivec.insert(ivec.begin(), { 1, 2 }); - - int ints[] = { 1, 2, 53, 12, 90, 3, 17, 6 }; - CHECK(ivec.size() == 8); - CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0); - - ivec.shrink_to_fit(); - CHECK(ivec.size() == 8); - 
CHECK(ivec.capacity() == 10); - CHECK(d == ivec.data()); - - ivec.revert_to_static(); - CHECK(ivec.size() == 8); - CHECK(ivec.capacity() == 10); - CHECK(d == ivec.data()); - - ivec.pop_back(); - CHECK(ivec.size() == 7); - CHECK(memcmp(ivec.data(), ints, sizeof(ints) - sizeof(int)) == 0); - - ivec.resize(8); - CHECK(ivec.size() == 8); - ints[7] = 0; - CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0); - - const small_vector> ivec2 = { 1, 2, 3, 4 }; - CHECK(ivec2.size() == 4); - CHECK(*ivec2.begin() == 1); - CHECK(ivec2[1] == 2); - CHECK(ivec2.at(2) == 3); - CHECK(*ivec2.rbegin() == 4); - - ivec.erase(ivec.begin()); - CHECK(ivec.size() == 7); - CHECK(ivec.front() == 2); - CHECK(memcmp(ivec.data(), ints + 1, ivec.size() * sizeof(int)) == 0); - - ivec.erase(ivec.begin() + 2, ivec.begin() + 4); - CHECK(ivec.size() == 5); - CHECK(ivec[3] == 17); - - small_vector> svec; - svec.assign({ "as", "df" }); - CHECK(svec.size() == 2); - string s1 = "the quick brown fox jumped over the lazy dog 1234567890"; - auto& rs = svec.emplace_back(s1); - CHECK(svec.back() == s1); - CHECK(rs == s1); - CHECK(&rs == &svec.back()); - - auto svec1 = svec; - CHECK(svec1 == svec); - - const void* cstr = svec.back().c_str(); - auto svec2 = std::move(svec); - CHECK(svec2.size() == 3); - CHECK(svec2.back() == s1); - - CHECK(svec.empty()); - CHECK(svec2.back().c_str() == cstr); - - svec = std::move(svec2); - CHECK(svec2.empty()); - CHECK(svec.back().c_str() == cstr); - - svec2 = svec; - CHECK(svec2.back() == s1); - CHECK(svec.back() == s1); - CHECK(svec == svec2); - - svec.insert(svec.begin(), s1); - CHECK(svec.size() == 4); - CHECK(svec.back().c_str() == cstr); - CHECK(svec.front() == svec.back()); - - cstr = s1.c_str(); - svec.emplace(svec.begin() + 2, std::move(s1)); - CHECK(svec.size() == 5); - CHECK(svec.front() == svec[2]); - CHECK(svec[2].c_str() == cstr); - - svec.clear(); - CHECK(svec.empty()); - svec2.clear(); - CHECK(svec2.empty()); - CHECK(svec == svec2); - - 
svec.resize(svec.capacity()); - CHECK(svec.size() == svec.capacity()); - - for (auto& s : svec) - { - CHECK(s.empty()); - } - - s1 = "asdf"; - small_vector> cvec(s1.begin(), s1.end()); - CHECK(cvec.size() == 4); - CHECK(cvec.front() == 'a'); - CHECK(cvec.back() == 'f'); - - cvec.clear(); - CHECK(cvec.size() == 0); - CHECK(cvec.empty()); - - s1 = "baz"; - cvec.assign(s1.begin(), s1.end()); - CHECK(cvec.size() == 3); - CHECK(cvec.front() == 'b'); - CHECK(cvec.back() == 'z'); - - // 0 is implicitly castable to nullptr_t which can be an iterator in our case - small_vector nullptr_test(2, 0); - CHECK(nullptr_test.size() == 2); - CHECK(nullptr_test.front() == 0); - CHECK(nullptr_test.back() == 0); - - nullptr_test.assign(3, 0); - CHECK(nullptr_test.size() == 3); - CHECK(nullptr_test.front() == 0); - CHECK(nullptr_test.back() == 0); - - nullptr_test.insert(nullptr_test.begin(), 1, 0); - CHECK(nullptr_test.size() == 4); - CHECK(nullptr_test.front() == 0); - } - - CHECK(allocations == 0); - CHECK(deallocations == 0); - CHECK(allocated_bytes == 0); - CHECK(deallocated_bytes == 0); - CHECK(constructions == destructions); - - constructions = destructions = 0; -} - - -TEST_CASE("[small_vector] dynamic") -{ - using namespace chobo; - using namespace chobo_small_vector_test; - using namespace std; - { - small_vector> ivec; - CHECK(ivec.size() == 0); - CHECK(ivec.capacity() == 1); - CHECK(ivec.begin() == ivec.end()); - CHECK(ivec.cbegin() == ivec.cend()); - CHECK(ivec.empty()); - - auto d = ivec.data(); - ivec.reserve(2); - CHECK(ivec.capacity() == 2); - CHECK(d != ivec.data()); - CHECK(allocations == 1); - - ivec.resize(3, 8); - CHECK(ivec.capacity() == 3); - CHECK(ivec.size() == 3); - CHECK(ivec.front() == 8); - CHECK(ivec.back() == 8); - CHECK(d != ivec.data()); - CHECK(allocations == 2); - - ivec.clear(); - CHECK(ivec.size() == 0); - CHECK(ivec.capacity() == 3); - CHECK(d != ivec.data()); - CHECK(ivec.begin() == ivec.end()); - CHECK(ivec.cbegin() == ivec.cend()); - 
CHECK(ivec.empty()); - - ivec.push_back(5); - CHECK(ivec.size() == 1); - CHECK(ivec[0] == 5); - auto it = ivec.begin(); - CHECK(it == ivec.data()); - CHECK(it == ivec.cbegin()); - CHECK(*it == 5); - ++it; - CHECK(it == ivec.end()); - CHECK(it == ivec.cend()); - - auto& back = ivec.emplace_back(3); - CHECK(ivec.size() == 2); - auto rit = ivec.rbegin(); - CHECK(*rit == 3); - ++rit; - *rit = 12; - ++rit; - CHECK(rit == ivec.rend()); - CHECK(rit == ivec.crend()); - CHECK(ivec.front() == 12); - CHECK(ivec.back() == 3); - CHECK(back == 3); - CHECK(&back == &ivec.back()); - - ivec.insert(ivec.begin(), 53); - CHECK(ivec.capacity() == 3); - - ivec.insert(ivec.begin() + 2, 90); - ivec.insert(ivec.begin() + 4, 17); - ivec.insert(ivec.end(), 6); - ivec.insert(ivec.begin(), { 1, 2 }); - - int ints[] = { 1, 2, 53, 12, 90, 3, 17, 6 }; - CHECK(ivec.capacity() >= 8); - CHECK(ivec.size() == 8); - CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0); - - ivec.pop_back(); - CHECK(ivec.size() == 7); - CHECK(memcmp(ivec.data(), ints, sizeof(ints) - sizeof(int)) == 0); - - ivec.resize(8); - CHECK(ivec.size() == 8); - ints[7] = 0; - CHECK(memcmp(ivec.data(), ints, sizeof(ints)) == 0); - - const small_vector> ivec2 = { 1, 2, 3, 4 }; - CHECK(ivec2.size() == 4); - CHECK(*ivec2.begin() == 1); - CHECK(ivec2[1] == 2); - CHECK(ivec2.at(2) == 3); - CHECK(*ivec2.rbegin() == 4); - - ivec.erase(ivec.begin()); - CHECK(ivec.size() == 7); - CHECK(ivec.front() == 2); - CHECK(memcmp(ivec.data(), ints + 1, ivec.size() * sizeof(int)) == 0); - - ivec.erase(ivec.begin() + 2, ivec.begin() + 4); - CHECK(ivec.size() == 5); - CHECK(ivec[3] == 17); - - small_vector> svec; - svec.assign({ "as", "df" }); - CHECK(svec.size() == 2); - string s1 = "the quick brown fox jumped over the lazy dog 1234567890"; - auto& rs = svec.emplace_back(s1); - CHECK(svec.back() == s1); - CHECK(rs == s1); - CHECK(&rs == &svec.back()); - - auto svec1 = svec; - CHECK(svec1 == svec); - - const void* cstr = svec.back().c_str(); - auto svec2 
= std::move(svec); - CHECK(svec2.size() == 3); - CHECK(svec2.back() == s1); - - CHECK(svec.empty()); - CHECK(svec2.back().c_str() == cstr); - - svec = std::move(svec2); - CHECK(svec2.empty()); - CHECK(svec.back().c_str() == cstr); - - svec2 = svec; - CHECK(svec2.back() == s1); - CHECK(svec.back() == s1); - CHECK(svec == svec2); - - svec.insert(svec.begin(), s1); - CHECK(svec.size() == 4); - CHECK(svec.back().c_str() == cstr); - CHECK(svec.front() == svec.back()); - - cstr = s1.c_str(); - svec.emplace(svec.begin() + 2, std::move(s1)); - CHECK(svec.size() == 5); - CHECK(svec.front() == svec[2]); - CHECK(svec[2].c_str() == cstr); - - svec.clear(); - CHECK(svec.empty()); - svec2.clear(); - CHECK(svec2.empty()); - CHECK(svec == svec2); - - svec.resize(svec.capacity()); - CHECK(svec.size() == svec.capacity()); - - for (auto& s : svec) - { - CHECK(s.empty()); - } - - s1 = "asdf"; - small_vector> cvec(s1.begin(), s1.end()); - CHECK(cvec.size() == 4); - CHECK(cvec.front() == 'a'); - CHECK(cvec.back() == 'f'); - - cvec.clear(); - CHECK(cvec.size() == 0); - CHECK(cvec.empty()); - - s1 = "baz"; - cvec.assign(s1.begin(), s1.end()); - CHECK(cvec.size() == 3); - CHECK(cvec.front() == 'b'); - CHECK(cvec.back() == 'z'); - } - - CHECK(allocations == deallocations); - CHECK(allocated_bytes == deallocated_bytes); - CHECK(constructions == destructions); - - allocations = deallocations = allocated_bytes = deallocated_bytes = constructions = destructions = 0; -} - -TEST_CASE("[small_vector] static-dynamic") -{ - using namespace chobo; - using namespace chobo_small_vector_test; - using namespace std; - - { - small_vector> ivec; - auto d = ivec.data(); - ivec.reserve(20); - CHECK(ivec.data() == d); - - ivec.push_back(1); - ivec.push_back(2); - ivec.push_back(3); - - CHECK(ivec.data() == d); - - ivec.insert(ivec.end(), 3u, 8); - - CHECK(ivec.size() == 6); - CHECK(ivec.capacity() == 20); - - auto dd = ivec.data(); - - ivec.erase(ivec.begin(), ivec.begin() + 6); - CHECK(ivec.data() == d); - 
CHECK(ivec.empty()); - - ivec.resize(19, 11); - CHECK(ivec.size() == 19); - CHECK(ivec.capacity() == 20); - CHECK(ivec.data() == dd); - - ivec.resize(4); - CHECK(ivec.size() == 4); - CHECK(ivec.capacity() == 20); - CHECK(ivec.data() == dd); - - ivec.revert_to_static(); - CHECK(ivec.size() == 4); - CHECK(ivec.capacity() == 5); - CHECK(ivec.data() == d); - - ivec.reserve(10); - CHECK(ivec.size() == 4); - CHECK(ivec.capacity() == 20); - CHECK(ivec.data() == dd); - - ivec.shrink_to_fit(); - CHECK(ivec.size() == 4); - CHECK(ivec.capacity() == 5); - CHECK(ivec.data() == d); - - ivec.reserve(10); - CHECK(ivec.size() == 4); - CHECK(ivec.capacity() == 10); - CHECK(ivec.data() != d); - - dd = ivec.data(); - ivec.insert(ivec.begin() + 3, 5u, 88); - CHECK(ivec.size() == 9); - CHECK(ivec.capacity() == 10); - CHECK(ivec.data() == dd); - CHECK(ivec[2] == 11); - CHECK(ivec[7] == 88); - CHECK(ivec[8] == 11); - - small_vector> ivec2(ivec.begin(), ivec.end()); - CHECK(ivec2.size() == 9); - CHECK(ivec2.size() == 9); - CHECK(ivec2.capacity() == 9); - CHECK(ivec2[2] == 11); - CHECK(ivec2[7] == 88); - CHECK(ivec2[8] == 11); - - ivec.erase(ivec.begin() + 1, ivec.end() - 2); - CHECK(ivec.size() == 3); - ivec.erase(ivec.end() - 1); - CHECK(ivec.size() == 2); - CHECK(ivec.capacity() == 5); - CHECK(ivec.data() == d); - - ivec2.erase(ivec2.begin() + 1, ivec2.end() - 2); - CHECK(ivec2.size() == 3); - CHECK(ivec2.capacity() == 3); - } - - CHECK(allocations == deallocations); - CHECK(allocated_bytes == deallocated_bytes); - CHECK(constructions == destructions); - - allocations = deallocations = allocated_bytes = deallocated_bytes = constructions = destructions = 0; -} - -#if !defined(__EMSCRIPTEN__) || !defined(NDEBUG) // emscripten allows exceptions with -O0 -TEST_CASE("[small_vector] out of range") -{ - using namespace chobo; - small_vector ivec; - ivec.resize(4); - CHECK(ivec.capacity() == 5); - - CHECK_THROWS_AS(ivec.insert(ivec.begin() - 1, 1), std::out_of_range); - CHECK(ivec.size() == 4); 
- CHECK_THROWS_AS(ivec.insert(ivec.end() + 1, 1), std::out_of_range); - CHECK(ivec.size() == 4); - CHECK_THROWS_AS(ivec.erase(ivec.begin() - 1), std::out_of_range); - CHECK(ivec.size() == 4); - CHECK_THROWS_AS(ivec.erase(ivec.end() + 1), std::out_of_range); - CHECK(ivec.size() == 4); - CHECK_THROWS_AS(ivec.erase(ivec.begin() - 1, ivec.begin() + 1), std::out_of_range); - CHECK(ivec.size() == 4); - CHECK_THROWS_AS(ivec.erase(ivec.begin() + 2, ivec.end() + 1), std::out_of_range); - CHECK(ivec.size() == 4); - CHECK_THROWS_AS(ivec.erase(ivec.end() + 1, ivec.end() + 3), std::out_of_range); - CHECK(ivec.size() == 4); - CHECK_THROWS_AS(ivec.erase(ivec.end() - 1, ivec.begin() + 1), std::out_of_range); - CHECK(ivec.size() == 4); - -} -#endif - - -#endif diff --git a/include/vtkmdiy/thread.hpp b/include/vtkmdiy/thread.hpp index 8c35b2d8e..430fedc80 100644 --- a/include/vtkmdiy/thread.hpp +++ b/include/vtkmdiy/thread.hpp @@ -41,6 +41,9 @@ namespace diy #include "critical-resource.hpp" #if !defined(VTKMDIY_NO_THREADS) + +#include // for shared_ptr + template struct diy::concurrent_map { diff --git a/include/vtkmdiy/version.hpp b/include/vtkmdiy/version.hpp index 7bbe4df16..d3a1a3c6b 100644 --- a/include/vtkmdiy/version.hpp +++ b/include/vtkmdiy/version.hpp @@ -3,6 +3,6 @@ #define VTKMDIY_VERSION_MAJOR 3 #define VTKMDIY_VERSION_MINOR 5 -#define DIY_VERSION_PATCH dev1 +#define VTKMDIY_VERSION_PATCH dev1 #endif From 1e2749580997032400ee8b2330a88418c60ee332 Mon Sep 17 00:00:00 2001 From: Vicente Adolfo Bolea Sanchez Date: Wed, 22 Mar 2023 19:14:21 -0400 Subject: [PATCH 3/3] diy,mpi: Enable GPU AWARE MPI buffers This commit adds the flag VTKm_ENABLE_GPU_MPI which when enable it will use GPU AWARE MPI. - This will only work with GPUs and MPI implementation that supports GPU AWARE MPI calls. - Enabling VTKm_ENABLE_GPU_MPI without MPI/GPU support might results in errors when running VTK-m with DIY/MPI. 
- Only the following tests can run with this feature if enabled: - UnitTestSerializationDataSet - UnitTestSerializationArrayHandle --- CMakeLists.txt | 5 ++ vtkm/cont/CMakeLists.txt | 2 + vtkm/cont/DIYMemoryManagement.cxx | 78 +++++++++++++++++++ vtkm/cont/DIYMemoryManagement.h | 30 +++++++ vtkm/cont/RuntimeDeviceInformation.cxx | 5 ++ .../DeviceAdapterMemoryManagerCuda.cu | 5 ++ .../internal/DeviceAdapterMemoryManagerCuda.h | 2 + vtkm/cont/internal/Buffer.cxx | 41 +++++++--- .../internal/DeviceAdapterMemoryManager.cxx | 37 ++++++--- .../internal/DeviceAdapterMemoryManager.h | 39 ++++++++++ .../DeviceAdapterMemoryManagerShared.cxx | 6 ++ .../DeviceAdapterMemoryManagerShared.h | 2 + .../DeviceAdapterMemoryManagerKokkos.cxx | 26 +++++++ .../DeviceAdapterMemoryManagerKokkos.h | 6 ++ vtkm/cont/testing/TestingSerialization.h | 5 +- vtkm/internal/CMakeLists.txt | 1 + vtkm/internal/Configure.h.in | 3 + 17 files changed, 270 insertions(+), 23 deletions(-) create mode 100644 vtkm/cont/DIYMemoryManagement.cxx create mode 100644 vtkm/cont/DIYMemoryManagement.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0065590b1..b92561523 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,6 +186,11 @@ vtkm_option(VTKm_SKIP_LIBRARY_VERSIONS "Skip versioning VTK-m libraries" OFF) # through ctest's command-line. Doesn't affect CI unless enabled. vtkm_option(VTKm_OVERRIDE_CTEST_TIMEOUT "Disable default ctest timeout" OFF) +# VTKm_ENABLE_GPU_MPI makes VTK-m use the DIY routines that enable GPU-aware +# MPI. By default, this option is disabled. Also, this option is hidden unless +# VTKm_ENABLE_MPI=ON. 
+cmake_dependent_option(VTKm_ENABLE_GPU_MPI "Enable GPU AWARE MPI support" OFF "VTKm_ENABLE_MPI" OFF) + mark_as_advanced( VTKm_ENABLE_LOGGING VTKm_NO_ASSERT diff --git a/vtkm/cont/CMakeLists.txt b/vtkm/cont/CMakeLists.txt index 53b5308d5..9e824ff95 100644 --- a/vtkm/cont/CMakeLists.txt +++ b/vtkm/cont/CMakeLists.txt @@ -86,6 +86,7 @@ set(headers DeviceAdapterAlgorithm.h DeviceAdapterList.h DeviceAdapterTag.h + DIYMemoryManagement.h EnvironmentTracker.h Error.h ErrorBadAllocation.h @@ -154,6 +155,7 @@ set(sources DataSetBuilderRectilinear.cxx DataSetBuilderUniform.cxx DeviceAdapterTag.cxx + DIYMemoryManagement.cxx EnvironmentTracker.cxx ErrorBadDevice.cxx ErrorBadType.cxx diff --git a/vtkm/cont/DIYMemoryManagement.cxx b/vtkm/cont/DIYMemoryManagement.cxx new file mode 100644 index 000000000..647a285fd --- /dev/null +++ b/vtkm/cont/DIYMemoryManagement.cxx @@ -0,0 +1,78 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. 
+//============================================================================ + +#include + +#include +#include +#include +#include +#ifdef VTKM_ENABLE_GPU_MPI +#include +#endif + +namespace +{ + +thread_local vtkm::cont::DeviceAdapterId DIYCurrentDeviceAdaptor = + vtkm::cont::DeviceAdapterTagSerial(); + +vtkm::cont::internal::DeviceAdapterMemoryManagerBase& GetMemoryManager( + vtkm::cont::DeviceAdapterId device) +{ + return vtkm::cont::RuntimeDeviceInformation().GetMemoryManager(device); +} + +vtkmdiy::MemoryManagement GetDIYMemoryManagement(vtkm::cont::DeviceAdapterId device) +{ + return vtkmdiy::MemoryManagement( + [device](int, size_t n) { + return static_cast(GetMemoryManager(device).AllocateRawPointer(n)); + }, + [device](const char* p) { GetMemoryManager(device).DeleteRawPointer(const_cast(p)); }, + [device](char* dest, const char* src, size_t count) { + GetMemoryManager(device).CopyDeviceToDeviceRawPointer(src, dest, count); + }); +} + +} + +namespace vtkm +{ +namespace cont +{ + +vtkm::cont::DeviceAdapterId GetDIYDeviceAdapter() +{ + return DIYCurrentDeviceAdaptor; +} + +void DIYMasterExchange(vtkmdiy::Master& master, bool remote) +{ +#ifdef VTKM_ENABLE_GPU_MPI + try + { + DIYCurrentDeviceAdaptor = vtkm::cont::DeviceAdapterTagKokkos(); + master.exchange(remote, GetDIYMemoryManagement(vtkm::cont::DeviceAdapterTagKokkos())); + DIYCurrentDeviceAdaptor = vtkm::cont::DeviceAdapterTagSerial(); + } + catch (...) + { + DIYCurrentDeviceAdaptor = vtkm::cont::DeviceAdapterTagSerial(); + throw; + } +#else + DIYCurrentDeviceAdaptor = vtkm::cont::DeviceAdapterTagSerial(); + master.exchange(remote); +#endif +} + +} +} diff --git a/vtkm/cont/DIYMemoryManagement.h b/vtkm/cont/DIYMemoryManagement.h new file mode 100644 index 000000000..ad502a412 --- /dev/null +++ b/vtkm/cont/DIYMemoryManagement.h @@ -0,0 +1,30 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. 
+// See LICENSE.txt for details. +// +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +//============================================================================ +#ifndef vtk_m_cont_internal_DIYMemoryManagement_h +#define vtk_m_cont_internal_DIYMemoryManagement_h + +#include +#include +#include + +namespace vtkm +{ +namespace cont +{ + +VTKM_CONT_EXPORT vtkm::cont::DeviceAdapterId GetDIYDeviceAdapter(); + +/// \brief Wraps vtkmdiy::Master::exchange by setting its appropriate vtkmdiy::MemoryManagement. +VTKM_CONT_EXPORT void DIYMasterExchange(vtkmdiy::Master& master, bool remote = false); + +} +} + +#endif diff --git a/vtkm/cont/RuntimeDeviceInformation.cxx b/vtkm/cont/RuntimeDeviceInformation.cxx index f8e95eb3b..d9d0f8996 100644 --- a/vtkm/cont/RuntimeDeviceInformation.cxx +++ b/vtkm/cont/RuntimeDeviceInformation.cxx @@ -78,6 +78,11 @@ public: { throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device."); } + + VTKM_CONT virtual void DeleteRawPointer(void*) const override + { + throw vtkm::cont::ErrorBadDevice("Tried to manage memory on an invalid device."); + } }; class RuntimeDeviceConfigurationInvalid final diff --git a/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.cu b/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.cu index c972505a8..e12abc95e 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.cu +++ b/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.cu @@ -240,6 +240,11 @@ void DeviceAdapterMemoryManager::CopyDeviceToD cudaMemcpyDeviceToDevice, cudaStreamPerThread)); } + +void DeviceAdapterMemoryManager::DeleteRawPointer(void* mem) const +{ + CudaDelete(mem); +}; } } } // namespace vtkm::cont::internal diff --git a/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.h b/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.h index 
d819a5762..af0139a1c 100644 --- a/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.h +++ b/vtkm/cont/cuda/internal/DeviceAdapterMemoryManagerCuda.h @@ -50,6 +50,8 @@ public: VTKM_CONT virtual void CopyDeviceToDevice( const vtkm::cont::internal::BufferInfo& src, const vtkm::cont::internal::BufferInfo& dest) const override; + + VTKM_CONT virtual void DeleteRawPointer(void* mem) const override; }; } } diff --git a/vtkm/cont/internal/Buffer.cxx b/vtkm/cont/internal/Buffer.cxx index 9879daf14..7320c6187 100644 --- a/vtkm/cont/internal/Buffer.cxx +++ b/vtkm/cont/internal/Buffer.cxx @@ -10,6 +10,7 @@ #include +#include #include #include #include @@ -1158,30 +1159,46 @@ void Serialization::save(BinaryBuffer& bb, const vtkm::cont::internal::Buffer& obj) { vtkm::BufferSizeType size = obj.GetNumberOfBytes(); - vtkmdiy::save(bb, size); + std::unique_ptr token; + const void* ptr = nullptr; - if (size) + if (size > 0) { - // NOTE: If size == 0, obj.ReadPointerHost will be a nullptr, and saving that via - // vtkmdiy causes test failure on osheim - vtkm::cont::Token token; - const vtkm::UInt8* data = reinterpret_cast(obj.ReadPointerHost(token)); - vtkmdiy::save(bb, data, static_cast(size)); + token.reset(new vtkm::cont::Token); + ptr = obj.ReadPointerDevice(vtkm::cont::GetDIYDeviceAdapter(), *token); } + + // We need to keep the token alive until the data is consumed by DIY, + // otherwise the pointed-to data could be freed before it is consumed. + // Note that we cannot simply have the unique_ptr captured by the below + // lambda since save_binary_blob's 3rd argument is a std::function and + // std::function requires its target to be CopyConstructible, which a + // lambda capturing a std::unique_ptr is not. 
+ bb.save_binary_blob(static_cast(ptr), + static_cast(size), + [token = token.release()](const char[]) { + if (token != nullptr) + { + token->DetachFromAll(); + delete token; + } + }); } void Serialization::load(BinaryBuffer& bb, vtkm::cont::internal::Buffer& obj) { - vtkm::BufferSizeType size; - vtkmdiy::load(bb, size); - vtkm::cont::Token token; + auto blob = bb.load_binary_blob(); + vtkm::BufferSizeType size = blob.size; obj.SetNumberOfBytes(size, vtkm::CopyFlag::Off, token); + if (size) { - vtkm::UInt8* data = reinterpret_cast(obj.WritePointerHost(token)); - vtkmdiy::load(bb, data, static_cast(size)); + auto device = vtkm::cont::GetDIYDeviceAdapter(); + void* ptr = obj.WritePointerDevice(device, token); + vtkm::cont::RuntimeDeviceInformation().GetMemoryManager(device).CopyDeviceToDeviceRawPointer( + blob.pointer.get(), ptr, size); } } diff --git a/vtkm/cont/internal/DeviceAdapterMemoryManager.cxx b/vtkm/cont/internal/DeviceAdapterMemoryManager.cxx index c37056cc5..15345e3d5 100644 --- a/vtkm/cont/internal/DeviceAdapterMemoryManager.cxx +++ b/vtkm/cont/internal/DeviceAdapterMemoryManager.cxx @@ -42,7 +42,11 @@ #include #include -namespace +namespace vtkm +{ +namespace cont +{ +namespace internal { /// A deleter object that can be used with our aligned mallocs @@ -120,15 +124,6 @@ void HostReallocate(void*& memory, memory = container = newBuffer; } -} // anonymous namespace - -namespace vtkm -{ -namespace cont -{ -namespace internal -{ - VTKM_CONT void InvalidRealloc(void*&, void*&, vtkm::BufferSizeType, vtkm::BufferSizeType) { throw vtkm::cont::ErrorBadAllocation("User provided memory does not have a reallocater."); @@ -340,6 +335,28 @@ vtkm::cont::internal::BufferInfo DeviceAdapterMemoryManagerBase::ManageArray( return vtkm::cont::internal::BufferInfo( this->GetDevice(), memory, container, size, deleter, reallocater); } + +void* DeviceAdapterMemoryManagerBase::AllocateRawPointer(vtkm::BufferSizeType size) const +{ + return 
this->Allocate(size).TransferOwnership().Memory; +} + +void DeviceAdapterMemoryManagerBase::CopyDeviceToDeviceRawPointer(const void* src, + void* dest, + vtkm::BufferSizeType size) const +{ + this->CopyDeviceToDevice( + vtkm::cont::internal::BufferInfo( + this->GetDevice(), + const_cast(src), + const_cast(src), + size, + [](void*) {}, + vtkm::cont::internal::InvalidRealloc), + vtkm::cont::internal::BufferInfo( + this->GetDevice(), dest, dest, size, [](void*) {}, vtkm::cont::internal::InvalidRealloc)); +} + } } } // namespace vtkm::cont::internal diff --git a/vtkm/cont/internal/DeviceAdapterMemoryManager.h b/vtkm/cont/internal/DeviceAdapterMemoryManager.h index b27e537f2..146dd4b20 100644 --- a/vtkm/cont/internal/DeviceAdapterMemoryManager.h +++ b/vtkm/cont/internal/DeviceAdapterMemoryManager.h @@ -196,6 +196,37 @@ public: /// objects were created by a previous call to this object. VTKM_CONT virtual void CopyDeviceToDevice(const vtkm::cont::internal::BufferInfo& src, const vtkm::cont::internal::BufferInfo& dest) const = 0; + + + /// \brief Low-level method to allocate memory on the device. + /// + /// This method allocates an array of the given number of bytes on the device and returns + /// a void pointer to the array. The preferred method to allocate memory is to use the + /// `Allocate` method, which returns a `BufferInfo` that manages its own memory. However, + /// for cases where you are interfacing with code outside of VTK-m and need just a raw + /// pointer, this method can be used. The returned memory can be freed with + /// `DeleteRawPointer`. + VTKM_CONT virtual void* AllocateRawPointer(vtkm::BufferSizeType size) const; + + /// \brief Low-level method to copy data on the device. + /// + /// This method copies data from one raw pointer to another. It performs the same + /// function as `CopyDeviceToDevice`, except that it operates on raw pointers + /// instead of `BufferInfo` objects. 
This is a useful low-level mechanism to move + /// data on a device in memory locations created externally to VTK-m. + VTKM_CONT virtual void CopyDeviceToDeviceRawPointer(const void* src, + void* dest, + vtkm::BufferSizeType size) const; + + /// \brief Low-level method to delete memory on the device. + /// + /// This method takes a pointer to memory allocated on the device and frees it. + /// The preferred method to delete memory is to use the deallocation routines in + /// `BufferInfo` objects created with `Allocate`. But for cases where you only + /// have a raw pointer to the data, this method can be used to manage it. This + /// method should only be used on memory allocated with this + /// `DeviceAdapterMemoryManager`. + VTKM_CONT virtual void DeleteRawPointer(void*) const = 0; }; /// \brief The device adapter memory manager. @@ -207,6 +238,14 @@ public: template class DeviceAdapterMemoryManager; +VTKM_CONT_EXPORT VTKM_CONT void HostDeleter(void*); +VTKM_CONT_EXPORT VTKM_CONT void* HostAllocate(vtkm::BufferSizeType); +VTKM_CONT_EXPORT VTKM_CONT void HostReallocate(void*&, + void*&, + vtkm::BufferSizeType, + vtkm::BufferSizeType); + + VTKM_CONT_EXPORT VTKM_CONT void InvalidRealloc(void*&, void*&, vtkm::BufferSizeType, diff --git a/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.cxx b/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.cxx index aa3a7ed59..52e37b05c 100644 --- a/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.cxx +++ b/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.cxx @@ -83,6 +83,12 @@ void DeviceAdapterMemoryManagerShared::CopyDeviceToDevice( std::memcpy(dest.GetPointer(), src.GetPointer(), static_cast(src.GetSize())); } + +void DeviceAdapterMemoryManagerShared::DeleteRawPointer(void* mem) const +{ + vtkm::cont::internal::HostDeleter(mem); +} + } } } // namespace vtkm::cont::internal diff --git a/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.h b/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.h index c17dead0b..254a0bd7f 
100644 --- a/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.h +++ b/vtkm/cont/internal/DeviceAdapterMemoryManagerShared.h @@ -50,6 +50,8 @@ public: VTKM_CONT virtual void CopyDeviceToDevice( const vtkm::cont::internal::BufferInfo& src, const vtkm::cont::internal::BufferInfo& dest) const override; + + VTKM_CONT virtual void DeleteRawPointer(void* mem) const override; }; } } diff --git a/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.cxx b/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.cxx index 64885b0e9..764e58205 100644 --- a/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.cxx +++ b/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.cxx @@ -153,6 +153,32 @@ void DeviceAdapterMemoryManager::CopyDeviceT static_cast(dest.GetPointer()), static_cast(size)); Kokkos::deep_copy(vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(), destView, srcView); } + +// Low level memory management methods +void* DeviceAdapterMemoryManager::AllocateRawPointer( + vtkm::BufferSizeType size) const +{ + return vtkm::cont::kokkos::internal::Allocate(size); +} + +void DeviceAdapterMemoryManager::CopyDeviceToDeviceRawPointer( + const void* src, + void* dest, + vtkm::BufferSizeType size) const +{ + Kokkos::View> destView(static_cast(dest), + size); + Kokkos::View> srcView( + static_cast(src), size); + Kokkos::deep_copy(vtkm::cont::kokkos::internal::GetExecutionSpaceInstance(), destView, srcView); +} + +void DeviceAdapterMemoryManager::DeleteRawPointer( + void* mem) const +{ + vtkm::cont::kokkos::internal::Free(mem); +} + } } } // vtkm::cont::internal diff --git a/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.h b/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.h index 5a8ff0ce8..a1002d626 100644 --- a/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.h +++ b/vtkm/cont/kokkos/internal/DeviceAdapterMemoryManagerKokkos.h @@ -50,6 +50,12 @@ public: VTKM_CONT virtual void CopyDeviceToDevice( const 
vtkm::cont::internal::BufferInfo& src, const vtkm::cont::internal::BufferInfo& dest) const override; + + VTKM_CONT void* AllocateRawPointer(vtkm::BufferSizeType size) const override; + VTKM_CONT void CopyDeviceToDeviceRawPointer(const void* src, + void* dest, + vtkm::BufferSizeType size) const override; + VTKM_CONT void DeleteRawPointer(void* mem) const override; }; } } diff --git a/vtkm/cont/testing/TestingSerialization.h b/vtkm/cont/testing/TestingSerialization.h index 6275f84b1..1f8455b85 100644 --- a/vtkm/cont/testing/TestingSerialization.h +++ b/vtkm/cont/testing/TestingSerialization.h @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -189,7 +190,9 @@ void TestSerialization(const T& obj, const TestEqualFunctor& test) master.foreach ([](Block* b, const vtkmdiy::Master::ProxyWithLink& cp) { cp.enqueue(cp.link()->target(0), b->send); }); - master.exchange(); + + vtkm::cont::DIYMasterExchange(master); + master.foreach ([](Block* b, const vtkmdiy::Master::ProxyWithLink& cp) { cp.dequeue(cp.link()->target(1).gid, b->received); }); diff --git a/vtkm/internal/CMakeLists.txt b/vtkm/internal/CMakeLists.txt index 8e6989c3a..4b4463607 100755 --- a/vtkm/internal/CMakeLists.txt +++ b/vtkm/internal/CMakeLists.txt @@ -26,6 +26,7 @@ set(VTKM_ENABLE_OPENMP ${VTKm_ENABLE_OPENMP}) set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB}) set(VTKM_ENABLE_MPI ${VTKm_ENABLE_MPI}) +set(VTKM_ENABLE_GPU_MPI ${VTKm_ENABLE_GPU_MPI}) if(VTKM_ENABLE_CUDA) string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" VTKM_CUDA_VERSION_MAJOR ${CMAKE_CUDA_COMPILER_VERSION}) diff --git a/vtkm/internal/Configure.h.in b/vtkm/internal/Configure.h.in index f2caf2a64..7a4b15c25 100644 --- a/vtkm/internal/Configure.h.in +++ b/vtkm/internal/Configure.h.in @@ -308,6 +308,9 @@ //Mark if we are building with MPI enabled. #cmakedefine VTKM_ENABLE_MPI +//Mark if we are building with GPU AWARE MPI enabled. +#cmakedefine VTKM_ENABLE_GPU_MPI + //Mark what version of the CUDA compiler we have. 
This is needed to correctly //choose consistent implementation ( so we don't violate ODR ) when we compile //with CUDA 7.5