Merge branch 'sl_comm_probe' into 'master'

Draft: Use async termination.

See merge request vtk/vtk-m!3182
Dave Pugmire 2024-07-02 16:33:03 -04:00
commit d804d0fcdf
5 changed files with 146 additions and 71 deletions
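In short: instead of computing a global particle count up front (ComputeTotalNumParticles, an all_reduce) and looping until TotalNumTerminatedParticles reaches it, the advection loop now asks a new AdvectAlgorithmTerminator whether every rank has gone quiet. The terminator detects global quiescence asynchronously with an MPI_Ibarrier/MPI_Iallreduce consensus; a standalone sketch of the protocol follows the new class in the diff below.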

@@ -15,6 +15,7 @@
 #include <vtkm/cont/ErrorFilterExecution.h>
 #include <vtkm/filter/Filter.h>
 #include <vtkm/filter/flow/FlowTypes.h>
+#include <vtkm/filter/flow/internal/BoundsMap.h>
 #include <vtkm/filter/flow/vtkm_filter_flow_export.h>
 namespace vtkm
@@ -104,7 +105,7 @@ protected:
   bool BlockIdsSet = false;
   std::vector<vtkm::Id> BlockIds;
+  vtkm::filter::flow::internal::BoundsMap BoundsMap;
   vtkm::Id NumberOfSteps = 0;
   vtkm::cont::UnknownArrayHandle Seeds;
   vtkm::filter::flow::IntegrationSolverType SolverType =

@@ -58,13 +58,15 @@ FilterParticleAdvectionSteadyState<Derived>::DoExecutePartitions(
     DataSetIntegratorSteadyState<ParticleType, FieldType, TerminationType, AnalysisType>;
   this->ValidateOptions();
-  vtkm::filter::flow::internal::BoundsMap boundsMap(input);
+  if (this->BlockIdsSet)
+    this->BoundsMap = vtkm::filter::flow::internal::BoundsMap(input, this->BlockIds);
+  else
+    this->BoundsMap = vtkm::filter::flow::internal::BoundsMap(input);
   std::vector<DSIType> dsi;
   for (vtkm::Id i = 0; i < input.GetNumberOfPartitions(); i++)
   {
-    vtkm::Id blockId = boundsMap.GetLocalBlockId(i);
+    vtkm::Id blockId = this->BoundsMap.GetLocalBlockId(i);
     auto dataset = input.GetPartition(i);
     // Build the field for the current dataset
@@ -78,7 +80,7 @@ FilterParticleAdvectionSteadyState<Derived>::DoExecutePartitions(
   }
   vtkm::filter::flow::internal::ParticleAdvector<DSIType> pav(
-    boundsMap, dsi, this->UseThreadedAlgorithm, this->UseAsynchronousCommunication);
+    this->BoundsMap, dsi, this->UseThreadedAlgorithm, this->UseAsynchronousCommunication);
   vtkm::cont::ArrayHandle<ParticleType> particles;
   this->Seeds.AsArrayHandle(particles);

@@ -55,12 +55,15 @@ FilterParticleAdvectionUnsteadyState<Derived>::DoExecutePartitions(
   using DSIType = vtkm::filter::flow::internal::
     DataSetIntegratorUnsteadyState<ParticleType, FieldType, TerminationType, AnalysisType>;
-  vtkm::filter::flow::internal::BoundsMap boundsMap(input);
+  if (this->BlockIdsSet)
+    this->BoundsMap = vtkm::filter::flow::internal::BoundsMap(input, this->BlockIds);
+  else
+    this->BoundsMap = vtkm::filter::flow::internal::BoundsMap(input);
   std::vector<DSIType> dsi;
   for (vtkm::Id i = 0; i < input.GetNumberOfPartitions(); i++)
   {
-    vtkm::Id blockId = boundsMap.GetLocalBlockId(i);
+    vtkm::Id blockId = this->BoundsMap.GetLocalBlockId(i);
     auto ds1 = input.GetPartition(i);
     auto ds2 = this->Input2.GetPartition(i);
@@ -85,7 +88,7 @@ FilterParticleAdvectionUnsteadyState<Derived>::DoExecutePartitions(
       analysis);
   }
   vtkm::filter::flow::internal::ParticleAdvector<DSIType> pav(
-    boundsMap, dsi, this->UseThreadedAlgorithm, this->UseAsynchronousCommunication);
+    this->BoundsMap, dsi, this->UseThreadedAlgorithm, this->UseAsynchronousCommunication);
   vtkm::cont::ArrayHandle<ParticleType> particles;
   this->Seeds.AsArrayHandle(particles);
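Both the steady- and unsteady-state filters now build the BoundsMap member from an optional user-supplied block list instead of a function-local BoundsMap over all partitions. A minimal usage sketch, assuming the filter exposes a SetBlockIDs()-style setter that fills the BlockIds/BlockIdsSet members shown above (the field name and step parameters here are placeholders):

#include <vtkm/Particle.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/PartitionedDataSet.h>
#include <vtkm/filter/flow/ParticleAdvection.h>

vtkm::cont::PartitionedDataSet AdvectOnSelectedBlocks(
  const vtkm::cont::PartitionedDataSet& input,
  const vtkm::cont::ArrayHandle<vtkm::Particle>& seeds)
{
  vtkm::filter::flow::ParticleAdvection pa;
  pa.SetActiveField("velocity"); // placeholder field name
  pa.SetStepSize(0.01f);
  pa.SetNumberOfSteps(1000);
  pa.SetSeeds(seeds);
  // Assumed setter: fills BlockIds and sets BlockIdsSet, so that
  // DoExecutePartitions constructs BoundsMap(input, this->BlockIds)
  // instead of BoundsMap(input).
  pa.SetBlockIDs(std::vector<vtkm::Id>{ 0, 2 });
  return pa.Execute(input);
}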

@@ -15,6 +15,10 @@
 #include <vtkm/filter/flow/internal/BoundsMap.h>
 #include <vtkm/filter/flow/internal/DataSetIntegrator.h>
 #include <vtkm/filter/flow/internal/ParticleMessenger.h>
+#ifdef VTKM_ENABLE_MPI
+#include <vtkm/thirdparty/diy/diy.h>
+#include <vtkm/thirdparty/diy/mpi-cast.h>
+#endif
 namespace vtkm
 {
@@ -25,6 +29,87 @@ namespace flow
 namespace internal
 {
+class AdvectAlgorithmTerminator
+{
+public:
+#ifdef VTKM_ENABLE_MPI
+  AdvectAlgorithmTerminator(vtkmdiy::mpi::communicator& comm)
+    : MPIComm(vtkmdiy::mpi::mpi_cast(comm.handle()))
+#else
+  AdvectAlgorithmTerminator(vtkmdiy::mpi::communicator& vtkmNotUsed(comm))
+#endif
+  {
+  }
+
+  void AddWork()
+  {
+#ifdef VTKM_ENABLE_MPI
+    this->Dirty = 1;
+#endif
+  }
+
+  bool Done() const { return this->State == AdvectAlgorithmTerminatorState::DONE; }
+
+  void Control(bool haveLocalWork)
+  {
+#ifdef VTKM_ENABLE_MPI
+    if (this->State == STATE_0 && !haveLocalWork)
+    {
+      MPI_Ibarrier(this->MPIComm, &this->StateReq);
+      this->Dirty = 0;
+      this->State = STATE_1;
+    }
+    else if (this->State == STATE_1)
+    {
+      MPI_Status status;
+      int flag;
+      MPI_Test(&this->StateReq, &flag, &status);
+      if (flag == 1)
+      {
+        int localDirty = this->Dirty;
+        MPI_Iallreduce(
+          &localDirty, &this->AllDirty, 1, MPI_INT, MPI_LOR, this->MPIComm, &this->StateReq);
+        this->State = STATE_2;
+      }
+    }
+    else if (this->State == STATE_2)
+    {
+      MPI_Status status;
+      int flag;
+      MPI_Test(&this->StateReq, &flag, &status);
+      if (flag == 1)
+      {
+        if (this->AllDirty == 0) //done
+          this->State = DONE;
+        else
+          this->State = STATE_0; //reset.
+      }
+    }
+#else
+    if (!haveLocalWork)
+      this->State = DONE;
+#endif
+  }
+
+private:
+  enum AdvectAlgorithmTerminatorState
+  {
+    STATE_0,
+    STATE_1,
+    STATE_2,
+    DONE
+  };
+  AdvectAlgorithmTerminatorState State = AdvectAlgorithmTerminatorState::STATE_0;
+
+#ifdef VTKM_ENABLE_MPI
+  std::atomic<int> Dirty;
+  int AllDirty = 0;
+  MPI_Request StateReq;
+  MPI_Comm MPIComm;
+#endif
+};
+
 template <typename DSIType>
 class AdvectAlgorithm
 {
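The new class implements a three-phase, non-blocking consensus: a rank that runs out of local work enters an MPI_Ibarrier (STATE_0 to STATE_1); once every rank has reached the barrier, each rank votes with a logical-OR MPI_Iallreduce on whether any new work arrived in the meantime (STATE_1 to STATE_2); a unanimous zero means global quiescence (DONE), otherwise everyone resets to STATE_0 and tries again. Below is a self-contained sketch of the same protocol in plain MPI, independent of VTK-m; names are illustrative, not part of the commit. One deliberate difference: the Iallreduce send buffer is a member here so it outlives the pending request (MPI requires the send buffer to stay valid until the request completes; in the class above, localDirty is a stack variable).

// terminator_sketch.cpp -- build with `mpicxx terminator_sketch.cpp`,
// run under mpirun. A minimal standalone rendition of the protocol.
#include <mpi.h>
#include <atomic>
#include <cstdio>

class Terminator
{
public:
  explicit Terminator(MPI_Comm comm)
    : Comm(comm)
  {
  }

  // Call when new work arrives (e.g. particles received from another rank).
  void AddWork() { this->Dirty = 1; }

  bool Done() const { return this->State == DONE; }

  // Drive the state machine; call once per iteration of the work loop.
  void Control(bool haveLocalWork)
  {
    if (this->State == STATE_0 && !haveLocalWork)
    {
      // Phase 1: this rank is out of work; enter the non-blocking barrier.
      MPI_Ibarrier(this->Comm, &this->StateReq);
      this->Dirty = 0;
      this->State = STATE_1;
    }
    else if (this->State == STATE_1)
    {
      int flag = 0;
      MPI_Test(&this->StateReq, &flag, MPI_STATUS_IGNORE);
      if (flag)
      {
        // Phase 2: all ranks reached the barrier. Vote on whether anyone
        // picked up new work since entering it. The send buffer is a member
        // so it stays valid while the Iallreduce is in flight.
        this->LocalDirty = this->Dirty;
        MPI_Iallreduce(&this->LocalDirty, &this->AllDirty, 1, MPI_INT, MPI_LOR,
                       this->Comm, &this->StateReq);
        this->State = STATE_2;
      }
    }
    else if (this->State == STATE_2)
    {
      int flag = 0;
      MPI_Test(&this->StateReq, &flag, MPI_STATUS_IGNORE);
      if (flag)
      {
        // Phase 3: a unanimous "no new work" vote means global quiescence.
        this->State = (this->AllDirty == 0) ? DONE : STATE_0;
      }
    }
  }

private:
  enum TerminatorState
  {
    STATE_0, // working, or waiting to run dry
    STATE_1, // in the non-blocking barrier
    STATE_2, // in the vote
    DONE
  };
  TerminatorState State = STATE_0;
  std::atomic<int> Dirty{ 0 };
  int LocalDirty = 0;
  int AllDirty = 0;
  MPI_Request StateReq;
  MPI_Comm Comm;
};

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  Terminator term(MPI_COMM_WORLD);
  int work = 5 + rank; // pretend each rank starts with a different amount of work
  while (!term.Done())
  {
    if (work > 0)
      --work; // advance one unit of local "work"
    term.Control(work > 0);
  }
  if (rank == 0)
    std::printf("all ranks agree: no work left anywhere\n");
  MPI_Finalize();
  return 0;
}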
@@ -39,6 +124,7 @@ public:
     , NumRanks(this->Comm.size())
     , Rank(this->Comm.rank())
     , UseAsynchronousCommunication(useAsyncComm)
+    , Terminator(this->Comm)
   {
   }
@@ -97,27 +183,21 @@ public:
     vtkm::filter::flow::internal::ParticleMessenger<ParticleType> messenger(
       this->Comm, this->UseAsynchronousCommunication, this->BoundsMap, 1, 128);
-    this->ComputeTotalNumParticles();
-    while (this->TotalNumTerminatedParticles < this->TotalNumParticles)
+    while (!this->Terminator.Done())
     {
       std::vector<ParticleType> v;
-      vtkm::Id numTerm = 0, blockId = -1;
+      vtkm::Id blockId = -1;
       if (this->GetActiveParticles(v, blockId))
       {
         //make this a pointer to avoid the copy?
         auto& block = this->GetDataSet(blockId);
         DSIHelperInfo<ParticleType> bb(v, this->BoundsMap, this->ParticleBlockIDsMap);
         block.Advect(bb, this->StepSize);
-        numTerm = this->UpdateResult(bb);
+        this->UpdateResult(bb);
       }
-      vtkm::Id numTermMessages = 0;
-      this->Communicate(messenger, numTerm, numTermMessages);
-      this->TotalNumTerminatedParticles += (numTerm + numTermMessages);
-      if (this->TotalNumTerminatedParticles > this->TotalNumParticles)
-        throw vtkm::cont::ErrorFilterExecution("Particle count error");
+      this->Communicate(messenger);
+      this->Terminator.Control(!this->Active.empty());
     }
   }
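This loop has the same shape as main() in the sketch above: advect whatever is locally active, exchange particles, then let the terminator observe whether work remains. Note that Control() is passed !Active.empty() only; particles parked in Inactive are handed off inside Communicate(), and anything received back re-enters Active via UpdateActive(), which calls Terminator.AddWork() so a pending consensus vote is vetoed.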
@@ -128,19 +208,6 @@ public:
     this->ParticleBlockIDsMap.clear();
   }
-  void ComputeTotalNumParticles()
-  {
-    vtkm::Id numLocal = static_cast<vtkm::Id>(this->Inactive.size());
-    for (const auto& it : this->Active)
-      numLocal += it.second.size();
-#ifdef VTKM_ENABLE_MPI
-    vtkmdiy::mpi::all_reduce(this->Comm, numLocal, this->TotalNumParticles, std::plus<vtkm::Id>{});
-#else
-    this->TotalNumParticles = numLocal;
-#endif
-  }
   DataSetIntegrator<DSIType, ParticleType>& GetDataSet(vtkm::Id id)
   {
     for (auto& it : this->Blocks)
@@ -213,9 +280,7 @@ public:
     return !particles.empty();
   }
-  void Communicate(vtkm::filter::flow::internal::ParticleMessenger<ParticleType>& messenger,
-                   vtkm::Id numLocalTerminations,
-                   vtkm::Id& numTermMessages)
+  void Communicate(vtkm::filter::flow::internal::ParticleMessenger<ParticleType>& messenger)
   {
     std::vector<ParticleType> outgoing;
     std::vector<vtkm::Id> outgoingRanks;
@@ -224,16 +289,17 @@ public:
     std::vector<ParticleType> incoming;
     std::unordered_map<vtkm::Id, std::vector<vtkm::Id>> incomingBlockIDs;
-    numTermMessages = 0;
     bool block = false;
 #ifdef VTKM_ENABLE_MPI
-    block = this->GetBlockAndWait(messenger.UsingSyncCommunication(), numLocalTerminations);
+    block = this->GetBlockAndWait(messenger.UsingSyncCommunication());
 #endif
+    vtkm::Id numTermMessages;
     messenger.Exchange(outgoing,
                        outgoingRanks,
                        this->ParticleBlockIDsMap,
-                       numLocalTerminations,
+                       0,
                        incoming,
                        incomingBlockIDs,
                        numTermMessages,
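The messenger's Exchange() interface is unchanged: the new code passes a literal 0 where the local-termination count used to go, and numTermMessages becomes a throwaway local, since termination is now decided by the Terminator rather than by counting termination messages.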
@@ -311,17 +377,22 @@
 {
   VTKM_ASSERT(particles.size() == idsMap.size());
-  for (auto pit = particles.begin(); pit != particles.end(); pit++)
-  {
-    vtkm::Id particleID = pit->GetID();
-    const auto& it = idsMap.find(particleID);
-    VTKM_ASSERT(it != idsMap.end() && !it->second.empty());
-    vtkm::Id blockId = it->second[0];
-    this->Active[blockId].emplace_back(*pit);
-  }
-  for (const auto& it : idsMap)
-    this->ParticleBlockIDsMap[it.first] = it.second;
+  if (!particles.empty())
+  {
+    this->Terminator.AddWork();
+    for (auto pit = particles.begin(); pit != particles.end(); pit++)
+    {
+      vtkm::Id particleID = pit->GetID();
+      const auto& it = idsMap.find(particleID);
+      VTKM_ASSERT(it != idsMap.end() && !it->second.empty());
+      vtkm::Id blockId = it->second[0];
+      this->Active[blockId].emplace_back(*pit);
+    }
+    for (const auto& it : idsMap)
+      this->ParticleBlockIDsMap[it.first] = it.second;
+  }
 }
 virtual void UpdateInactive(const std::vector<ParticleType>& particles,
@@ -351,7 +422,7 @@ public:
   }
-  virtual bool GetBlockAndWait(const bool& syncComm, const vtkm::Id& numLocalTerm)
+  virtual bool GetBlockAndWait(const bool& syncComm)
   {
     bool haveNoWork = this->Active.empty() && this->Inactive.empty();
@@ -367,9 +438,11 @@ public:
     //2. numLocalTerm + this->TotalNumberOfTerminatedParticles == this->TotalNumberOfParticles
     //So, if neither are true, we can safely block and wait for communication to come in.
-    if (haveNoWork &&
-        (numLocalTerm + this->TotalNumTerminatedParticles < this->TotalNumParticles))
-      return true;
+    // if (this->Terminator.State == AdvectAlgorithmTerminator::AdvectAlgorithmTerminatorState::STATE_2)
+    //   return true;
+    // if (haveNoWork && (numLocalTerm + this->TotalNumTerminatedParticles < this->TotalNumParticles))
+    //   return true;
     return false;
   }
@@ -388,9 +461,8 @@ public:
   std::unordered_map<vtkm::Id, std::vector<vtkm::Id>> ParticleBlockIDsMap;
   vtkm::Id Rank;
   vtkm::FloatDefault StepSize;
-  vtkm::Id TotalNumParticles = 0;
-  vtkm::Id TotalNumTerminatedParticles = 0;
   bool UseAsynchronousCommunication = true;
+  AdvectAlgorithmTerminator Terminator;
 };
 }

@@ -39,7 +39,6 @@ public:
     bool useAsyncComm)
     : AdvectAlgorithm<DSIType>(bm, blocks, useAsyncComm)
     , Done(false)
-    , WorkerActivate(false)
   {
     //For threaded algorithm, the particles go out of scope in the Work method.
     //When this happens, they are destructed by the time the Manage thread gets them.
@@ -50,8 +49,6 @@ public:
   void Go() override
   {
-    this->ComputeTotalNumParticles();
     std::vector<std::thread> workerThreads;
     workerThreads.emplace_back(std::thread(AdvectAlgorithmThreaded::Worker, this));
     this->Manage();
@@ -63,6 +60,13 @@ public:
   }
 protected:
+  bool HaveAnyWork()
+  {
+    std::lock_guard<std::mutex> lock(this->Mutex);
+    //We have work if there are particles in any queue or a worker is busy.
+    return !this->Active.empty() || !this->Inactive.empty() || this->WorkerActivate;
+  }
+
   bool GetActiveParticles(std::vector<ParticleType>& particles, vtkm::Id& blockId) override
   {
     std::lock_guard<std::mutex> lock(this->Mutex);
@@ -144,38 +148,31 @@ protected:
     vtkm::filter::flow::internal::ParticleMessenger<ParticleType> messenger(
       this->Comm, useAsync, this->BoundsMap, 1, 128);
-    while (this->TotalNumTerminatedParticles < this->TotalNumParticles)
+    while (!this->Terminator.Done())
     {
       std::unordered_map<vtkm::Id, std::vector<DSIHelperInfo<ParticleType>>> workerResults;
       this->GetWorkerResults(workerResults);
-      vtkm::Id numTerm = 0;
       for (auto& it : workerResults)
       {
         for (auto& r : it.second)
-          numTerm += this->UpdateResult(r);
+          this->UpdateResult(r);
       }
-      vtkm::Id numTermMessages = 0;
-      this->Communicate(messenger, numTerm, numTermMessages);
-      this->TotalNumTerminatedParticles += (numTerm + numTermMessages);
-      if (this->TotalNumTerminatedParticles > this->TotalNumParticles)
-        throw vtkm::cont::ErrorFilterExecution("Particle count error");
+      this->Communicate(messenger);
+      this->Terminator.Control(this->HaveAnyWork());
     }
     //Let the workers know that we are done.
     this->SetDone();
   }
-  bool GetBlockAndWait(const bool& syncComm, const vtkm::Id& numLocalTerm) override
+  bool GetBlockAndWait(const bool& syncComm) override
   {
     std::lock_guard<std::mutex> lock(this->Mutex);
     if (this->Done)
       return true;
-    return (this->AdvectAlgorithm<DSIType>::GetBlockAndWait(syncComm, numLocalTerm) &&
-            !this->WorkerActivate && this->WorkerResults.empty());
+    return (this->AdvectAlgorithm<DSIType>::GetBlockAndWait(syncComm) && !this->WorkerActivate &&
+            this->WorkerResults.empty());
   }
   void GetWorkerResults(
@@ -193,7 +190,7 @@ protected:
   std::atomic<bool> Done;
   std::mutex Mutex;
-  bool WorkerActivate;
+  bool WorkerActivate = false;
   std::condition_variable WorkerActivateCondition;
   std::unordered_map<vtkm::Id, std::vector<DSIHelperInfo<ParticleType>>> WorkerResults;
 };
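A closing note on the threaded variant: its quiescence test is HaveAnyWork() rather than !Active.empty(), because a worker thread can hold particles that sit in neither queue. Counting WorkerActivate as outstanding work keeps the Manage thread from entering the terminator's barrier while a worker is still advecting and about to deposit results.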