Remove uses of ScopedCudaStackSize

Since we have (hopefully) gotten rid of all unbounded recursion and
calls to function pointers or virtual methods, the CUDA compiler should
be able to statically determine the size of the stack needed. Thus, we
shouldn't need `ScopedCudaStackSize` at all.

However, there is one odd case where using it seems to be necessary. It
is unclear why, but that is an issue for another day.
This commit is contained in:
Kenneth Moreland 2021-07-29 12:47:29 -06:00
parent 4bf8bfb1fa
commit 869535b23f
2 changed files with 45 additions and 3 deletions

@ -60,6 +60,22 @@ void SetFilter(FilterType& filter,
void TestAMRStreamline(FilterType fType, bool useThreaded)
{
switch (fType)
{
case PARTICLE_ADVECTION:
std::cout << "Particle advection";
break;
case STREAMLINE:
std::cout << "Streamline";
break;
case PATHLINE:
std::cout << "Pathline";
break;
}
if (useThreaded)
std::cout << " - using threaded";
std::cout << " - on an AMR data set" << std::endl;
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
if (comm.size() < 2)
return;
@ -296,6 +312,24 @@ void ValidateOutput(const vtkm::cont::DataSet& out,
void TestPartitionedDataSet(vtkm::Id nPerRank, bool useGhost, FilterType fType, bool useThreaded)
{
switch (fType)
{
case PARTICLE_ADVECTION:
std::cout << "Particle advection";
break;
case STREAMLINE:
std::cout << "Streamline";
break;
case PATHLINE:
std::cout << "Pathline";
break;
}
if (useGhost)
std::cout << " - using ghost cells";
if (useThreaded)
std::cout << " - using threaded";
std::cout << " - on a partitioned data set" << std::endl;
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
vtkm::Id numDims = 5;

@ -118,6 +118,11 @@ public:
vtkm::cont::ArrayHandleConstant<vtkm::Id> maxSteps(MaxSteps, numSeeds);
vtkm::cont::ArrayHandleIndex idxArray(numSeeds);
// TODO: The particle advection sometimes behaves incorrectly on CUDA if the stack size
// is not changed thusly. This is concerning as the compiler should be able to determine
// statically the required stack depth. What is even more concerning is that the runtime
// does not report a stack overflow. Rather, the worklet just silently reports the wrong
// value. Until we determine the root cause, other problems may pop up.
#ifdef VTKM_CUDA
// This worklet needs some extra space on CUDA.
vtkm::cont::cuda::internal::ScopedCudaStackSize stack(16 * 1024);
@ -193,10 +198,13 @@ public:
vtkm::worklet::DispatcherMapField<detail::GetSteps> getStepDispatcher{ (detail::GetSteps{}) };
getStepDispatcher.Invoke(particles, initialStepsTaken);
// This method uses the same worklet as ParticleAdvectionWorklet::Run (and more). Yet for
// some reason ParticleAdvectionWorklet::Run needs this adjustment while this method does
// not.
#ifdef VTKM_CUDA
// This worklet needs some extra space on CUDA.
vtkm::cont::cuda::internal::ScopedCudaStackSize stack(16 * 1024);
(void)stack;
// // This worklet needs some extra space on CUDA.
// vtkm::cont::cuda::internal::ScopedCudaStackSize stack(16 * 1024);
// (void)stack;
#endif // VTKM_CUDA
//Run streamline worklet