Merge topic 'fix_particle_advection_cuda_stack'

b9affb7ed Disable copy for RAII helper.
ea0bbfeef Increase CUDA stack size for ParticleAdvection worklets.

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Merge-request: !1832
2024-09-20 02:55:47 +00:00 · 2019-09-09 23:38:32 +00:00 · 2019-09-09 23:38:32 +00:00 · 9e7da35c8e
commit 9e7da35c8e
parent 9e38f02546 b9affb7edc
3 changed files with 47 additions and 17 deletions
--- a/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
+++ b/vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h
@ -67,6 +67,38 @@ namespace cont
 {
 namespace cuda
 {
+
+/// \brief RAII helper for temporarily changing CUDA stack size in an
+/// exception-safe way.
+///
+/// The constructor queries the current \c cudaLimitStackSize value, logs the
+/// requested change, and then applies the new limit. The destructor sets the
+/// limit back to the value that was queried. If either the query or the change
+/// fails, a warning is logged and the destructor leaves the limit alone, so a
+/// value that was never successfully read is never written back.
+///
+/// Instances cannot be copied.
+struct ScopedCudaStackSize
+{
+  /// \param newStackSize Stack size, in bytes, to request while this object
+  ///                     is alive.
+  ScopedCudaStackSize(std::size_t newStackSize)
+    : OldStackSize(0)
+    , StackSizeChanged(false)
+  {
+    cudaError_t status = cudaDeviceGetLimit(&this->OldStackSize, cudaLimitStackSize);
+    if (status != cudaSuccess)
+    {
+      // Without a trustworthy old value there is nothing safe to restore
+      // later, so do not touch the limit at all.
+      VTKM_LOG_S(vtkm::cont::LogLevel::Warn,
+                 "Failed to query Cuda stack size ("
+                   << cudaGetErrorString(status)
+                   << "); leaving it unchanged.");
+      return;
+    }
+
+    VTKM_LOG_S(vtkm::cont::LogLevel::Info,
+               "Temporarily changing Cuda stack size from "
+                 << vtkm::cont::GetHumanReadableSize(static_cast<vtkm::UInt64>(this->OldStackSize))
+                 << " to "
+                 << vtkm::cont::GetHumanReadableSize(static_cast<vtkm::UInt64>(newStackSize)));
+
+    status = cudaDeviceSetLimit(cudaLimitStackSize, newStackSize);
+    if (status != cudaSuccess)
+    {
+      VTKM_LOG_S(vtkm::cont::LogLevel::Warn,
+                 "Failed to change Cuda stack size ("
+                   << cudaGetErrorString(status)
+                   << "); continuing with the current size.");
+      return;
+    }
+
+    this->StackSizeChanged = true;
+  }
+
+  ~ScopedCudaStackSize()
+  {
+    // Only undo a change that was actually applied.
+    if (!this->StackSizeChanged)
+    {
+      return;
+    }
+
+    VTKM_LOG_S(vtkm::cont::LogLevel::Info,
+               "Restoring Cuda stack size to " << vtkm::cont::GetHumanReadableSize(
+                 static_cast<vtkm::UInt64>(this->OldStackSize)));
+
+    // Report a failed restore through the log rather than an exception:
+    // this is a destructor and may run during stack unwinding.
+    const cudaError_t status = cudaDeviceSetLimit(cudaLimitStackSize, this->OldStackSize);
+    if (status != cudaSuccess)
+    {
+      VTKM_LOG_S(vtkm::cont::LogLevel::Warn,
+                 "Failed to restore Cuda stack size (" << cudaGetErrorString(status) << ").");
+    }
+  }
+
+  // Disable copy
+  ScopedCudaStackSize(const ScopedCudaStackSize&) = delete;
+  ScopedCudaStackSize& operator=(const ScopedCudaStackSize&) = delete;
+
+private:
+  // Stack size limit reported by the device before the change.
+  std::size_t OldStackSize;
+  // True only when the constructor successfully applied the new limit.
+  bool StackSizeChanged;
+};
+
 /// \brief Represents how to schedule 1D, 2D, and 3D Cuda kernels
 ///
 /// \c ScheduleParameters represents how VTK-m should schedule different
--- a/vtkm/worklet/particleadvection/ParticleAdvectionWorklets.h
+++ b/vtkm/worklet/particleadvection/ParticleAdvectionWorklets.h
@ -124,6 +124,12 @@ public:
    vtkm::cont::ArrayHandleIndex idxArray(numSeeds);
    ParticleType particles(seedArray, stepsTaken, statusArray, timeArray, maxSteps);

+#ifdef VTKM_CUDA
+    // This worklet needs some extra stack space on CUDA.
+    vtkm::cont::cuda::ScopedCudaStackSize stack(16 * 1024);
+    (void)stack;
+#endif // VTKM_CUDA
+
    //Invoke particle advection worklet
    ParticleWorkletDispatchType particleWorkletDispatch;
    particleWorkletDispatch.Invoke(idxArray, integrator, particles);
@ -186,6 +192,12 @@ private:
      vtkm::worklet::particleadvection::ParticleAdvectWorklet>;
    using StreamlineType = vtkm::worklet::particleadvection::StateRecordingParticles;

+#ifdef VTKM_CUDA
+    // This worklet needs some extra stack space on CUDA.
+    vtkm::cont::cuda::ScopedCudaStackSize stack(4 * 1024);
+    (void)stack;
+#endif // VTKM_CUDA
+
    vtkm::cont::ArrayHandle<vtkm::Id> initialStepsTaken;
    vtkm::cont::ArrayCopy(stepsTaken, initialStepsTaken);

--- a/vtkm/worklet/spatialstructure/KdTree3DNNSearch.h
+++ b/vtkm/worklet/spatialstructure/KdTree3DNNSearch.h
@ -194,7 +194,7 @@ public:
           const vtkm::cont::ArrayHandle<vtkm::Vec<CoordType, 3>, CoordStorageTag2>& qc_Handle,
           vtkm::cont::ArrayHandle<vtkm::Id>& nnId_Handle,
           vtkm::cont::ArrayHandle<CoordType>& nnDis_Handle,
-           DeviceAdapter id)
+           DeviceAdapter)
  {
    //fill the nnDis_Handle handle array with max values before running
    auto intialValue = std::numeric_limits<CoordType>::max();
@ -204,28 +204,14 @@ public:

 //set up stack size for cuda environment
 #ifdef VTKM_CUDA
-    std::size_t stackSizeBackup(0);
-    (void)stackSizeBackup;
-    if (id.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
-    {
-      cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
-      cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 16);
-    }
-#else
-    (void)id;
+    vtkm::cont::cuda::ScopedCudaStackSize stack(16 * 1024);
+    (void)stack;
 #endif

    NearestNeighborSearch3DWorklet nns3dWorklet;
    vtkm::worklet::DispatcherMapField<NearestNeighborSearch3DWorklet> nns3DDispatcher(nns3dWorklet);
    nns3DDispatcher.Invoke(
      qc_Handle, pointId_Handle, splitId_Handle, coordi_Handle, nnId_Handle, nnDis_Handle);
-
-#ifdef VTKM_CUDA
-    if (id.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
-    {
-      cudaDeviceSetLimit(cudaLimitStackSize, stackSizeBackup);
-    }
-#endif
  }
 };
 }