Fixes for CUDA stack size handling

2019-03-13 16:41:08 -04:00 · 2019-03-13 16:41:08 -04:00 · b2d5d66de5
commit b2d5d66de5
parent cfd36bcd5b
3 changed files with 25 additions and 43 deletions
--- a/vtkm/cont/BoundingIntervalHierarchy.h
+++ b/vtkm/cont/BoundingIntervalHierarchy.h
@@ -83,11 +83,6 @@ private:
                                                       const IdArrayHandle&,
                                                       DeviceAdapter);

-  template <typename DeviceAdapter>
-  VTKM_CONT void Init()
-  {
-  }
-
 public:
  VTKM_CONT
  BoundingIntervalHierarchy(vtkm::IdComponent numPlanes = 4, vtkm::IdComponent maxLeafSize = 5)
@@ -96,11 +91,27 @@ public:
    , Nodes()
    , ProcessedCellIds()
  {
-    //    Init<DeviceAdapter>();
+#ifdef VTKM_CUDA
+    CudaStackSizeBackup = 0;
+    cudaDeviceGetLimit(&CudaStackSizeBackup, cudaLimitStackSize);
+//std::cout<<"Initial stack size: "<<CudaStackSizeBackup<<std::endl;
+//    std::cout<<"Increase stack size"<<std::endl;
+//    cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
+#endif
  }

  VTKM_CONT
-  ~BoundingIntervalHierarchy() {}
+  ~BoundingIntervalHierarchy()
+  {
+#ifdef VTKM_CUDA
+    if (CudaStackSizeBackup > 0)
+    {
+      //std::cout<<"DE-Increase stack size "<<CudaStackSizeBackup<<std::endl;
+      cudaDeviceSetLimit(cudaLimitStackSize, CudaStackSizeBackup);
+      CudaStackSizeBackup = 0;
+    }
+#endif
+  }

  VTKM_CONT
  void SetNumberOfSplittingPlanes(vtkm::IdComponent numPlanes)
@@ -136,6 +147,11 @@ private:
  vtkm::cont::ArrayHandle<BoundingIntervalHierarchyNode> Nodes;
  IdArrayHandle ProcessedCellIds;
  mutable HandleType ExecHandle;
+
+
+#ifdef VTKM_CUDA
+  std::size_t CudaStackSizeBackup;
+#endif
 };

 } // namespace cont
--- a/vtkm/cont/BoundingIntervalHierarchy.hxx
+++ b/vtkm/cont/BoundingIntervalHierarchy.hxx
@@ -480,18 +480,6 @@ public:
                            const vtkm::cont::BoundingIntervalHierarchy& bih,
                            HandleType& bihExec) const
  {
-#if 0
-    std::cout<<"BIH:"<<std::endl;
-    vtkm::Id N = bih.Nodes.GetNumberOfValues();
-    auto portal = bih.Nodes.GetPortalConstControl();
-    for (vtkm::Id i = 0; i < N; i++)
-    {
-      auto n = portal.Get(i);
-      //printf("%d: {%d %d %f %f}\n", i, n.ChildIndex, n.Dimension, n.Node.LMax0,0);
-      std::cout<<i<<": {"<<n.ChildIndex<<" "<<n.Dimension<<" "<<n.Node.LMax<<" "<<n.Node.RMin<<"}"<<std::endl;
-    }
-#endif
-
    vtkm::cont::DynamicCellSet cellSet = bih.GetCellSet();
    if (cellSet.IsType<vtkm::cont::CellSetExplicit<>>())
    {
@@ -556,20 +544,8 @@ VTKM_CONT
 const HandleType BoundingIntervalHierarchy::PrepareForExecutionImpl(
  const vtkm::cont::DeviceAdapterId deviceId) const
 {
-#if 0
-  //set up stack size for cuda environment
-#ifdef VTKM_CUDA
-  std::size_t stackSizeBackup(0);
-  (void)stackSizeBackup;
-  if (deviceId.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
-  {
-    cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
-    cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 16);
-  }
-#else
-  (void)deviceId;
-#endif
-#endif
+  //std::cout<<"Increase stack size"<<std::endl;
+  cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);

  const bool success =
    vtkm::cont::TryExecuteOnDevice(deviceId, PrepareForExecutionFunctor(), *this, this->ExecHandle);
--- a/vtkm/worklet/testing/UnitTestParticleAdvection.cxx
+++ b/vtkm/worklet/testing/UnitTestParticleAdvection.cxx
@@ -552,18 +552,8 @@ void TestParticleWorklets()

 void TestParticleAdvection()
 {
-#ifdef VTKM_CUDA
-  size_t stackSizeBackup(0);
-  cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
-  cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 50);
-#endif
-
  TestEvaluators();
  TestParticleWorklets();
-
-#ifdef VTKM_CUDA
-  cudaDeviceSetLimit(cudaLimitStackSize, stackSizeBackup);
-#endif
 }

 int UnitTestParticleAdvection(int argc, char* argv[])