Fixes for CUDA stack size handling

2019-03-13 16:41:08 -04:00 · 2019-03-13 16:41:08 -04:00 · b2d5d66de5
commit b2d5d66de5
parent cfd36bcd5b
3 changed files with 25 additions and 43 deletions
--- a/vtkm/cont/BoundingIntervalHierarchy.h
+++ b/vtkm/cont/BoundingIntervalHierarchy.h
@@ -83,11 +83,6 @@ private:
                                                       const IdArrayHandle&,
                                                       DeviceAdapter);
  template <typename DeviceAdapter>
  VTKM_CONT void Init()
  {
  }
 public:
  VTKM_CONT
  BoundingIntervalHierarchy(vtkm::IdComponent numPlanes = 4, vtkm::IdComponent maxLeafSize = 5)
@@ -96,11 +91,27 @@ public:
    , Nodes()
    , ProcessedCellIds()
  {
-    //    Init<DeviceAdapter>();
+#ifdef VTKM_CUDA
    CudaStackSizeBackup = 0;
    cudaDeviceGetLimit(&CudaStackSizeBackup, cudaLimitStackSize);
 //std::cout<<"Initial stack size: "<<CudaStackSizeBackup<<std::endl;
 //    std::cout<<"Increase stack size"<<std::endl;
 //    cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
 #endif
  }
  VTKM_CONT
-  ~BoundingIntervalHierarchy() {}
+  ~BoundingIntervalHierarchy()
  {
 #ifdef VTKM_CUDA
    if (CudaStackSizeBackup > 0)
    {
      //std::cout<<"DE-Increase stack size "<<CudaStackSizeBackup<<std::endl;
      cudaDeviceSetLimit(cudaLimitStackSize, CudaStackSizeBackup);
      CudaStackSizeBackup = 0;
    }
 #endif
  }
  VTKM_CONT
  void SetNumberOfSplittingPlanes(vtkm::IdComponent numPlanes)
@@ -136,6 +147,11 @@ private:
  vtkm::cont::ArrayHandle<BoundingIntervalHierarchyNode> Nodes;
  IdArrayHandle ProcessedCellIds;
  mutable HandleType ExecHandle;
 #ifdef VTKM_CUDA
  std::size_t CudaStackSizeBackup;
 #endif
 };
 } // namespace cont
--- a/vtkm/cont/BoundingIntervalHierarchy.hxx
+++ b/vtkm/cont/BoundingIntervalHierarchy.hxx
@@ -480,18 +480,6 @@ public:
                            const vtkm::cont::BoundingIntervalHierarchy& bih,
                            HandleType& bihExec) const
  {
 #if 0
    std::cout<<"BIH:"<<std::endl;
    vtkm::Id N = bih.Nodes.GetNumberOfValues();
    auto portal = bih.Nodes.GetPortalConstControl();
    for (vtkm::Id i = 0; i < N; i++)
    {
      auto n = portal.Get(i);
      //printf("%d: {%d %d %f %f}\n", i, n.ChildIndex, n.Dimension, n.Node.LMax0,0);
      std::cout<<i<<": {"<<n.ChildIndex<<" "<<n.Dimension<<" "<<n.Node.LMax<<" "<<n.Node.RMin<<"}"<<std::endl;
    }
 #endif
    vtkm::cont::DynamicCellSet cellSet = bih.GetCellSet();
    if (cellSet.IsType<vtkm::cont::CellSetExplicit<>>())
    {
@@ -556,20 +544,8 @@ VTKM_CONT
 const HandleType BoundingIntervalHierarchy::PrepareForExecutionImpl(
  const vtkm::cont::DeviceAdapterId deviceId) const
 {
-#if 0
+  //std::cout<<"Increase stack size"<<std::endl;
-  //set up stack size for cuda environment
+  cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
 #ifdef VTKM_CUDA
  std::size_t stackSizeBackup(0);
  (void)stackSizeBackup;
  if (deviceId.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
  {
    cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
    cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 16);
  }
 #else
  (void)deviceId;
 #endif
 #endif
  const bool success =
    vtkm::cont::TryExecuteOnDevice(deviceId, PrepareForExecutionFunctor(), *this, this->ExecHandle);
--- a/vtkm/worklet/testing/UnitTestParticleAdvection.cxx
+++ b/vtkm/worklet/testing/UnitTestParticleAdvection.cxx
@@ -552,18 +552,8 @@ void TestParticleWorklets()
 void TestParticleAdvection()
 {
 #ifdef VTKM_CUDA
  size_t stackSizeBackup(0);
  cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
  cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 50);
 #endif
  TestEvaluators();
  TestParticleWorklets();
 #ifdef VTKM_CUDA
  cudaDeviceSetLimit(cudaLimitStackSize, stackSizeBackup);
 #endif
 }
 int UnitTestParticleAdvection(int argc, char* argv[])