Fixes for CUDA stack size handling

This commit is contained in:
Dave Pugmire 2019-03-13 16:41:08 -04:00
parent cfd36bcd5b
commit b2d5d66de5
3 changed files with 25 additions and 43 deletions

@ -83,11 +83,6 @@ private:
const IdArrayHandle&,
DeviceAdapter);
// Initialization hook templated on the device adapter type. The body is
// empty, so invoking it performs no work.
template <typename DeviceAdapter>
VTKM_CONT void Init()
{
}
public:
VTKM_CONT
BoundingIntervalHierarchy(vtkm::IdComponent numPlanes = 4, vtkm::IdComponent maxLeafSize = 5)
@ -96,11 +91,27 @@ public:
, Nodes()
, ProcessedCellIds()
{
// Init<DeviceAdapter>();
#ifdef VTKM_CUDA
CudaStackSizeBackup = 0;
cudaDeviceGetLimit(&CudaStackSizeBackup, cudaLimitStackSize);
//std::cout<<"Initial stack size: "<<CudaStackSizeBackup<<std::endl;
// std::cout<<"Increase stack size"<<std::endl;
// cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
#endif
}
VTKM_CONT
~BoundingIntervalHierarchy() {}
// Destructor. In CUDA builds, when CudaStackSizeBackup holds a non-zero
// value, that value is handed back to the CUDA runtime as the stack-size
// limit and the stored value is then cleared. Non-CUDA builds do nothing.
~BoundingIntervalHierarchy()
{
#ifdef VTKM_CUDA
// Zero is treated as "nothing recorded", so no restore is attempted for it.
if (CudaStackSizeBackup > 0)
{
//std::cout<<"DE-Increase stack size "<<CudaStackSizeBackup<<std::endl;
// NOTE(review): the status returned by cudaDeviceSetLimit is not checked.
cudaDeviceSetLimit(cudaLimitStackSize, CudaStackSizeBackup);
// Clear the record so the same value is not written back a second time.
CudaStackSizeBackup = 0;
}
#endif
}
VTKM_CONT
void SetNumberOfSplittingPlanes(vtkm::IdComponent numPlanes)
@ -136,6 +147,11 @@ private:
vtkm::cont::ArrayHandle<BoundingIntervalHierarchyNode> Nodes;
IdArrayHandle ProcessedCellIds;
mutable HandleType ExecHandle;
#ifdef VTKM_CUDA
// Stack-size limit queried from the CUDA runtime by the constructor; the
// destructor passes it back to cudaDeviceSetLimit when it is non-zero.
// Only present in CUDA builds.
std::size_t CudaStackSizeBackup;
#endif
};
} // namespace cont

@ -480,18 +480,6 @@ public:
const vtkm::cont::BoundingIntervalHierarchy& bih,
HandleType& bihExec) const
{
#if 0
std::cout<<"BIH:"<<std::endl;
vtkm::Id N = bih.Nodes.GetNumberOfValues();
auto portal = bih.Nodes.GetPortalConstControl();
for (vtkm::Id i = 0; i < N; i++)
{
auto n = portal.Get(i);
//printf("%d: {%d %d %f %f}\n", i, n.ChildIndex, n.Dimension, n.Node.LMax0,0);
std::cout<<i<<": {"<<n.ChildIndex<<" "<<n.Dimension<<" "<<n.Node.LMax<<" "<<n.Node.RMin<<"}"<<std::endl;
}
#endif
vtkm::cont::DynamicCellSet cellSet = bih.GetCellSet();
if (cellSet.IsType<vtkm::cont::CellSetExplicit<>>())
{
@ -556,20 +544,8 @@ VTKM_CONT
const HandleType BoundingIntervalHierarchy::PrepareForExecutionImpl(
const vtkm::cont::DeviceAdapterId deviceId) const
{
#if 0
//set up stack size for cuda environment
#ifdef VTKM_CUDA
std::size_t stackSizeBackup(0);
(void)stackSizeBackup;
if (deviceId.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
{
cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 16);
}
#else
(void)deviceId;
#endif
#endif
//std::cout<<"Increase stack size"<<std::endl;
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
const bool success =
vtkm::cont::TryExecuteOnDevice(deviceId, PrepareForExecutionFunctor(), *this, this->ExecHandle);

@ -552,18 +552,8 @@ void TestParticleWorklets()
// Runs the evaluator tests and then the particle worklet tests. In CUDA
// builds the device stack-size limit is set to 1024 * 50 around the two test
// calls, and the limit that was queried beforehand is written back afterwards.
void TestParticleAdvection()
{
#ifdef VTKM_CUDA
// Remember the current limit so it can be reinstated once the tests finish.
size_t stackSizeBackup(0);
cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 50);
#endif
TestEvaluators();
TestParticleWorklets();
#ifdef VTKM_CUDA
// NOTE(review): this restore is skipped if either test call above throws;
// confirm that is acceptable for the test harness.
cudaDeviceSetLimit(cudaLimitStackSize, stackSizeBackup);
#endif
}
int UnitTestParticleAdvection(int argc, char* argv[])