Fixes for CUDA stack size handling

This commit is contained in:
Dave Pugmire 2019-03-13 16:41:08 -04:00
parent cfd36bcd5b
commit b2d5d66de5
3 changed files with 25 additions and 43 deletions

@ -83,11 +83,6 @@ private:
const IdArrayHandle&, const IdArrayHandle&,
DeviceAdapter); DeviceAdapter);
template <typename DeviceAdapter>
VTKM_CONT void Init()
{
}
public: public:
VTKM_CONT VTKM_CONT
BoundingIntervalHierarchy(vtkm::IdComponent numPlanes = 4, vtkm::IdComponent maxLeafSize = 5) BoundingIntervalHierarchy(vtkm::IdComponent numPlanes = 4, vtkm::IdComponent maxLeafSize = 5)
@ -96,11 +91,27 @@ public:
, Nodes() , Nodes()
, ProcessedCellIds() , ProcessedCellIds()
{ {
// Init<DeviceAdapter>(); #ifdef VTKM_CUDA
CudaStackSizeBackup = 0;
cudaDeviceGetLimit(&CudaStackSizeBackup, cudaLimitStackSize);
//std::cout<<"Initial stack size: "<<CudaStackSizeBackup<<std::endl;
// std::cout<<"Increase stack size"<<std::endl;
// cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
#endif
} }
VTKM_CONT VTKM_CONT
~BoundingIntervalHierarchy() {} ~BoundingIntervalHierarchy()
{
#ifdef VTKM_CUDA
if (CudaStackSizeBackup > 0)
{
//std::cout<<"DE-Increase stack size "<<CudaStackSizeBackup<<std::endl;
cudaDeviceSetLimit(cudaLimitStackSize, CudaStackSizeBackup);
CudaStackSizeBackup = 0;
}
#endif
}
VTKM_CONT VTKM_CONT
void SetNumberOfSplittingPlanes(vtkm::IdComponent numPlanes) void SetNumberOfSplittingPlanes(vtkm::IdComponent numPlanes)
@ -136,6 +147,11 @@ private:
vtkm::cont::ArrayHandle<BoundingIntervalHierarchyNode> Nodes; vtkm::cont::ArrayHandle<BoundingIntervalHierarchyNode> Nodes;
IdArrayHandle ProcessedCellIds; IdArrayHandle ProcessedCellIds;
mutable HandleType ExecHandle; mutable HandleType ExecHandle;
#ifdef VTKM_CUDA
std::size_t CudaStackSizeBackup;
#endif
}; };
} // namespace cont } // namespace cont

@ -480,18 +480,6 @@ public:
const vtkm::cont::BoundingIntervalHierarchy& bih, const vtkm::cont::BoundingIntervalHierarchy& bih,
HandleType& bihExec) const HandleType& bihExec) const
{ {
#if 0
std::cout<<"BIH:"<<std::endl;
vtkm::Id N = bih.Nodes.GetNumberOfValues();
auto portal = bih.Nodes.GetPortalConstControl();
for (vtkm::Id i = 0; i < N; i++)
{
auto n = portal.Get(i);
//printf("%d: {%d %d %f %f}\n", i, n.ChildIndex, n.Dimension, n.Node.LMax0,0);
std::cout<<i<<": {"<<n.ChildIndex<<" "<<n.Dimension<<" "<<n.Node.LMax<<" "<<n.Node.RMin<<"}"<<std::endl;
}
#endif
vtkm::cont::DynamicCellSet cellSet = bih.GetCellSet(); vtkm::cont::DynamicCellSet cellSet = bih.GetCellSet();
if (cellSet.IsType<vtkm::cont::CellSetExplicit<>>()) if (cellSet.IsType<vtkm::cont::CellSetExplicit<>>())
{ {
@ -556,20 +544,8 @@ VTKM_CONT
const HandleType BoundingIntervalHierarchy::PrepareForExecutionImpl( const HandleType BoundingIntervalHierarchy::PrepareForExecutionImpl(
const vtkm::cont::DeviceAdapterId deviceId) const const vtkm::cont::DeviceAdapterId deviceId) const
{ {
#if 0 //std::cout<<"Increase stack size"<<std::endl;
//set up stack size for cuda environment cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 64);
#ifdef VTKM_CUDA
std::size_t stackSizeBackup(0);
(void)stackSizeBackup;
if (deviceId.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
{
cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 16);
}
#else
(void)deviceId;
#endif
#endif
const bool success = const bool success =
vtkm::cont::TryExecuteOnDevice(deviceId, PrepareForExecutionFunctor(), *this, this->ExecHandle); vtkm::cont::TryExecuteOnDevice(deviceId, PrepareForExecutionFunctor(), *this, this->ExecHandle);

@ -552,18 +552,8 @@ void TestParticleWorklets()
void TestParticleAdvection() void TestParticleAdvection()
{ {
#ifdef VTKM_CUDA
size_t stackSizeBackup(0);
cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 50);
#endif
TestEvaluators(); TestEvaluators();
TestParticleWorklets(); TestParticleWorklets();
#ifdef VTKM_CUDA
cudaDeviceSetLimit(cudaLimitStackSize, stackSizeBackup);
#endif
} }
int UnitTestParticleAdvection(int argc, char* argv[]) int UnitTestParticleAdvection(int argc, char* argv[])