Merge topic 'fix_particle_advection_cuda_stack'

b9affb7ed Disable copy for RAII helper.
ea0bbfeef Increase CUDA stack size for ParticleAdvection worklets.

Acked-by: Kitware Robot <kwrobot@kitware.com>
Acked-by: Robert Maynard <robert.maynard@kitware.com>
Merge-request: !1832
This commit is contained in:
Allison Vacanti 2019-09-09 23:38:32 +00:00 committed by Kitware Robot
commit 9e7da35c8e
3 changed files with 47 additions and 17 deletions

@ -67,6 +67,38 @@ namespace cont
{
namespace cuda
{
/// \brief RAII helper for temporarily changing CUDA stack size in an
/// exception-safe way.
struct ScopedCudaStackSize
{
ScopedCudaStackSize(std::size_t newStackSize)
{
cudaDeviceGetLimit(&this->OldStackSize, cudaLimitStackSize);
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
"Temporarily changing Cuda stack size from "
<< vtkm::cont::GetHumanReadableSize(static_cast<vtkm::UInt64>(this->OldStackSize))
<< " to "
<< vtkm::cont::GetHumanReadableSize(static_cast<vtkm::UInt64>(newStackSize)));
cudaDeviceSetLimit(cudaLimitStackSize, newStackSize);
}
~ScopedCudaStackSize()
{
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
"Restoring Cuda stack size to " << vtkm::cont::GetHumanReadableSize(
static_cast<vtkm::UInt64>(this->OldStackSize)));
cudaDeviceSetLimit(cudaLimitStackSize, this->OldStackSize);
}
// Disable copy
ScopedCudaStackSize(const ScopedCudaStackSize&) = delete;
ScopedCudaStackSize& operator=(const ScopedCudaStackSize&) = delete;
private:
std::size_t OldStackSize;
};
/// \brief Represents how to schedule 1D, 2D, and 3D Cuda kernels
///
/// \c ScheduleParameters represents how VTK-m should schedule different

@ -124,6 +124,12 @@ public:
vtkm::cont::ArrayHandleIndex idxArray(numSeeds);
ParticleType particles(seedArray, stepsTaken, statusArray, timeArray, maxSteps);
#ifdef VTKM_CUDA
// This worklet needs extra stack space on CUDA.
vtkm::cont::cuda::ScopedCudaStackSize stack(16 * 1024);
(void)stack;
#endif // VTKM_CUDA
//Invoke particle advection worklet
ParticleWorkletDispatchType particleWorkletDispatch;
particleWorkletDispatch.Invoke(idxArray, integrator, particles);
@ -186,6 +192,12 @@ private:
vtkm::worklet::particleadvection::ParticleAdvectWorklet>;
using StreamlineType = vtkm::worklet::particleadvection::StateRecordingParticles;
#ifdef VTKM_CUDA
// This worklet needs extra stack space on CUDA.
vtkm::cont::cuda::ScopedCudaStackSize stack(4 * 1024);
(void)stack;
#endif // VTKM_CUDA
vtkm::cont::ArrayHandle<vtkm::Id> initialStepsTaken;
vtkm::cont::ArrayCopy(stepsTaken, initialStepsTaken);

@ -194,7 +194,7 @@ public:
const vtkm::cont::ArrayHandle<vtkm::Vec<CoordType, 3>, CoordStorageTag2>& qc_Handle,
vtkm::cont::ArrayHandle<vtkm::Id>& nnId_Handle,
vtkm::cont::ArrayHandle<CoordType>& nnDis_Handle,
DeviceAdapter id)
DeviceAdapter)
{
//fill the nnDis_Handle handle array with max values before running
auto intialValue = std::numeric_limits<CoordType>::max();
@ -204,28 +204,14 @@ public:
//set up stack size for cuda environment
#ifdef VTKM_CUDA
std::size_t stackSizeBackup(0);
(void)stackSizeBackup;
if (id.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
{
cudaDeviceGetLimit(&stackSizeBackup, cudaLimitStackSize);
cudaDeviceSetLimit(cudaLimitStackSize, 1024 * 16);
}
#else
(void)id;
vtkm::cont::cuda::ScopedCudaStackSize stack(16 * 1024);
(void)stack;
#endif
NearestNeighborSearch3DWorklet nns3dWorklet;
vtkm::worklet::DispatcherMapField<NearestNeighborSearch3DWorklet> nns3DDispatcher(nns3dWorklet);
nns3DDispatcher.Invoke(
qc_Handle, pointId_Handle, splitId_Handle, coordi_Handle, nnId_Handle, nnDis_Handle);
#ifdef VTKM_CUDA
if (id.GetValue() == VTKM_DEVICE_ADAPTER_CUDA)
{
cudaDeviceSetLimit(cudaLimitStackSize, stackSizeBackup);
}
#endif
}
};
}