mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-19 18:45:43 +00:00
Merge topic 'handle_busy_cuda_device_better'
d0a70946b Simplify the DeviceAdapterRuntimeDetectorCuda to not do a kernel launch. Acked-by: Kitware Robot <kwrobot@kitware.com> Merge-request: !1533
This commit is contained in:
commit
9a94c8c59d
17
docs/changelog/vtkm-handles-busy-cuda-devices-better.md
Normal file
17
docs/changelog/vtkm-handles-busy-cuda-devices-better.md
Normal file
@ -0,0 +1,17 @@
|
||||
# VTK-m CUDA detection properly handles busy devices
|
||||
|
||||
When an application that uses VTK-m is first launched it will
|
||||
do a check to see if CUDA is supported at runtime. If for
|
||||
some reason the CUDA card is not allowing kernel execution,
|
||||
VTK-m would report the hardware doesn't have CUDA support.
|
||||
|
||||
This was problematic as it was overly aggressive in disabling CUDA
|
||||
support for hardware that could support kernel execution in
|
||||
the future. Because every VTK-m worklet is executed
|
||||
through a TryExecute it is no longer necessary to be so
|
||||
aggressive in disabling CUDA support.
|
||||
|
||||
Now the behavior is that VTK-m considers a machine to have
|
||||
CUDA runtime support if it has 1+ GPUs of Kepler or
|
||||
higher hardware (SM_30+).
|
||||
|
@ -27,24 +27,6 @@
|
||||
#include <vtkm/Math.h>
|
||||
#include <vtkm/cont/cuda/ErrorCuda.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace cuda
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
static __global__ void DetermineIfValidCudaDevice()
|
||||
{
|
||||
//used only to see if we can launch kernels. It is possible to have a
|
||||
//CUDA capable device, but still fail to have CUDA support.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
namespace
|
||||
{
|
||||
static std::once_flag deviceQueryFlag;
|
||||
@ -64,28 +46,11 @@ void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32&
|
||||
for (vtkm::Int32 i = 0; i < numDevices; i++)
|
||||
{
|
||||
cudaDeviceProp prop;
|
||||
VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
|
||||
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
|
||||
archVersion = vtkm::Max(arch, archVersion);
|
||||
}
|
||||
|
||||
//Make sure we can actually launch a kernel. This could fail for any
|
||||
//of the following reasons:
|
||||
//
|
||||
// 1. cudaErrorInsufficientDriver, caused by out of date drivers
|
||||
// 2. cudaErrorDevicesUnavailable, caused by another process locking the
|
||||
// device or somebody disabling cuda support on the device
|
||||
// 3. cudaErrorNoKernelImageForDevice we built for a compute version
|
||||
// greater than the device we are running on
|
||||
// Most likely others that I don't even know about
|
||||
if (numDevices > 0)
|
||||
{
|
||||
vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
|
||||
cudaStreamSynchronize(cudaStreamPerThread);
|
||||
if (cudaSuccess != cudaGetLastError())
|
||||
res = cudaGetDeviceProperties(&prop, i);
|
||||
if (res == cudaSuccess)
|
||||
{
|
||||
numDevices = 0;
|
||||
archVersion = 0;
|
||||
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
|
||||
archVersion = vtkm::Max(arch, archVersion);
|
||||
}
|
||||
}
|
||||
});
|
||||
@ -112,7 +77,7 @@ DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterRun
|
||||
|
||||
bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::Exists() const
|
||||
{
|
||||
return this->NumberOfDevices > 0 && this->HighestArchSupported >= 20;
|
||||
return this->NumberOfDevices > 0 && this->HighestArchSupported >= 30;
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont
|
||||
|
Loading…
Reference in New Issue
Block a user