vtk-m2/vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.cxx
Haocheng LIU ce9cd8072a Use std::call_once to construct singeltons
By using `call_once` from C++11, we can simplify the logic in code
where we are querying same value variables from multiple threads.
2018-08-06 16:36:03 -04:00

119 lines
3.3 KiB
C++

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/cuda/internal/DeviceAdapterRuntimeDetectorCuda.h>
#ifdef VTKM_CUDA
#include <mutex>
#include <cuda.h>
#include <vtkm/Math.h>
#include <vtkm/cont/cuda/ErrorCuda.h>
namespace vtkm
{
namespace cont
{
namespace cuda
{
namespace internal
{
static __global__ void DetermineIfValidCudaDevice()
{
//used only to see if we can launch kernels. It is possible to have a
//CUDA capable device, but still fail to have CUDA support.
}
}
}
}
}
namespace
{
static std::once_flag deviceQueryFlag;
static int numDevices = 0;
static int archVersion = 0;
void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32& has)
{
std::call_once(deviceQueryFlag, []() {
//first query for the number of devices
auto res = cudaGetDeviceCount(&numDevices);
if (res != cudaSuccess)
{
numDevices = 0;
}
for (vtkm::Int32 i = 0; i < numDevices; i++)
{
cudaDeviceProp prop;
VTKM_CUDA_CALL(cudaGetDeviceProperties(&prop, i));
const vtkm::Int32 arch = (prop.major * 10) + prop.minor;
archVersion = vtkm::Max(arch, archVersion);
}
//Make sure we can actually launch a kernel. This could fail for any
//of the following reasons:
//
// 1. cudaErrorInsufficientDriver, caused by out of data drives
// 2. cudaErrorDevicesUnavailable, caused by another process locking the
// device or somebody disabling cuda support on the device
// 3. cudaErrorNoKernelImageForDevice we built for a compute version
// greater than the device we are running on
// Most likely others that I don't even know about
if (numDevices > 0)
{
vtkm::cont::cuda::internal::DetermineIfValidCudaDevice<<<1, 1, 0, cudaStreamPerThread>>>();
cudaStreamSynchronize(cudaStreamPerThread);
if (cudaSuccess != cudaGetLastError())
{
numDevices = 0;
archVersion = 0;
}
}
});
nod = numDevices;
has = archVersion;
}
} // anonymous namspace
#endif
namespace vtkm
{
namespace cont
{
DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::DeviceAdapterRuntimeDetector()
: NumberOfDevices(0)
, HighestArchSupported(0)
{
#ifdef VTKM_CUDA
queryNumberOfDevicesandHighestArchSupported(this->NumberOfDevices, this->HighestArchSupported);
#endif
}
bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagCuda>::Exists() const
{
return this->NumberOfDevices > 0 && this->HighestArchSupported >= 20;
}
}
} // namespace vtkm::cont