mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-08 13:23:51 +00:00
remove cudaGetDevice calls, favor runtime device config
This commit is contained in:
parent
adac415f15
commit
9730de8074
@ -239,8 +239,10 @@ void CudaAllocator::PrepareForInput(const void* ptr, std::size_t numBytes)
|
||||
if (IsManagedPointer(ptr) && numBytes >= Threshold)
|
||||
{
|
||||
#if CUDART_VERSION >= 8000
|
||||
int dev;
|
||||
VTKM_CUDA_CALL(cudaGetDevice(&dev));
|
||||
vtkm::Id dev;
|
||||
vtkm::cont::RuntimeDeviceInformation()
|
||||
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
|
||||
.GetDeviceInstance(dev);
|
||||
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
|
||||
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetReadMostly, dev));
|
||||
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));
|
||||
@ -254,8 +256,10 @@ void CudaAllocator::PrepareForOutput(const void* ptr, std::size_t numBytes)
|
||||
if (IsManagedPointer(ptr) && numBytes >= Threshold)
|
||||
{
|
||||
#if CUDART_VERSION >= 8000
|
||||
int dev;
|
||||
VTKM_CUDA_CALL(cudaGetDevice(&dev));
|
||||
vtkm::Id dev;
|
||||
vtkm::cont::RuntimeDeviceInformation()
|
||||
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
|
||||
.GetDeviceInstance(dev);
|
||||
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
|
||||
// VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev));
|
||||
VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));
|
||||
|
@ -10,6 +10,10 @@
|
||||
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h>
|
||||
|
||||
#include <vtkm/cont/RuntimeDeviceInformation.h>
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/RuntimeDeviceConfigurationCuda.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <functional>
|
||||
@ -133,14 +137,14 @@ VTKM_CONT_EXPORT void SetupKernelSchedulingParameters()
|
||||
|
||||
std::call_once(lookupBuiltFlag, []() {
|
||||
ScheduleParameterBuilder builder;
|
||||
//iterate over all devices
|
||||
int count = 0;
|
||||
VTKM_CUDA_CALL(cudaGetDeviceCount(&count));
|
||||
for (int deviceId = 0; deviceId < count; ++deviceId)
|
||||
auto cudaDeviceConfig = dynamic_cast<
|
||||
vtkm::cont::internal::RuntimeDeviceConfiguration<vtkm::cont::DeviceAdapterTagCuda>&>(
|
||||
vtkm::cont::RuntimeDeviceInformation{}.GetRuntimeConfiguration(
|
||||
vtkm::cont::DeviceAdapterTagCuda()));
|
||||
std::vector<cudaDeviceProp> cudaDevices;
|
||||
cudaDeviceConfig.GetCudaDeviceProp(cudaDevices);
|
||||
for (const auto& deviceProp : cudaDevices)
|
||||
{
|
||||
cudaDeviceProp deviceProp;
|
||||
cudaGetDeviceProperties(&deviceProp, deviceId);
|
||||
|
||||
ScheduleParameters params = builder.Compute(deviceProp.name,
|
||||
deviceProp.major,
|
||||
deviceProp.minor,
|
||||
@ -204,8 +208,10 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
|
||||
(void)size;
|
||||
vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
|
||||
|
||||
int deviceId;
|
||||
VTKM_CUDA_CALL(cudaGetDevice(&deviceId)); //get deviceid from cuda
|
||||
vtkm::Id deviceId;
|
||||
vtkm::cont::RuntimeDeviceInformation()
|
||||
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
|
||||
.GetDeviceInstance(deviceId);
|
||||
const auto& params = cuda::internal::scheduling_1d_parameters[static_cast<size_t>(deviceId)];
|
||||
blocks = static_cast<vtkm::UInt32>(params.first);
|
||||
threadsPerBlock = static_cast<vtkm::UInt32>(params.second);
|
||||
@ -218,8 +224,10 @@ void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetBlocksAndThrea
|
||||
{
|
||||
vtkm::cont::cuda::internal::SetupKernelSchedulingParameters();
|
||||
|
||||
int deviceId;
|
||||
VTKM_CUDA_CALL(cudaGetDevice(&deviceId)); //get deviceid from cuda
|
||||
vtkm::Id deviceId;
|
||||
vtkm::cont::RuntimeDeviceInformation()
|
||||
.GetRuntimeConfiguration(vtkm::cont::DeviceAdapterTagCuda())
|
||||
.GetDeviceInstance(deviceId);
|
||||
if (size.z <= 1)
|
||||
{ //2d images
|
||||
const auto& params = cuda::internal::scheduling_2d_parameters[static_cast<size_t>(deviceId)];
|
||||
|
@ -25,6 +25,11 @@ static int archVersion = 0;
|
||||
|
||||
void queryNumberOfDevicesandHighestArchSupported(vtkm::Int32& nod, vtkm::Int32& has)
|
||||
{
|
||||
// We currently cannot use RuntimeDeviceInformation{}.GetRuntimeConfiguration(
|
||||
// vtkm::cont::DeviceAdapterTagCuda()) in this function due to constraints in
|
||||
// initialize, which queries device existence before we initialize the Runtime
|
||||
// Configuration. Once those constraints are removed/fixed this file can be
|
||||
// updated to use that call instead of directly querying the CUDA device.
|
||||
std::call_once(deviceQueryFlag, []() {
|
||||
//first query for the number of devices
|
||||
auto res = cudaGetDeviceCount(&numDevices);
|
||||
|
@ -62,17 +62,8 @@ public:
|
||||
<< value << " >= " << this->CudaDeviceCount);
|
||||
return RuntimeDeviceConfigReturnCode::INVALID_VALUE;
|
||||
}
|
||||
try
|
||||
{
|
||||
VTKM_CUDA_CALL(cudaSetDevice(value));
|
||||
return RuntimeDeviceConfigReturnCode::SUCCESS;
|
||||
}
|
||||
catch (const vtkm::cont::cuda::ErrorCuda& err)
|
||||
{
|
||||
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
|
||||
"Failed to set CudaDeviceInstance: " << err.GetMessage());
|
||||
return RuntimeDeviceConfigReturnCode::INTERNAL_ERROR;
|
||||
}
|
||||
VTKM_CUDA_CALL(cudaSetDevice(value));
|
||||
return RuntimeDeviceConfigReturnCode::SUCCESS;
|
||||
}
|
||||
|
||||
VTKM_CONT virtual RuntimeDeviceConfigReturnCode GetDeviceInstance(
|
||||
|
@ -24,14 +24,13 @@ namespace cont
|
||||
namespace internal
|
||||
{
|
||||
|
||||
enum class RuntimeDeviceConfigReturnCode : vtkm::Id
|
||||
enum class RuntimeDeviceConfigReturnCode
|
||||
{
|
||||
SUCCESS,
|
||||
OUT_OF_BOUNDS,
|
||||
INVALID_FOR_DEVICE,
|
||||
INVALID_VALUE,
|
||||
NOT_APPLIED,
|
||||
INTERNAL_ERROR
|
||||
NOT_APPLIED
|
||||
};
|
||||
|
||||
class VTKM_CONT_EXPORT RuntimeDeviceConfigurationBase
|
||||
|
Loading…
Reference in New Issue
Block a user