Add Sync/Async memory allocator functions.

Dave Pugmire 2022-08-10 15:09:53 -04:00
parent 7fdea58ef6
commit b441c9e23d
2 changed files with 57 additions and 8 deletions
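
Usage sketch (not part of the diff): roughly how a caller would opt in to the new asynchronous allocator and check which mode is active. The include path and namespace follow VTK-m's layout but should be treated as assumptions here.

#include <vtkm/cont/cuda/internal/CudaAllocator.h>

using vtkm::cont::cuda::internal::CudaAllocator;

void ConfigureAllocator()
{
  // Request stream-ordered allocation; on CUDA < 11.3 this only logs a
  // warning and the synchronous allocator stays active.
  CudaAllocator::ForceAsyncMemoryAllocator();
  if (CudaAllocator::UsingAsyncMemoryAllocator())
  {
    // Allocate()/Free() now go through cudaMallocAsync/cudaFreeAsync.
  }
  // Turning managed memory back on flips the allocator back to synchronous,
  // since the two modes are mutually exclusive.
  CudaAllocator::ForceManagedMemoryOn();
}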

@@ -47,6 +47,9 @@ static bool ManagedMemoryEnabled = false;
// True if concurrent pageable managed memory is supported by the machine's hardware.
static bool HardwareSupportsManagedMemory = false;
// True if using the synchronous memory allocator. Managed memory must be off to use this.
static bool UseSyncMemoryAlloc = true;
// Avoid overhead of cudaMemAdvise and cudaMemPrefetchAsync for small buffers.
// This value should be > 0 or else these functions will error out.
static std::size_t Threshold = 1 << 20;
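
A sketch of the guard these comments describe (hypothetical helper, not the literal VTK-m code; the function name and deviceId parameter are assumptions):

void AdviseAndPrefetch(void* ptr, std::size_t numBytes, int deviceId)
{
  // Skip the hinting calls for buffers below Threshold. Keeping Threshold > 0
  // also keeps zero-sized buffers away from cudaMemAdvise and
  // cudaMemPrefetchAsync, which reject a zero count.
  if (numBytes >= Threshold)
  {
    VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, deviceId));
    VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceId, cudaStreamPerThread));
  }
}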
@@ -88,6 +91,13 @@ void CudaAllocator::ForceManagedMemoryOn()
  {
    ManagedMemoryEnabled = true;
    VTKM_LOG_F(vtkm::cont::LogLevel::Info, "CudaAllocator enabling managed memory");
    if (!UseSyncMemoryAlloc)
    {
      CudaAllocator::ForceSyncMemoryAllocator();
      VTKM_LOG_F(vtkm::cont::LogLevel::Warn,
                 "CudaAllocator turning off asynchronous allocator to support managed memory");
    }
  }
  else
  {
@@ -96,6 +106,34 @@ void CudaAllocator::ForceManagedMemoryOn()
  }
}

bool CudaAllocator::UsingSyncMemoryAllocator()
{
  return UseSyncMemoryAlloc;
}

void CudaAllocator::ForceSyncMemoryAllocator()
{
  UseSyncMemoryAlloc = true;
  VTKM_LOG_F(vtkm::cont::LogLevel::Warn, "CudaAllocator using synchronous memory allocator.");
}

void CudaAllocator::ForceAsyncMemoryAllocator()
{
#if CUDART_VERSION >= 11030
  if (ManagedMemoryEnabled)
  {
    CudaAllocator::ForceManagedMemoryOff();
    VTKM_LOG_F(vtkm::cont::LogLevel::Warn,
               "CudaAllocator turning off managed memory for asynchronous memory allocation");
  }
  UseSyncMemoryAlloc = false;
  VTKM_LOG_F(vtkm::cont::LogLevel::Warn, "CudaAllocator using asynchronous memory allocator.");
#else
  VTKM_LOG_F(vtkm::cont::LogLevel::Warn,
             "Asynchronous memory allocator not supported for CUDA version < 11.3");
#endif
}
bool CudaAllocator::IsDevicePointer(const void* ptr)
{
  CudaAllocator::Initialize();
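
Design note: the commit keeps the two modes mutually exclusive in both directions (ForceManagedMemoryOn falls back to the synchronous allocator, and ForceAsyncMemoryAllocator turns managed memory off), presumably so a buffer is never allocated by one mechanism and released by the other.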
@@ -158,11 +196,16 @@ void* CudaAllocator::Allocate(std::size_t numBytes)
  }
  else
  {
    if (UseSyncMemoryAlloc)
    {
      VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
    }
    else
    {
#if CUDART_VERSION >= 11030
      VTKM_CUDA_CALL(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
#else
      // cudaMallocAsync is unavailable before CUDA 11.3; fall back to cudaMalloc.
      VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
#endif
    }
  }

  {
@@ -200,18 +243,16 @@ void CudaAllocator::Free(void* ptr)
  VTKM_LOG_F(vtkm::cont::LogLevel::MemExec, "Freeing CUDA allocation at %p.", ptr);
  if (UseSyncMemoryAlloc)
  {
    VTKM_CUDA_CALL(cudaFree(ptr));
  }
  else
  {
#if CUDART_VERSION >= 11030
    VTKM_CUDA_CALL(cudaFreeAsync(ptr, cudaStreamPerThread));
#else
    // cudaFreeAsync is unavailable before CUDA 11.3; fall back to cudaFree.
    VTKM_CUDA_CALL(cudaFree(ptr));
#endif
  }
}
void CudaAllocator::FreeDeferred(void* ptr, std::size_t numBytes)
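
For reference, a self-contained sketch of the stream-ordered pattern Allocate() and Free() now use on CUDA >= 11.3 (standalone CUDA, not VTK-m code):

#include <cuda_runtime.h>
#include <cstdio>

int main()
{
  void* ptr = nullptr;
  // Both calls are ordered on cudaStreamPerThread, so the free cannot
  // retire the allocation before earlier work on that stream completes.
  cudaMallocAsync(&ptr, 1 << 20, cudaStreamPerThread);
  // ... launch kernels that use ptr on cudaStreamPerThread ...
  cudaFreeAsync(ptr, cudaStreamPerThread);
  cudaStreamSynchronize(cudaStreamPerThread);
  printf("stream-ordered alloc/free done\n");
  return 0;
}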

@@ -40,6 +40,14 @@ struct VTKM_CONT_EXPORT CudaAllocator
  /// VTK-m will ignore the request and continue to use unmanaged memory (aka cudaMalloc).
  static VTKM_CONT void ForceManagedMemoryOn();

  static VTKM_CONT bool UsingSyncMemoryAllocator();
  static VTKM_CONT bool UsingAsyncMemoryAllocator()
  {
    return !CudaAllocator::UsingSyncMemoryAllocator();
  }
  static VTKM_CONT void ForceSyncMemoryAllocator();
  static VTKM_CONT void ForceAsyncMemoryAllocator();

  /// Returns true if the pointer is accessible from a CUDA device.
  static VTKM_CONT bool IsDevicePointer(const void* ptr);