//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2017 UT-Battelle, LLC.
// Copyright 2017 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
|
|
|
|
|
|
|
|
#include <vtkm/cont/cuda/ErrorCuda.h>
|
|
|
|
#include <vtkm/cont/cuda/internal/CudaAllocator.h>
|
|
|
|
|
2017-09-21 14:33:17 +00:00
|
|
|
VTKM_THIRDPARTY_PRE_INCLUDE
|
2017-07-14 14:36:15 +00:00
|
|
|
#include <cuda_runtime.h>
|
2017-09-21 14:33:17 +00:00
|
|
|
VTKM_THIRDPARTY_POST_INCLUDE
|
2017-07-14 14:36:15 +00:00
|
|
|
|
|
|
|
// These static vars are in an anon namespace to work around MSVC linker issues.
namespace
{
// Has CudaAllocator::Initialize been called? Set once by Initialize() and
// never reset; guards the (expensive) per-device capability probe.
static bool IsInitialized = false;

// True if all devices support concurrent pageable managed memory
// (cudaDevProp::concurrentManagedAccess on every device). Selects between
// cudaMallocManaged and cudaMalloc in CudaAllocator::Allocate.
static bool ManagedMemorySupported = false;
}
|
|
|
|
|
|
|
|
namespace vtkm
|
|
|
|
{
|
|
|
|
namespace cont
|
|
|
|
{
|
|
|
|
namespace cuda
|
|
|
|
{
|
|
|
|
namespace internal
|
|
|
|
{
|
|
|
|
|
|
|
|
// Returns true when allocations made by this allocator use CUDA managed
// memory (cudaMallocManaged), false when they use plain device memory.
// Lazily initializes the allocator so the cached capability flag is valid.
bool CudaAllocator::UsingManagedMemory()
{
  CudaAllocator::Initialize();
  const bool useManaged = ManagedMemorySupported;
  return useManaged;
}
|
|
|
|
|
2017-08-03 16:55:31 +00:00
|
|
|
// Returns true if ptr is a device-accessible pointer known to the CUDA
// runtime (i.e. the runtime reports a device address equal to ptr).
// Null pointers and pointers unknown to the runtime yield false.
bool CudaAllocator::IsDevicePointer(const void* ptr)
{
  CudaAllocator::Initialize();

  if (ptr == nullptr)
  {
    return false;
  }

  cudaPointerAttributes attributes;
  const cudaError_t status = cudaPointerGetAttributes(&attributes, ptr);
  // cudaPointerGetAttributes reports cudaErrorInvalidValue for pointers the
  // runtime doesn't know about (e.g. plain host memory). That's not a real
  // error here -- treat it as "not a device pointer".
  if (status == cudaErrorInvalidValue)
  {
    // Pop the sticky error state so it isn't raised by a later CUDA call.
    cudaGetLastError();
    return false;
  }
  // Any other failure from cudaPointerGetAttributes is a genuine error.
  VTKM_CUDA_CALL(status);

  return attributes.devicePointer == ptr;
}
|
|
|
|
|
|
|
|
// Returns true if ptr refers to CUDA managed (unified) memory.
// Null pointers, pointers unknown to the runtime, and any pointer on a
// system without full managed-memory support yield false.
bool CudaAllocator::IsManagedPointer(const void* ptr)
{
  // Make sure ManagedMemorySupported has actually been computed before we
  // consult it. Without this (mirroring IsDevicePointer), calling this
  // function before any other CudaAllocator entry point would always return
  // false, even for a genuinely managed pointer.
  CudaAllocator::Initialize();

  if (!ptr || !ManagedMemorySupported)
  {
    return false;
  }

  cudaPointerAttributes attr;
  cudaError_t err = cudaPointerGetAttributes(&attr, ptr);
  // This function will return invalid value if the pointer is unknown to the
  // cuda runtime. Manually catch this value since it's not really an error.
  if (err == cudaErrorInvalidValue)
  {
    cudaGetLastError(); // Clear the error so we don't raise it later...
    return false;
  }
  VTKM_CUDA_CALL(err /*= cudaPointerGetAttributes(&attr, ptr)*/);
  return attr.isManaged != 0;
}
|
|
|
|
|
2017-07-14 14:36:15 +00:00
|
|
|
// Allocates numBytes of GPU-accessible memory and returns the pointer.
// Uses cudaMallocManaged when every device supports concurrent managed
// access, plain cudaMalloc otherwise. Errors are raised via VTKM_CUDA_CALL.
void* CudaAllocator::Allocate(std::size_t numBytes)
{
  CudaAllocator::Initialize();

  void* buffer = nullptr;
  if (ManagedMemorySupported)
  {
    VTKM_CUDA_CALL(cudaMallocManaged(&buffer, numBytes));
  }
  else
  {
    VTKM_CUDA_CALL(cudaMalloc(&buffer, numBytes));
  }

  return buffer;
}
|
|
|
|
|
|
|
|
// Releases memory obtained from CudaAllocator::Allocate. cudaFree handles
// both cudaMalloc'd and cudaMallocManaged buffers, so no distinction is
// needed here; failures are raised through VTKM_CUDA_CALL.
void CudaAllocator::Free(void* ptr)
{
  VTKM_CUDA_CALL(cudaFree(ptr));
}
|
|
|
|
|
|
|
|
// Hints the CUDA runtime that the host (control environment) is about to
// access this buffer. No-op for pointers that are not managed memory, and
// for CUDA runtimes older than 8.0 (which lack the advise/prefetch APIs).
void CudaAllocator::PrepareForControl(const void* ptr, std::size_t numBytes)
{
  if (!IsManagedPointer(ptr))
  {
    return;
  }

#if CUDART_VERSION >= 8000
  // TODO these hints need to be benchmarked and adjusted once we start
  // sharing the pointers between cont/exec
  VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId));
  VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, cudaCpuDeviceId, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
|
|
|
|
|
|
|
|
// Hints the CUDA runtime that the current device will read this buffer as
// kernel input. No-op for non-managed pointers and for CUDA runtimes older
// than 8.0.
void CudaAllocator::PrepareForInput(const void* ptr, std::size_t numBytes)
{
  if (!IsManagedPointer(ptr))
  {
    return;
  }

#if CUDART_VERSION >= 8000
  int deviceId;
  VTKM_CUDA_CALL(cudaGetDevice(&deviceId));
  // VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, deviceId));
  // VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetReadMostly, deviceId));
  VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceId));
  VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceId, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
|
|
|
|
|
|
|
|
// Hints the CUDA runtime that the current device will write this buffer as
// kernel output. No-op for non-managed pointers and for CUDA runtimes older
// than 8.0.
void CudaAllocator::PrepareForOutput(const void* ptr, std::size_t numBytes)
{
  if (!IsManagedPointer(ptr))
  {
    return;
  }

#if CUDART_VERSION >= 8000
  int deviceId;
  VTKM_CUDA_CALL(cudaGetDevice(&deviceId));
  // VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, deviceId));
  // VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, deviceId));
  VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceId));
  VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceId, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
|
|
|
|
|
|
|
|
// Hints the CUDA runtime that the current device will both read and write
// this buffer (in-place use). No-op for non-managed pointers and for CUDA
// runtimes older than 8.0.
void CudaAllocator::PrepareForInPlace(const void* ptr, std::size_t numBytes)
{
  if (!IsManagedPointer(ptr))
  {
    return;
  }

#if CUDART_VERSION >= 8000
  int deviceId;
  VTKM_CUDA_CALL(cudaGetDevice(&deviceId));
  // VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, deviceId));
  // VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, deviceId));
  VTKM_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceId));
  VTKM_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceId, cudaStreamPerThread));
#endif // CUDA >= 8.0
}
|
|
|
|
|
|
|
|
// One-time lazy initialization: probes every CUDA device and records in
// ManagedMemorySupported whether ALL of them support concurrent managed
// access (the weaker managedAccess-only property carries extra
// synchronization requirements, so it is deliberately not accepted).
// Subsequent calls are cheap no-ops.
void CudaAllocator::Initialize()
{
  if (IsInitialized)
  {
    return;
  }

  int deviceCount;
  VTKM_CUDA_CALL(cudaGetDeviceCount(&deviceCount));

  // With zero devices the seed is false and the loop never runs, matching
  // the "no managed memory" outcome. Otherwise, use the feature set
  // supported by all devices.
  bool allConcurrentManaged = (deviceCount > 0);
  cudaDeviceProp properties;
  for (int device = 0; device < deviceCount && allConcurrentManaged; ++device)
  {
    VTKM_CUDA_CALL(cudaGetDeviceProperties(&properties, device));
    allConcurrentManaged = (properties.concurrentManagedAccess != 0);
  }

  ManagedMemorySupported = allConcurrentManaged;
  IsInitialized = true;
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} // end namespace vtkm::cont::cuda::internal
|