vtk-m2/vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.cu

//============================================================================
//  Copyright (c) Kitware, Inc.
//  All rights reserved.
//  See LICENSE.txt for details.
//  This software is distributed WITHOUT ANY WARRANTY; without even
//  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
//  PURPOSE.  See the above copyright notice for more information.
//
//  Copyright 2017 Sandia Corporation.
//  Copyright 2017 UT-Battelle, LLC.
//  Copyright 2017 Los Alamos National Security.
//
//  Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
//  the U.S. Government retains certain rights in this software.
//
//  Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
//  Laboratory (LANL), the U.S. Government retains certain rights in
//  this software.
//============================================================================

#define vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_cu

#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
#include <vtkm/cont/cuda/internal/CudaAllocator.h>

using vtkm::cont::cuda::internal::CudaAllocator;

namespace vtkm
{
namespace cont
{
namespace internal
{

ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::ExecutionArrayInterfaceBasic(
  StorageBasicBase& storage)
  : Superclass(storage)
{
}

DeviceAdapterId ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::GetDeviceId() const
{
  return VTKM_DEVICE_ADAPTER_CUDA;
}

void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::Allocate(TypelessExecutionArray& execArray,
                                                                  vtkm::Id numBytes) const
{
  // Detect if we can reuse a device-accessible pointer from the control env:
  if (CudaAllocator::IsDevicePointer(execArray.ArrayControl))
  {
    const vtkm::Id managedCapacity = static_cast<const char*>(execArray.ArrayControlCapacity) -
      static_cast<const char*>(execArray.ArrayControl);
    if (managedCapacity >= numBytes)
    {
      if (execArray.Array && execArray.Array != execArray.ArrayControl)
      {
        this->Free(execArray);
      }

      execArray.Array = const_cast<void*>(execArray.ArrayControl);
      execArray.ArrayEnd = static_cast<char*>(execArray.Array) + numBytes;
      execArray.ArrayCapacity = const_cast<void*>(execArray.ArrayControlCapacity);
      return;
    }
  }

  if (execArray.Array != nullptr)
  {
    const vtkm::Id cap =
      static_cast<char*>(execArray.ArrayCapacity) - static_cast<char*>(execArray.Array);

    if (cap < numBytes)
    { // Current allocation too small -- free & realloc
      this->Free(execArray);
    }
    else
    { // Reuse buffer if possible:
      execArray.ArrayEnd = static_cast<char*>(execArray.Array) + numBytes;
      return;
    }
  }

  VTKM_ASSERT(execArray.Array == nullptr);

  // Attempt to allocate:
  try
  {
    // Cast to char* so that the pointer math below will work.
    char* tmp = static_cast<char*>(CudaAllocator::Allocate(static_cast<size_t>(numBytes)));
    execArray.Array = tmp;
    execArray.ArrayEnd = tmp + numBytes;
    execArray.ArrayCapacity = tmp + numBytes;
  }
  catch (const std::exception& error)
  {
    std::ostringstream err;
    err << "Failed to allocate " << numBytes << " bytes on device: " << error.what();
    throw vtkm::cont::ErrorBadAllocation(err.str());
  }
}

void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::Free(
  TypelessExecutionArray& execArray) const
{
  // If we're sharing a device-accessible pointer between control/exec, don't
  // actually free it -- just null the pointers here:
  if (execArray.Array == execArray.ArrayControl &&
      CudaAllocator::IsDevicePointer(execArray.ArrayControl))
  {
    execArray.Array = nullptr;
    execArray.ArrayEnd = nullptr;
    execArray.ArrayCapacity = nullptr;
    return;
  }

  if (execArray.Array != nullptr)
  {
    CudaAllocator::Free(execArray.Array);
    execArray.Array = nullptr;
    execArray.ArrayEnd = nullptr;
    execArray.ArrayCapacity = nullptr;
  }
}

void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyFromControl(const void* controlPtr,
                                                                         void* executionPtr,
                                                                         vtkm::Id numBytes) const
{
  // Do nothing if we're sharing a device-accessible pointer between control and
  // execution:
  if (controlPtr == executionPtr && CudaAllocator::IsDevicePointer(controlPtr))
  {
    return;
  }

  VTKM_CUDA_CALL(cudaMemcpy(
    executionPtr, controlPtr, static_cast<std::size_t>(numBytes), cudaMemcpyHostToDevice));
}

void ExecutionArrayInterfaceBasic<DeviceAdapterTagCuda>::CopyToControl(const void* executionPtr,
                                                                       void* controlPtr,
                                                                       vtkm::Id numBytes) const
{
  // Do nothing if we're sharing a cuda managed pointer between control and execution:
  if (controlPtr == executionPtr && CudaAllocator::IsDevicePointer(controlPtr))
  {
    // If we're trying to copy a shared, non-managed device pointer back to
    // control throw an exception -- the pointer cannot be read from control,
    // so this operation is invalid.
    if (!CudaAllocator::IsManagedPointer(controlPtr))
    {
      throw vtkm::cont::ErrorBadValue(
        "Control pointer is a CUDA device pointer that does not supported managed access.");
    }

    // If it is managed, just return and let CUDA handle the migration for us.
    return;
  }

  VTKM_CUDA_CALL(cudaMemcpy(
    controlPtr, executionPtr, static_cast<std::size_t>(numBytes), cudaMemcpyDeviceToHost));
}

} // end namespace internal

VTKM_INSTANTIATE_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagCuda)
}
} // end vtkm::cont