Merge topic '228-consolidate_cuda_internals'
1f6a662c0 Merge DevAdaptAlgoThrust --> DevAdaptAlgoCuda. be0c6a17a Move DevAdaptAtomicArrayImplementation to its own file. 3af9f6608 Merge ArrayManagerExecutionThrustDevice into AMECuda. Acked-by: Kitware Robot <kwrobot@kitware.com> Acked-by: Robert Maynard <robert.maynard@kitware.com> Merge-request: !1228
This commit is contained in:
commit
9cfc9112cd
@ -20,16 +20,30 @@
|
||||
#ifndef vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_h
|
||||
#define vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_h
|
||||
|
||||
#include <vtkm/cont/cuda/ErrorCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/CudaAllocator.h>
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/ThrustExceptionHandler.h>
|
||||
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
|
||||
|
||||
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h>
|
||||
#include <vtkm/cont/internal/ArrayExportMacros.h>
|
||||
#include <vtkm/cont/internal/ArrayManagerExecution.h>
|
||||
|
||||
#include <vtkm/cont/ArrayPortalToIterators.h>
|
||||
#include <vtkm/cont/ErrorBadAllocation.h>
|
||||
#include <vtkm/cont/Storage.h>
|
||||
|
||||
//This is in a separate header so that ArrayHandleBasicImpl can include
|
||||
//the interface without getting any CUDA headers
|
||||
#include <vtkm/cont/cuda/internal/ExecutionArrayInterfaceBasicCuda.h>
|
||||
|
||||
VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/device_ptr.h>
|
||||
VTKM_THIRDPARTY_POST_INCLUDE
|
||||
|
||||
#include <limits>
|
||||
|
||||
// These must be placed in the vtkm::cont::internal namespace so that
|
||||
// the template can be found.
|
||||
|
||||
@ -42,29 +56,43 @@ namespace internal
|
||||
|
||||
template <typename T, class StorageTag>
|
||||
class ArrayManagerExecution<T, StorageTag, vtkm::cont::DeviceAdapterTagCuda>
|
||||
: public vtkm::cont::cuda::internal::ArrayManagerExecutionThrustDevice<T, StorageTag>
|
||||
{
|
||||
public:
|
||||
using Superclass = vtkm::cont::cuda::internal::ArrayManagerExecutionThrustDevice<T, StorageTag>;
|
||||
using ValueType = typename Superclass::ValueType;
|
||||
using PortalType = typename Superclass::PortalType;
|
||||
using PortalConstType = typename Superclass::PortalConstType;
|
||||
using StorageType = typename Superclass::StorageType;
|
||||
using ValueType = T;
|
||||
using PointerType = T*;
|
||||
using PortalType = vtkm::exec::cuda::internal::ArrayPortalFromThrust<T>;
|
||||
using PortalConstType = vtkm::exec::cuda::internal::ConstArrayPortalFromThrust<T>;
|
||||
using StorageType = vtkm::cont::internal::Storage<ValueType, StorageTag>;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
|
||||
VTKM_CONT
|
||||
ArrayManagerExecution(StorageType* storage)
|
||||
: Superclass(storage)
|
||||
: Storage(storage)
|
||||
, Begin(nullptr)
|
||||
, End(nullptr)
|
||||
, Capacity(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
~ArrayManagerExecution() { this->ReleaseResources(); }
|
||||
|
||||
/// Returns the size of the array.
|
||||
///
|
||||
VTKM_CONT
|
||||
vtkm::Id GetNumberOfValues() const { return static_cast<vtkm::Id>(this->End - this->Begin); }
|
||||
|
||||
VTKM_CONT
|
||||
PortalConstType PrepareForInput(bool updateData)
|
||||
{
|
||||
try
|
||||
{
|
||||
// This alternate form of PrepareForInput works around an issue
|
||||
// with nvcc 7.5.
|
||||
return this->Superclass::template _PrepareForInput<void>(updateData);
|
||||
if (updateData)
|
||||
{
|
||||
this->CopyToExecution();
|
||||
}
|
||||
|
||||
return PortalConstType(this->Begin, this->End);
|
||||
}
|
||||
catch (vtkm::cont::ErrorBadAllocation& error)
|
||||
{
|
||||
@ -83,9 +111,12 @@ public:
|
||||
{
|
||||
try
|
||||
{
|
||||
// This alternate form of PrepareForInPlace works around an issue
|
||||
// with nvcc 7.5.
|
||||
return this->Superclass::template _PrepareForInPlace<void>(updateData);
|
||||
if (updateData)
|
||||
{
|
||||
this->CopyToExecution();
|
||||
}
|
||||
|
||||
return PortalType(this->Begin, this->End);
|
||||
}
|
||||
catch (vtkm::cont::ErrorBadAllocation& error)
|
||||
{
|
||||
@ -104,9 +135,49 @@ public:
|
||||
{
|
||||
try
|
||||
{
|
||||
// This alternate form of PrepareForOutput works around an issue
|
||||
// with nvcc 7.5.
|
||||
return this->Superclass::template _PrepareForOutput<void>(numberOfValues);
|
||||
// Can we reuse the existing buffer?
|
||||
vtkm::Id curCapacity =
|
||||
this->Begin != nullptr ? static_cast<vtkm::Id>(this->Capacity - this->Begin) : 0;
|
||||
|
||||
// Just mark a new end if we don't need to increase the allocation:
|
||||
if (curCapacity >= numberOfValues)
|
||||
{
|
||||
this->End = this->Begin + static_cast<difference_type>(numberOfValues);
|
||||
|
||||
return PortalType(this->Begin, this->End);
|
||||
}
|
||||
|
||||
const std::size_t maxNumVals = (std::numeric_limits<std::size_t>::max() / sizeof(ValueType));
|
||||
|
||||
if (static_cast<std::size_t>(numberOfValues) > maxNumVals)
|
||||
{
|
||||
std::ostringstream err;
|
||||
err << "Failed to allocate " << numberOfValues << " values on device: "
|
||||
<< "Number of bytes is not representable by std::size_t.";
|
||||
throw vtkm::cont::ErrorBadAllocation(err.str());
|
||||
}
|
||||
|
||||
this->ReleaseResources();
|
||||
|
||||
const std::size_t bufferSize = static_cast<std::size_t>(numberOfValues) * sizeof(ValueType);
|
||||
|
||||
// Attempt to allocate:
|
||||
try
|
||||
{
|
||||
this->Begin =
|
||||
static_cast<ValueType*>(vtkm::cont::cuda::internal::CudaAllocator::Allocate(bufferSize));
|
||||
}
|
||||
catch (const std::exception& error)
|
||||
{
|
||||
std::ostringstream err;
|
||||
err << "Failed to allocate " << bufferSize << " bytes on device: " << error.what();
|
||||
throw vtkm::cont::ErrorBadAllocation(err.str());
|
||||
}
|
||||
|
||||
this->Capacity = this->Begin + static_cast<difference_type>(numberOfValues);
|
||||
this->End = this->Capacity;
|
||||
|
||||
return PortalType(this->Begin, this->End);
|
||||
}
|
||||
catch (vtkm::cont::ErrorBadAllocation& error)
|
||||
{
|
||||
@ -119,6 +190,75 @@ public:
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/// Allocates enough space in \c storage and copies the data in the
|
||||
/// device vector into it.
|
||||
///
|
||||
VTKM_CONT
|
||||
void RetrieveOutputData(StorageType* storage) const
|
||||
{
|
||||
storage->Allocate(this->GetNumberOfValues());
|
||||
try
|
||||
{
|
||||
::thrust::copy(thrust::cuda::pointer<ValueType>(this->Begin),
|
||||
thrust::cuda::pointer<ValueType>(this->End),
|
||||
vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal()));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
vtkm::cont::cuda::internal::throwAsVTKmException();
|
||||
}
|
||||
}
|
||||
|
||||
/// Resizes the device vector.
|
||||
///
|
||||
VTKM_CONT void Shrink(vtkm::Id numberOfValues)
|
||||
{
|
||||
// The operation will succeed even if this assertion fails, but this
|
||||
// is still supposed to be a precondition to Shrink.
|
||||
VTKM_ASSERT(this->Begin != nullptr && this->Begin + numberOfValues <= this->End);
|
||||
|
||||
this->End = this->Begin + static_cast<difference_type>(numberOfValues);
|
||||
}
|
||||
|
||||
/// Frees all memory.
|
||||
///
|
||||
VTKM_CONT void ReleaseResources()
|
||||
{
|
||||
if (this->Begin != nullptr)
|
||||
{
|
||||
vtkm::cont::cuda::internal::CudaAllocator::Free(this->Begin);
|
||||
this->Begin = nullptr;
|
||||
this->End = nullptr;
|
||||
this->Capacity = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ArrayManagerExecution(ArrayManagerExecution&) = delete;
|
||||
void operator=(ArrayManagerExecution&) = delete;
|
||||
|
||||
StorageType* Storage;
|
||||
|
||||
PointerType Begin;
|
||||
PointerType End;
|
||||
PointerType Capacity;
|
||||
|
||||
VTKM_CONT
|
||||
void CopyToExecution()
|
||||
{
|
||||
try
|
||||
{
|
||||
this->PrepareForOutput(this->Storage->GetNumberOfValues());
|
||||
::thrust::copy(vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()),
|
||||
vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst()),
|
||||
thrust::cuda::pointer<ValueType>(this->Begin));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
vtkm::cont::cuda::internal::throwAsVTKmException();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
|
@ -1,262 +0,0 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2014 UT-Battelle, LLC.
|
||||
// Copyright 2014 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_cuda_internal_ArrayManagerExecutionThrustDevice_h
|
||||
#define vtk_m_cont_cuda_internal_ArrayManagerExecutionThrustDevice_h
|
||||
|
||||
#include <vtkm/cont/ArrayPortalToIterators.h>
|
||||
#include <vtkm/cont/ErrorBadAllocation.h>
|
||||
#include <vtkm/cont/Storage.h>
|
||||
|
||||
#include <vtkm/cont/cuda/ErrorCuda.h>
|
||||
#include <vtkm/cont/cuda/internal/CudaAllocator.h>
|
||||
#include <vtkm/cont/cuda/internal/ThrustExceptionHandler.h>
|
||||
#include <vtkm/exec/cuda/internal/ArrayPortalFromThrust.h>
|
||||
|
||||
VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
#include <thrust/copy.h>
|
||||
#include <thrust/device_ptr.h>
|
||||
VTKM_THIRDPARTY_POST_INCLUDE
|
||||
|
||||
#include <limits>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace cuda
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
/// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c
|
||||
/// ArrayManagerExecution class for a thrust device adapter that is designed
|
||||
/// for the cuda backend which has separate memory spaces for host and device.
|
||||
template <typename T, class StorageTag>
|
||||
class ArrayManagerExecutionThrustDevice
|
||||
{
|
||||
public:
|
||||
using ValueType = T;
|
||||
using PointerType = T*;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
|
||||
using StorageType = vtkm::cont::internal::Storage<ValueType, StorageTag>;
|
||||
|
||||
using PortalType = vtkm::exec::cuda::internal::ArrayPortalFromThrust<T>;
|
||||
using PortalConstType = vtkm::exec::cuda::internal::ConstArrayPortalFromThrust<T>;
|
||||
|
||||
VTKM_CONT
|
||||
ArrayManagerExecutionThrustDevice(StorageType* storage)
|
||||
: Storage(storage)
|
||||
, Begin(nullptr)
|
||||
, End(nullptr)
|
||||
, Capacity(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
~ArrayManagerExecutionThrustDevice() { this->ReleaseResources(); }
|
||||
|
||||
/// Returns the size of the array.
|
||||
///
|
||||
VTKM_CONT
|
||||
vtkm::Id GetNumberOfValues() const { return static_cast<vtkm::Id>(this->End - this->Begin); }
|
||||
|
||||
/// Allocates the appropriate size of the array and copies the given data
|
||||
/// into the array.
|
||||
///
|
||||
VTKM_CONT
|
||||
PortalConstType PrepareForInput(bool updateData)
|
||||
{
|
||||
if (updateData)
|
||||
{
|
||||
this->CopyToExecution();
|
||||
}
|
||||
else // !updateData
|
||||
{
|
||||
// The data in this->Array should already be valid.
|
||||
}
|
||||
|
||||
return PortalConstType(this->Begin, this->End);
|
||||
}
|
||||
|
||||
/// Workaround for nvcc 7.5 compiler warning bug.
|
||||
template <typename DummyType>
|
||||
VTKM_CONT PortalConstType _PrepareForInput(bool updateData)
|
||||
{
|
||||
return this->PrepareForInput(updateData);
|
||||
}
|
||||
|
||||
/// Allocates the appropriate size of the array and copies the given data
|
||||
/// into the array.
|
||||
///
|
||||
VTKM_CONT
|
||||
PortalType PrepareForInPlace(bool updateData)
|
||||
{
|
||||
if (updateData)
|
||||
{
|
||||
this->CopyToExecution();
|
||||
}
|
||||
else // !updateData
|
||||
{
|
||||
// The data in this->Array should already be valid.
|
||||
}
|
||||
|
||||
return PortalType(this->Begin, this->End);
|
||||
}
|
||||
|
||||
/// Workaround for nvcc 7.5 compiler warning bug.
|
||||
template <typename DummyType>
|
||||
VTKM_CONT PortalType _PrepareForInPlace(bool updateData)
|
||||
{
|
||||
return this->PrepareForInPlace(updateData);
|
||||
}
|
||||
|
||||
/// Allocates the array to the given size.
|
||||
///
|
||||
VTKM_CONT
|
||||
PortalType PrepareForOutput(vtkm::Id numberOfValues)
|
||||
{
|
||||
// Can we reuse the existing buffer?
|
||||
vtkm::Id curCapacity =
|
||||
this->Begin != nullptr ? static_cast<vtkm::Id>(this->Capacity - this->Begin) : 0;
|
||||
|
||||
// Just mark a new end if we don't need to increase the allocation:
|
||||
if (curCapacity >= numberOfValues)
|
||||
{
|
||||
this->End = this->Begin + static_cast<difference_type>(numberOfValues);
|
||||
|
||||
return PortalType(this->Begin, this->End);
|
||||
}
|
||||
|
||||
const std::size_t maxNumVals = (std::numeric_limits<std::size_t>::max() / sizeof(ValueType));
|
||||
|
||||
if (static_cast<std::size_t>(numberOfValues) > maxNumVals)
|
||||
{
|
||||
std::ostringstream err;
|
||||
err << "Failed to allocate " << numberOfValues << " values on device: "
|
||||
<< "Number of bytes is not representable by std::size_t.";
|
||||
throw vtkm::cont::ErrorBadAllocation(err.str());
|
||||
}
|
||||
|
||||
this->ReleaseResources();
|
||||
|
||||
const std::size_t bufferSize = static_cast<std::size_t>(numberOfValues) * sizeof(ValueType);
|
||||
|
||||
// Attempt to allocate:
|
||||
try
|
||||
{
|
||||
this->Begin =
|
||||
static_cast<ValueType*>(vtkm::cont::cuda::internal::CudaAllocator::Allocate(bufferSize));
|
||||
}
|
||||
catch (const std::exception& error)
|
||||
{
|
||||
std::ostringstream err;
|
||||
err << "Failed to allocate " << bufferSize << " bytes on device: " << error.what();
|
||||
throw vtkm::cont::ErrorBadAllocation(err.str());
|
||||
}
|
||||
|
||||
this->Capacity = this->Begin + static_cast<difference_type>(numberOfValues);
|
||||
this->End = this->Capacity;
|
||||
|
||||
return PortalType(this->Begin, this->End);
|
||||
}
|
||||
|
||||
/// Workaround for nvcc 7.5 compiler warning bug.
|
||||
template <typename DummyType>
|
||||
VTKM_CONT PortalType _PrepareForOutput(vtkm::Id numberOfValues)
|
||||
{
|
||||
return this->PrepareForOutput(numberOfValues);
|
||||
}
|
||||
|
||||
/// Allocates enough space in \c storage and copies the data in the
|
||||
/// device vector into it.
|
||||
///
|
||||
VTKM_CONT
|
||||
void RetrieveOutputData(StorageType* storage) const
|
||||
{
|
||||
storage->Allocate(this->GetNumberOfValues());
|
||||
try
|
||||
{
|
||||
::thrust::copy(thrust::cuda::pointer<ValueType>(this->Begin),
|
||||
thrust::cuda::pointer<ValueType>(this->End),
|
||||
vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal()));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
vtkm::cont::cuda::internal::throwAsVTKmException();
|
||||
}
|
||||
}
|
||||
|
||||
/// Resizes the device vector.
|
||||
///
|
||||
VTKM_CONT void Shrink(vtkm::Id numberOfValues)
|
||||
{
|
||||
// The operation will succeed even if this assertion fails, but this
|
||||
// is still supposed to be a precondition to Shrink.
|
||||
VTKM_ASSERT(this->Begin != nullptr && this->Begin + numberOfValues <= this->End);
|
||||
|
||||
this->End = this->Begin + static_cast<difference_type>(numberOfValues);
|
||||
}
|
||||
|
||||
/// Frees all memory.
|
||||
///
|
||||
VTKM_CONT void ReleaseResources()
|
||||
{
|
||||
if (this->Begin != nullptr)
|
||||
{
|
||||
vtkm::cont::cuda::internal::CudaAllocator::Free(this->Begin);
|
||||
this->Begin = nullptr;
|
||||
this->End = nullptr;
|
||||
this->Capacity = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
ArrayManagerExecutionThrustDevice(ArrayManagerExecutionThrustDevice<T, StorageTag>&) = delete;
|
||||
void operator=(ArrayManagerExecutionThrustDevice<T, StorageTag>&) = delete;
|
||||
|
||||
StorageType* Storage;
|
||||
|
||||
PointerType Begin;
|
||||
PointerType End;
|
||||
PointerType Capacity;
|
||||
|
||||
VTKM_CONT
|
||||
void CopyToExecution()
|
||||
{
|
||||
try
|
||||
{
|
||||
this->PrepareForOutput(this->Storage->GetNumberOfValues());
|
||||
::thrust::copy(vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()),
|
||||
vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst()),
|
||||
thrust::cuda::pointer<ValueType>(this->Begin));
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
vtkm::cont::cuda::internal::throwAsVTKmException();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont::cuda::internal
|
||||
|
||||
#endif // vtk_m_cont_cuda_internal_ArrayManagerExecutionThrustDevice_h
|
@ -20,10 +20,9 @@
|
||||
|
||||
set(headers
|
||||
ArrayManagerExecutionCuda.h
|
||||
ArrayManagerExecutionThrustDevice.h
|
||||
CudaAllocator.h
|
||||
DeviceAdapterAlgorithmCuda.h
|
||||
DeviceAdapterAlgorithmThrust.h
|
||||
DeviceAdapterAtomicArrayImplementationCuda.h
|
||||
DeviceAdapterRuntimeDetectorCuda.h
|
||||
DeviceAdapterTagCuda.h
|
||||
DeviceAdapterTimerImplementationCuda.h
|
||||
@ -49,7 +48,7 @@ endif()
|
||||
target_sources(vtkm_cont PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ArrayManagerExecutionCuda.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CudaAllocator.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterAlgorithmThrust.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterAlgorithmCuda.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterRuntimeDetectorCuda.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterTimerImplementationCuda.cu
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ExecutionArrayInterfaceBasicCuda.cu
|
||||
|
@ -18,7 +18,7 @@
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmThrust.h>
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
@ -65,11 +65,12 @@ VTKM_CONT_EXPORT vtkm::UInt32 getNumSMs(int dId)
|
||||
}
|
||||
return numSMs[index];
|
||||
}
|
||||
}
|
||||
} // end namespace cuda::internal
|
||||
|
||||
// we use cuda pinned memory to reduce the amount of synchronization
|
||||
// and mem copies between the host and device.
|
||||
template <>
|
||||
char* DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetPinnedErrorArray(
|
||||
char* DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetPinnedErrorArray(
|
||||
vtkm::Id& arraySize,
|
||||
char** hostPointer)
|
||||
{
|
||||
@ -92,8 +93,7 @@ char* DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetPinnedE
|
||||
return devicePtr;
|
||||
}
|
||||
|
||||
template <>
|
||||
char* DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::SetupErrorBuffer(
|
||||
char* DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::SetupErrorBuffer(
|
||||
vtkm::exec::cuda::internal::TaskStrided& functor)
|
||||
{
|
||||
//since the memory is pinned we can access it safely on the host
|
||||
@ -111,8 +111,7 @@ char* DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::SetupError
|
||||
return hostErrorPtr;
|
||||
}
|
||||
|
||||
template <>
|
||||
void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAndBlocks(
|
||||
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAndBlocks(
|
||||
vtkm::UInt32& grids,
|
||||
vtkm::UInt32& blocks,
|
||||
vtkm::Id size)
|
||||
@ -120,19 +119,18 @@ void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAnd
|
||||
(void)size;
|
||||
int deviceId;
|
||||
VTKM_CUDA_CALL(cudaGetDevice(&deviceId)); //get deviceid from cuda
|
||||
grids = 32 * getNumSMs(deviceId);
|
||||
grids = 32 * cuda::internal::getNumSMs(deviceId);
|
||||
blocks = 128;
|
||||
}
|
||||
|
||||
template <>
|
||||
void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAndBlocks(
|
||||
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAndBlocks(
|
||||
vtkm::UInt32& grids,
|
||||
dim3& blocks,
|
||||
const dim3& size)
|
||||
{
|
||||
int deviceId;
|
||||
VTKM_CUDA_CALL(cudaGetDevice(&deviceId)); //get deviceid from cuda
|
||||
grids = 32 * getNumSMs(deviceId);
|
||||
grids = 32 * cuda::internal::getNumSMs(deviceId);
|
||||
|
||||
if (size.x == 0)
|
||||
{ //grids that have no x dimension
|
||||
@ -154,6 +152,4 @@ void DeviceAdapterAlgorithmThrust<vtkm::cont::DeviceAdapterTagCuda>::GetGridsAnd
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,104 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h
|
||||
#define vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/StorageBasic.h>
|
||||
|
||||
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
|
||||
|
||||
// Disable warnings we check vtkm for but Thrust does not.
|
||||
VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
#include <thrust/device_ptr.h>
|
||||
VTKM_THIRDPARTY_POST_INCLUDE
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
/// CUDA contains its own atomic operations
|
||||
///
|
||||
template <typename T>
|
||||
class DeviceAdapterAtomicArrayImplementation<T, vtkm::cont::DeviceAdapterTagCuda>
|
||||
{
|
||||
public:
|
||||
VTKM_CONT
|
||||
DeviceAdapterAtomicArrayImplementation(
|
||||
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle)
|
||||
: Portal(handle.PrepareForInPlace(vtkm::cont::DeviceAdapterTagCuda()))
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_EXEC T Add(vtkm::Id index, const T& value) const
|
||||
{
|
||||
T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index);
|
||||
return this->vtkmAtomicAdd(lockedValue, value);
|
||||
}
|
||||
|
||||
VTKM_EXEC T CompareAndSwap(vtkm::Id index,
|
||||
const vtkm::Int64& newValue,
|
||||
const vtkm::Int64& oldValue) const
|
||||
{
|
||||
T* lockedValue = ::thrust::raw_pointer_cast(this->Portal.GetIteratorBegin() + index);
|
||||
return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
||||
}
|
||||
|
||||
private:
|
||||
using PortalType =
|
||||
typename vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>::template ExecutionTypes<
|
||||
vtkm::cont::DeviceAdapterTagCuda>::Portal;
|
||||
PortalType Portal;
|
||||
|
||||
__device__ vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
|
||||
{
|
||||
return atomicAdd((unsigned long long*)address, (unsigned long long)value);
|
||||
}
|
||||
|
||||
__device__ vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
|
||||
{
|
||||
return atomicAdd(address, value);
|
||||
}
|
||||
|
||||
__device__ vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
|
||||
const vtkm::Int32& newValue,
|
||||
const vtkm::Int32& oldValue) const
|
||||
{
|
||||
return atomicCAS(address, oldValue, newValue);
|
||||
}
|
||||
|
||||
__device__ vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
|
||||
const vtkm::Int64& newValue,
|
||||
const vtkm::Int64& oldValue) const
|
||||
{
|
||||
return atomicCAS((unsigned long long int*)address,
|
||||
(unsigned long long int)oldValue,
|
||||
(unsigned long long int)newValue);
|
||||
}
|
||||
};
|
||||
}
|
||||
} // end namespace vtkm::cont
|
||||
|
||||
#endif // vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementationCuda_h
|
@ -30,6 +30,7 @@ set(headers
|
||||
ArrayTransfer.h
|
||||
ConnectivityExplicitInternals.h
|
||||
DeviceAdapterAlgorithmGeneral.h
|
||||
DeviceAdapterAtomicArrayImplementation.h
|
||||
DeviceAdapterDefaultSelection.h
|
||||
DeviceAdapterError.h
|
||||
DeviceAdapterListHelpers.h
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <vtkm/cont/ArrayHandleIndex.h>
|
||||
#include <vtkm/cont/ArrayHandleStreaming.h>
|
||||
#include <vtkm/cont/ArrayHandleZip.h>
|
||||
#include <vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h>
|
||||
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||
|
||||
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
|
||||
@ -874,125 +875,8 @@ public:
|
||||
input, values_output, values_output);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::cont::internal
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
/// \brief Class providing a device-specific atomic interface.
|
||||
///
|
||||
/// The class provide the actual implementation used by vtkm::exec::AtomicArray.
|
||||
/// A serial default implementation is provided. But each device will have a different
|
||||
/// implementation.
|
||||
///
|
||||
/// Serial requires no form of atomicity
|
||||
///
|
||||
template <typename T, typename DeviceTag>
|
||||
class DeviceAdapterAtomicArrayImplementation
|
||||
{
|
||||
public:
|
||||
VTKM_CONT
|
||||
DeviceAdapterAtomicArrayImplementation(
|
||||
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle)
|
||||
: Iterators(IteratorsType(handle.PrepareForInPlace(DeviceTag())))
|
||||
{
|
||||
}
|
||||
|
||||
T Add(vtkm::Id index, const T& value) const
|
||||
{
|
||||
T* lockedValue;
|
||||
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
|
||||
using IteratorType = typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType;
|
||||
typename IteratorType::pointer temp =
|
||||
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
|
||||
lockedValue = temp;
|
||||
return vtkmAtomicAdd(lockedValue, value);
|
||||
#else
|
||||
lockedValue = (Iterators.GetBegin() + index);
|
||||
return vtkmAtomicAdd(lockedValue, value);
|
||||
#endif
|
||||
}
|
||||
|
||||
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
|
||||
{
|
||||
T* lockedValue;
|
||||
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
|
||||
using IteratorType = typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType;
|
||||
typename IteratorType::pointer temp =
|
||||
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
|
||||
lockedValue = temp;
|
||||
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
||||
#else
|
||||
lockedValue = (Iterators.GetBegin() + index);
|
||||
return vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
using PortalType =
|
||||
typename vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>::template ExecutionTypes<
|
||||
DeviceTag>::Portal;
|
||||
using IteratorsType = vtkm::cont::ArrayPortalToIterators<PortalType>;
|
||||
IteratorsType Iterators;
|
||||
|
||||
#if defined(VTKM_MSVC) //MSVC atomics
|
||||
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
|
||||
{
|
||||
return InterlockedExchangeAdd(reinterpret_cast<volatile long*>(address), value);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
|
||||
{
|
||||
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(address), value);
|
||||
}
|
||||
|
||||
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
|
||||
const vtkm::Int32& newValue,
|
||||
const vtkm::Int32& oldValue) const
|
||||
{
|
||||
return InterlockedCompareExchange(
|
||||
reinterpret_cast<volatile long*>(address), newValue, oldValue);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
|
||||
const vtkm::Int64& newValue,
|
||||
const vtkm::Int64& oldValue) const
|
||||
{
|
||||
return InterlockedCompareExchange64(
|
||||
reinterpret_cast<volatile long long*>(address), newValue, oldValue);
|
||||
}
|
||||
|
||||
#else //gcc built-in atomics
|
||||
|
||||
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
|
||||
{
|
||||
return __sync_fetch_and_add(address, value);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
|
||||
{
|
||||
return __sync_fetch_and_add(address, value);
|
||||
}
|
||||
|
||||
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
|
||||
const vtkm::Int32& newValue,
|
||||
const vtkm::Int32& oldValue) const
|
||||
{
|
||||
return __sync_val_compare_and_swap(address, oldValue, newValue);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
|
||||
const vtkm::Int64& newValue,
|
||||
const vtkm::Int64& oldValue) const
|
||||
{
|
||||
return __sync_val_compare_and_swap(address, oldValue, newValue);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
} // namespace internal
|
||||
|
||||
/// \brief Class providing a device-specific support for selecting the optimal
|
||||
/// Task type for a given worklet.
|
||||
|
152
vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h
Normal file
152
vtkm/cont/internal/DeviceAdapterAtomicArrayImplementation.h
Normal file
@ -0,0 +1,152 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h
|
||||
#define vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h
|
||||
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/ArrayPortalToIterators.h>
|
||||
#include <vtkm/cont/StorageBasic.h>
|
||||
|
||||
#include <vtkm/internal/Configure.h>
|
||||
#include <vtkm/internal/Windows.h>
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
/// \brief Class providing a device-specific atomic interface.
|
||||
///
|
||||
/// The class provide the actual implementation used by vtkm::exec::AtomicArray.
|
||||
/// A serial default implementation is provided. But each device will have a different
|
||||
/// implementation.
|
||||
///
|
||||
/// Serial requires no form of atomicity
|
||||
///
|
||||
template <typename T, typename DeviceTag>
|
||||
class DeviceAdapterAtomicArrayImplementation
|
||||
{
|
||||
using PortalType =
|
||||
typename vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>::template ExecutionTypes<
|
||||
DeviceTag>::Portal;
|
||||
using IteratorsType = vtkm::cont::ArrayPortalToIterators<PortalType>;
|
||||
IteratorsType Iterators;
|
||||
|
||||
public:
|
||||
VTKM_CONT
|
||||
DeviceAdapterAtomicArrayImplementation(
|
||||
vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic> handle)
|
||||
: Iterators(IteratorsType(handle.PrepareForInPlace(DeviceTag())))
|
||||
{
|
||||
}
|
||||
|
||||
T Add(vtkm::Id index, const T& value) const
|
||||
{
|
||||
T* lockedValue;
|
||||
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
|
||||
using IteratorType = typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType;
|
||||
typename IteratorType::pointer temp =
|
||||
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
|
||||
lockedValue = temp;
|
||||
return this->vtkmAtomicAdd(lockedValue, value);
|
||||
#else
|
||||
lockedValue = (Iterators.GetBegin() + index);
|
||||
return this->vtkmAtomicAdd(lockedValue, value);
|
||||
#endif
|
||||
}
|
||||
|
||||
T CompareAndSwap(vtkm::Id index, const T& newValue, const T& oldValue) const
|
||||
{
|
||||
T* lockedValue;
|
||||
#if defined(_ITERATOR_DEBUG_LEVEL) && _ITERATOR_DEBUG_LEVEL > 0
|
||||
using IteratorType = typename vtkm::cont::ArrayPortalToIterators<PortalType>::IteratorType;
|
||||
typename IteratorType::pointer temp =
|
||||
&(*(Iterators.GetBegin() + static_cast<std::ptrdiff_t>(index)));
|
||||
lockedValue = temp;
|
||||
return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
||||
#else
|
||||
lockedValue = (Iterators.GetBegin() + index);
|
||||
return this->vtkmCompareAndSwap(lockedValue, newValue, oldValue);
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
#if defined(VTKM_MSVC) //MSVC atomics
|
||||
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
|
||||
{
|
||||
return InterlockedExchangeAdd(reinterpret_cast<volatile long*>(address), value);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
|
||||
{
|
||||
return InterlockedExchangeAdd64(reinterpret_cast<volatile long long*>(address), value);
|
||||
}
|
||||
|
||||
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
|
||||
const vtkm::Int32& newValue,
|
||||
const vtkm::Int32& oldValue) const
|
||||
{
|
||||
return InterlockedCompareExchange(
|
||||
reinterpret_cast<volatile long*>(address), newValue, oldValue);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
|
||||
const vtkm::Int64& newValue,
|
||||
const vtkm::Int64& oldValue) const
|
||||
{
|
||||
return InterlockedCompareExchange64(
|
||||
reinterpret_cast<volatile long long*>(address), newValue, oldValue);
|
||||
}
|
||||
|
||||
#else //gcc built-in atomics
|
||||
|
||||
vtkm::Int32 vtkmAtomicAdd(vtkm::Int32* address, const vtkm::Int32& value) const
|
||||
{
|
||||
return __sync_fetch_and_add(address, value);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmAtomicAdd(vtkm::Int64* address, const vtkm::Int64& value) const
|
||||
{
|
||||
return __sync_fetch_and_add(address, value);
|
||||
}
|
||||
|
||||
vtkm::Int32 vtkmCompareAndSwap(vtkm::Int32* address,
|
||||
const vtkm::Int32& newValue,
|
||||
const vtkm::Int32& oldValue) const
|
||||
{
|
||||
return __sync_val_compare_and_swap(address, oldValue, newValue);
|
||||
}
|
||||
|
||||
vtkm::Int64 vtkmCompareAndSwap(vtkm::Int64* address,
|
||||
const vtkm::Int64& newValue,
|
||||
const vtkm::Int64& oldValue) const
|
||||
{
|
||||
return __sync_val_compare_and_swap(address, oldValue, newValue);
|
||||
}
|
||||
|
||||
#endif
|
||||
};
|
||||
}
|
||||
} // end namespace vtkm::cont
|
||||
|
||||
#endif // vtk_m_cont_internal_DeviceAdapterAtomicArrayImplementation_h
|
Loading…
Reference in New Issue
Block a user