//============================================================================ // Copyright (c) Kitware, Inc. // All rights reserved. // See LICENSE.txt for details. // // This software is distributed WITHOUT ANY WARRANTY; without even // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. See the above copyright notice for more information. //============================================================================ #ifndef vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_h #define vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_h #include #include #include #include #include #include #include #include #include #include #include //This is in a separate header so that ArrayHandleBasicImpl can include //the interface without getting any CUDA headers #include #include VTKM_THIRDPARTY_PRE_INCLUDE #include #include VTKM_THIRDPARTY_POST_INCLUDE #include // These must be placed in the vtkm::cont::internal namespace so that // the template can be found. namespace vtkm { namespace cont { namespace internal { template class ArrayManagerExecution { public: using ValueType = T; using PointerType = T*; using PortalType = vtkm::exec::cuda::internal::ArrayPortalFromThrust; using PortalConstType = vtkm::exec::cuda::internal::ConstArrayPortalFromThrust; using StorageType = vtkm::cont::internal::Storage; using difference_type = std::ptrdiff_t; VTKM_CONT ArrayManagerExecution(StorageType* storage) : Storage(storage) , Begin(nullptr) , End(nullptr) , Capacity(nullptr) { } VTKM_CONT ~ArrayManagerExecution() { this->ReleaseResources(); } /// Returns the size of the array. /// VTKM_CONT vtkm::Id GetNumberOfValues() const { return static_cast(this->End - this->Begin); } VTKM_CONT PortalConstType PrepareForInput(bool updateData) { try { if (updateData) { this->CopyToExecution(); } return PortalConstType(this->Begin, this->End); } catch (vtkm::cont::ErrorBadAllocation& error) { // Thrust does not seem to be clearing the CUDA error, so do it here. cudaError_t cudaError = cudaPeekAtLastError(); if (cudaError == cudaErrorMemoryAllocation) { cudaGetLastError(); } throw error; } } VTKM_CONT PortalType PrepareForInPlace(bool updateData) { try { if (updateData) { this->CopyToExecution(); } return PortalType(this->Begin, this->End); } catch (vtkm::cont::ErrorBadAllocation& error) { // Thrust does not seem to be clearing the CUDA error, so do it here. cudaError_t cudaError = cudaPeekAtLastError(); if (cudaError == cudaErrorMemoryAllocation) { cudaGetLastError(); } throw error; } } VTKM_CONT PortalType PrepareForOutput(vtkm::Id numberOfValues) { try { // Can we reuse the existing buffer? vtkm::Id curCapacity = this->Begin != nullptr ? static_cast(this->Capacity - this->Begin) : 0; // Just mark a new end if we don't need to increase the allocation: if (curCapacity >= numberOfValues) { this->End = this->Begin + static_cast(numberOfValues); return PortalType(this->Begin, this->End); } const std::size_t maxNumVals = (std::numeric_limits::max() / sizeof(ValueType)); if (static_cast(numberOfValues) > maxNumVals) { VTKM_LOG_F(vtkm::cont::LogLevel::MemExec, "Refusing to allocate CUDA memory; number of values (%llu) exceeds " "std::size_t capacity.", static_cast(numberOfValues)); std::ostringstream err; err << "Failed to allocate " << numberOfValues << " values on device: " << "Number of bytes is not representable by std::size_t."; throw vtkm::cont::ErrorBadAllocation(err.str()); } this->ReleaseResources(); const std::size_t bufferSize = static_cast(numberOfValues) * sizeof(ValueType); // Attempt to allocate: try { this->Begin = static_cast(vtkm::cont::cuda::internal::CudaAllocator::Allocate(bufferSize)); } catch (const std::exception& error) { std::ostringstream err; err << "Failed to allocate " << bufferSize << " bytes on device: " << error.what(); throw vtkm::cont::ErrorBadAllocation(err.str()); } this->Capacity = this->Begin + static_cast(numberOfValues); this->End = this->Capacity; return PortalType(this->Begin, this->End); } catch (vtkm::cont::ErrorBadAllocation& error) { // Thrust does not seem to be clearing the CUDA error, so do it here. cudaError_t cudaError = cudaPeekAtLastError(); if (cudaError == cudaErrorMemoryAllocation) { cudaGetLastError(); } throw error; } } /// Allocates enough space in \c storage and copies the data in the /// device vector into it. /// VTKM_CONT void RetrieveOutputData(StorageType* storage) const { storage->Allocate(this->GetNumberOfValues()); VTKM_LOG_F(vtkm::cont::LogLevel::MemTransfer, "Copying CUDA dev --> host: %s", vtkm::cont::GetSizeString(this->End - this->Begin).c_str()); try { ::thrust::copy(thrust::cuda::pointer(this->Begin), thrust::cuda::pointer(this->End), vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal())); } catch (...) { vtkm::cont::cuda::internal::throwAsVTKmException(); } } /// Resizes the device vector. /// VTKM_CONT void Shrink(vtkm::Id numberOfValues) { // The operation will succeed even if this assertion fails, but this // is still supposed to be a precondition to Shrink. VTKM_ASSERT(this->Begin != nullptr && this->Begin + numberOfValues <= this->End); this->End = this->Begin + static_cast(numberOfValues); } /// Frees all memory. /// VTKM_CONT void ReleaseResources() { if (this->Begin != nullptr) { vtkm::cont::cuda::internal::CudaAllocator::Free(this->Begin); this->Begin = nullptr; this->End = nullptr; this->Capacity = nullptr; } } private: ArrayManagerExecution(ArrayManagerExecution&) = delete; void operator=(ArrayManagerExecution&) = delete; StorageType* Storage; PointerType Begin; PointerType End; PointerType Capacity; VTKM_CONT void CopyToExecution() { try { this->PrepareForOutput(this->Storage->GetNumberOfValues()); VTKM_LOG_F(vtkm::cont::LogLevel::MemTransfer, "Copying host --> CUDA dev: %s.", vtkm::cont::GetSizeString(this->End - this->Begin).c_str()); ::thrust::copy(vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()), vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst()), thrust::cuda::pointer(this->Begin)); } catch (...) { vtkm::cont::cuda::internal::throwAsVTKmException(); } } }; template struct ExecutionPortalFactoryBasic { using ValueType = T; using PortalType = vtkm::exec::cuda::internal::ArrayPortalFromThrust; using PortalConstType = vtkm::exec::cuda::internal::ConstArrayPortalFromThrust; VTKM_CONT static PortalType CreatePortal(ValueType* start, ValueType* end) { return PortalType(start, end); } VTKM_CONT static PortalConstType CreatePortalConst(const ValueType* start, const ValueType* end) { return PortalConstType(start, end); } }; } // namespace internal #ifndef vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_cu VTKM_EXPORT_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagCuda) #endif // !vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_cu } } // namespace vtkm::cont #endif //vtk_m_cont_cuda_internal_ArrayManagerExecutionCuda_h