From 82c40a6374deb408ff485376e9768c6ea5992043 Mon Sep 17 00:00:00 2001 From: Christopher Sewell Date: Wed, 18 Jan 2017 11:43:49 -0700 Subject: [PATCH 1/3] First support for unified memory --- CMakeLists.txt | 12 + examples/CMakeLists.txt | 2 +- examples/unified_memory/CMakeLists.txt | 36 +++ examples/unified_memory/UnifiedMemory.cu | 223 ++++++++++++++++++ .../ArrayManagerExecutionThrustDevice.h | 108 ++++++++- 5 files changed, 372 insertions(+), 9 deletions(-) create mode 100644 examples/unified_memory/CMakeLists.txt create mode 100644 examples/unified_memory/UnifiedMemory.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ca6f4eb0..1df2fe8b8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,18 @@ option(VTKm_USE_DOUBLE_PRECISION ) option(VTKm_USE_64BIT_IDS "Use 64-bit indices." ON) +if (VTKm_ENABLE_CUDA) + option(VTKm_USE_UNIFIED_MEMORY + "Use CUDA unified memory" + OFF + ) +endif (VTKm_ENABLE_CUDA) + +if (VTKm_USE_UNIFIED_MEMORY) + set(CMAKE_CXX_FLAGS "-DVTKM_USE_UNIFIED_MEMORY ${CMAKE_CXX_FLAGS}") +endif() + + option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON) set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index c993fcd02..dd49bb824 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -24,7 +24,6 @@ set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR}) add_subdirectory(clipping) -add_subdirectory(contour_tree) add_subdirectory(demo) add_subdirectory(dynamic_dispatcher) add_subdirectory(hello_world) @@ -35,4 +34,5 @@ add_subdirectory(tetrahedra) if(VTKm_ENABLE_RENDERING) add_subdirectory(rendering) endif() +add_subdirectory(unified_memory) diff --git a/examples/unified_memory/CMakeLists.txt b/examples/unified_memory/CMakeLists.txt new file mode 100644 index 000000000..7f7519ba7 --- /dev/null +++ b/examples/unified_memory/CMakeLists.txt @@ -0,0 +1,36 @@ +##============================================================================= +## +## Copyright (c) Kitware, Inc. +## All rights reserved. +## See LICENSE.txt for details. +## +## This software is distributed WITHOUT ANY WARRANTY; without even +## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +## PURPOSE. See the above copyright notice for more information. +## +## Copyright 2015 Sandia Corporation. +## Copyright 2015 UT-Battelle, LLC. +## Copyright 2015 Los Alamos National Security. +## +## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +## the U.S. Government retains certain rights in this software. +## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +## Laboratory (LANL), the U.S. Government retains certain rights in +## this software. +## +##============================================================================= + +#Find the VTK-m package +find_package(VTKm REQUIRED QUIET + OPTIONAL_COMPONENTS Serial CUDA TBB OpenGL GLUT + ) + +if(VTKm_CUDA_FOUND) + vtkm_disable_troublesome_thrust_warnings() + # Cuda compiles do not respect target_include_directories + cuda_include_directories(${VTKm_INCLUDE_DIRS}) + cuda_add_executable(UnifiedMemory_CUDA UnifiedMemory.cu) + target_link_libraries(UnifiedMemory_CUDA ${VTKm_LIBRARIES}) + target_compile_options(UnifiedMemory_CUDA PRIVATE ${VTKm_COMPILE_OPTIONS}) +endif() + diff --git a/examples/unified_memory/UnifiedMemory.cu b/examples/unified_memory/UnifiedMemory.cu new file mode 100644 index 000000000..826f9aa9f --- /dev/null +++ b/examples/unified_memory/UnifiedMemory.cu @@ -0,0 +1,223 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2014 Sandia Corporation. +// Copyright 2014 UT-Battelle, LLC. +// Copyright 2014 Los Alamos National Security. +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. +//============================================================================ + +#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_CUDA + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +namespace { + +// Define the tangle field for the input data +class TangleField : public vtkm::worklet::WorkletMapField +{ +public: + typedef void ControlSignature(FieldIn vertexId, FieldOut v); + typedef void ExecutionSignature(_1, _2); + typedef _1 InputDomain; + + const vtkm::Id xdim, ydim, zdim; + const vtkm::Float32 xmin, ymin, zmin, xmax, ymax, zmax; + const vtkm::Id cellsPerLayer; + + VTKM_CONT + TangleField(const vtkm::Id3 dims, const vtkm::Float32 mins[3], const vtkm::Float32 maxs[3]) : xdim(dims[0]), ydim(dims[1]), zdim(dims[2]), + xmin(mins[0]), ymin(mins[1]), zmin(mins[2]), xmax(maxs[0]), ymax(maxs[1]), zmax(maxs[2]), cellsPerLayer((xdim) * (ydim)) { }; + + VTKM_EXEC + void operator()(const vtkm::Id &vertexId, vtkm::Float32 &v) const + { + const vtkm::Id x = vertexId % (xdim); + const vtkm::Id y = (vertexId / (xdim)) % (ydim); + const vtkm::Id z = vertexId / cellsPerLayer; + + const vtkm::Float32 fx = static_cast(x) / static_cast(xdim-1); + const vtkm::Float32 fy = static_cast(y) / static_cast(xdim-1); + const vtkm::Float32 fz = static_cast(z) / static_cast(xdim-1); + + const vtkm::Float32 xx = 3.0f*(xmin+(xmax-xmin)*(fx)); + const vtkm::Float32 yy = 3.0f*(ymin+(ymax-ymin)*(fy)); + const vtkm::Float32 zz = 3.0f*(zmin+(zmax-zmin)*(fz)); + + v = (xx*xx*xx*xx - 5.0f*xx*xx + yy*yy*yy*yy - 5.0f*yy*yy + zz*zz*zz*zz - 5.0f*zz*zz + 11.8f) * 0.2f + 0.5f; + } +}; + + +// Construct an input data set using the tangle field worklet +vtkm::cont::DataSet MakeIsosurfaceTestDataSet(vtkm::Id3 dims) +{ + vtkm::cont::DataSet dataSet; + + const vtkm::Id3 vdims(dims[0] + 1, dims[1] + 1, dims[2] + 1); + + vtkm::Float32 mins[3] = {-1.0f, -1.0f, -1.0f}; + vtkm::Float32 maxs[3] = {1.0f, 1.0f, 1.0f}; + + vtkm::cont::ArrayHandle fieldArray; + vtkm::cont::ArrayHandleCounting vertexCountImplicitArray(0, 1, vdims[0]*vdims[1]*vdims[2]); + vtkm::worklet::DispatcherMapField tangleFieldDispatcher(TangleField(vdims, mins, maxs)); + tangleFieldDispatcher.Invoke(vertexCountImplicitArray, fieldArray); + + vtkm::Vec origin(0.0f, 0.0f, 0.0f); + vtkm::Vec spacing( + 1.0f/static_cast(dims[0]), + 1.0f/static_cast(dims[2]), + 1.0f/static_cast(dims[1])); + + vtkm::cont::ArrayHandleUniformPointCoordinates + coordinates(vdims, origin, spacing); + dataSet.AddCoordinateSystem( + vtkm::cont::CoordinateSystem("coordinates", coordinates)); + + dataSet.AddField(vtkm::cont::Field("nodevar", vtkm::cont::Field::ASSOC_POINTS, fieldArray)); + + static const vtkm::IdComponent ndim = 3; + vtkm::cont::CellSetStructured cellSet("cells"); + cellSet.SetPointDimensions(vdims); + dataSet.AddCellSet(cellSet); + + return dataSet; +} + +} + + +namespace vtkm +{ +namespace worklet +{ +class SineWorklet : public vtkm::worklet::WorkletMapField +{ +public: + typedef void ControlSignature(FieldIn<>, FieldOut<>); + typedef _2 ExecutionSignature(_1, WorkIndex); + + VTKM_EXEC + vtkm::Float32 operator()(vtkm::Int64 x, vtkm::Id& index) const { + return (vtkm::Sin(1.0*x)); + } +}; +} +} + + +// Run a simple worklet, and compute an isosurface +int main(int argc, char* argv[]) +{ + vtkm::Int64 N = 1024*1024*1024; + if (argc > 1) N = N*atoi(argv[1]); + else N = N*4; + std::cout << "Testing streaming worklet with size " << N << std::endl; + + vtkm::cont::ArrayHandle input; + vtkm::cont::ArrayHandle output; + std::vector data(N); + for (vtkm::Int64 i=0; i DeviceAlgorithms; + vtkm::worklet::SineWorklet sineWorklet; + +#ifdef VTKM_USE_UNIFIED_MEMORY + std::cout << "Testing with unified memory" << std::endl; + + vtkm::worklet::DispatcherMapField + dispatcher(sineWorklet); + + vtkm::cont::Timer<> timer; + + dispatcher.Invoke(input, output); + std::cout << output.GetPortalConstControl().Get(output.GetNumberOfValues()-1) << std::endl; + + vtkm::Float64 elapsedTime = timer.GetElapsedTime(); + std::cout << "Time: " << elapsedTime << std::endl; + +#else + + vtkm::worklet::DispatcherStreamingMapField + dispatcher(sineWorklet); + vtkm::Id NBlocks = N/(1024*1024*1024); + NBlocks *= 2; + dispatcher.SetNumberOfBlocks(NBlocks); + std::cout << "Testing with streaming (without unified memory) with " << NBlocks << " blocks" << std::endl; + + vtkm::cont::Timer<> timer; + + dispatcher.Invoke(input, output); + std::cout << output.GetPortalConstControl().Get(output.GetNumberOfValues()-1) << std::endl; + + vtkm::Float64 elapsedTime = timer.GetElapsedTime(); + std::cout << "Time: " << elapsedTime << std::endl; + +#endif + + int dim = 128; + if (argc > 2) dim = atoi(argv[2]); + std::cout << "Testing Marching Cubes with size " << dim << "x" << dim << "x" << dim << std::endl; + + vtkm::Id3 dims(dim, dim, dim); + vtkm::cont::ArrayHandle > verticesArray, normalsArray; + vtkm::cont::ArrayHandle scalarsArray; + vtkm::cont::DataSet dataSet = MakeIsosurfaceTestDataSet(dims); + + vtkm::filter::MarchingCubes filter; + filter.SetGenerateNormals(true); + filter.SetMergeDuplicatePoints( false ); + filter.SetIsoValue( 0.5 ); + vtkm::filter::ResultDataSet result = + filter.Execute( dataSet, dataSet.GetField("nodevar") ); + + filter.MapFieldOntoOutput(result, dataSet.GetField("nodevar")); + + //need to extract vertices, normals, and scalars + vtkm::cont::DataSet& outputData = result.GetDataSet(); + + typedef vtkm::cont::ArrayHandle< vtkm::Vec > VertType; + vtkm::cont::CoordinateSystem coords = outputData.GetCoordinateSystem(); + + verticesArray = coords.GetData().Cast(); + normalsArray = outputData.GetField("normals").GetData().Cast(); + scalarsArray = outputData.GetField("nodevar").GetData().Cast< vtkm::cont::ArrayHandle >(); + + std::cout << "Number of output vertices: " << verticesArray.GetNumberOfValues() << std::endl; + + std::cout << "vertices: "; + vtkm::cont::printSummary_ArrayHandle(verticesArray, std::cout); + std::cout << std::endl; + std::cout << "normals: "; + vtkm::cont::printSummary_ArrayHandle(normalsArray, std::cout); + std::cout << std::endl; + std::cout << "scalars: "; + vtkm::cont::printSummary_ArrayHandle(scalarsArray, std::cout); + std::cout << std::endl; + + return 0; +} + diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index ae8c86ce5..526a62431 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -77,8 +77,13 @@ public: typedef vtkm::exec::cuda::internal::ConstArrayPortalFromThrust< T > PortalConstType; VTKM_CONT +#ifdef VTKM_USE_UNIFIED_MEMORY + ArrayManagerExecutionThrustDevice(StorageType *storage) + : Storage(storage), Pointer(0), Length(0) +#else ArrayManagerExecutionThrustDevice(StorageType *storage) : Storage(storage), Array() +#endif { } @@ -93,7 +98,11 @@ public: /// VTKM_CONT vtkm::Id GetNumberOfValues() const { +#ifdef VTKM_USE_UNIFIED_MEMORY + return this->Length; +#else return static_cast(this->Array.size()); +#endif } /// Allocates the appropriate size of the array and copies the given data @@ -111,9 +120,14 @@ public: // The data in this->Array should already be valid. } - +#ifdef VTKM_USE_UNIFIED_MEMORY + ::thrust::cuda::pointer first(this->Pointer); + ::thrust::cuda::pointer last(this->Pointer + this->Length); + return PortalType(first, last); +#else return PortalConstType(this->Array.data(), this->Array.data() + static_cast(this->Array.size())); +#endif } /// Workaround for nvcc 7.5 compiler warning bug. @@ -139,8 +153,14 @@ public: // The data in this->Array should already be valid. } +#ifdef VTKM_USE_UNIFIED_MEMORY + ::thrust::cuda::pointer first(this->Pointer); + ::thrust::cuda::pointer last(this->Pointer + this->Length); + return PortalType(first, last); +#else return PortalType(this->Array.data(), this->Array.data() + static_cast(this->Array.size())); +#endif } /// Workaround for nvcc 7.5 compiler warning bug. @@ -160,20 +180,43 @@ public: { // Resize to 0 first so that you don't have to copy data when resizing // to a larger size. +#ifdef VTKM_USE_UNIFIED_MEMORY + if (this->Pointer) { cudaFree(this->Pointer); this->Pointer = 0; this->Length = 0; } +#else this->Array.clear(); +#endif } try { +#ifdef VTKM_USE_UNIFIED_MEMORY + if (numberOfValues != this->GetNumberOfValues()) + { + ValueType* temp; + cudaError_t r = cudaMallocManaged(&(temp), numberOfValues*sizeof(ValueType)); + if (r == cudaErrorMemoryAllocation) throw std::bad_alloc(); + if (numberOfValues <= this->Length) ::thrust::copy(this->Pointer, this->Pointer + numberOfValues, temp); + if (this->Pointer) { cudaFree(this->Pointer); this->Pointer = 0; this->Length = 0; } + this->Pointer = temp; + + this->Length = numberOfValues; + } +#else this->Array.resize(static_cast(numberOfValues)); +#endif } catch (std::bad_alloc error) { throw vtkm::cont::ErrorControlBadAllocation(error.what()); } - +#ifdef VTKM_USE_UNIFIED_MEMORY + ::thrust::cuda::pointer first(this->Pointer); + ::thrust::cuda::pointer last(this->Pointer + this->Length); + return PortalType(first, last); +#else return PortalType(this->Array.data(), this->Array.data() + static_cast(this->Array.size())); +#endif } /// Workaround for nvcc 7.5 compiler warning bug. @@ -190,14 +233,23 @@ public: VTKM_CONT void RetrieveOutputData(StorageType *storage) const { +#ifdef VTKM_USE_UNIFIED_MEMORY + storage->Allocate(this->Length); +#else storage->Allocate(static_cast(this->Array.size())); +#endif try - { + { +#ifdef VTKM_USE_UNIFIED_MEMORY + cudaDeviceSynchronize(); + ::thrust::copy(this->Pointer, this->Pointer + this->Length, vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal())); +#else ::thrust::copy( - this->Array.data(), - this->Array.data() + static_cast(this->Array.size()), - vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal())); - } + this->Array.data(), + this->Array.data() + static_cast(this->Array.size()), + vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal())); +#endif + } catch (...) { vtkm::cont::cuda::internal::throwAsVTKmException(); @@ -211,10 +263,14 @@ public: template VTKM_CONT void CopyInto(IteratorTypeControl dest) const { +#ifdef VTKM_USE_UNIFIED_MEMORY + ::thrust::copy(this->Pointer, this->Pointer + this->Length, dest); +#else ::thrust::copy( this->Array.data(), this->Array.data() + static_cast(this->Array.size()), dest); +#endif } /// Resizes the device vector. @@ -223,9 +279,27 @@ public: { // The operation will succeed even if this assertion fails, but this // is still supposed to be a precondition to Shrink. - VTKM_ASSERT(numberOfValues <= static_cast(this->Array.size())); +#ifdef VTKM_USE_UNIFIED_MEMORY + VTKM_ASSERT(numberOfValues <= static_cast(this->Length)); + try + { + ValueType* temp; + cudaError_t r = cudaMallocManaged(&(temp), numberOfValues*sizeof(ValueType)); + if (r == cudaErrorMemoryAllocation) throw std::bad_alloc(); + if (this->Length > 0) ::thrust::copy(this->Pointer, this->Pointer + numberOfValues, temp); + if (this->Pointer) cudaFree(this->Pointer); + this->Pointer = temp; + this->Length = numberOfValues; + } + catch (std::bad_alloc error) + { + throw vtkm::cont::ErrorControlBadAllocation(error.what()); + } +#else + VTKM_ASSERT(numberOfValues <= static_cast(this->Array.size())); this->Array.resize(static_cast(numberOfValues)); +#endif } @@ -233,8 +307,12 @@ public: /// VTKM_CONT void ReleaseResources() { +#ifdef VTKM_USE_UNIFIED_MEMORY + if (this->Pointer) { cudaFree(this->Pointer); this->Pointer = 0; this->Length = 0; } +#else this->Array.clear(); this->Array.shrink_to_fit(); +#endif } private: @@ -246,17 +324,31 @@ private: StorageType *Storage; +#ifdef VTKM_USE_UNIFIED_MEMORY + ValueType *Pointer; + vtkm::Id Length; +#else ::thrust::system::cuda::vector > Array; +#endif VTKM_CONT void CopyToExecution() { try { +#ifdef VTKM_USE_UNIFIED_MEMORY + cudaError_t r = cudaMallocManaged(&(this->Pointer), (this->Storage->GetNumberOfValues())*sizeof(ValueType)); + if (r == cudaErrorMemoryAllocation) throw std::bad_alloc(); + ::thrust::copy(vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()), + vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst()), + this->Pointer); + this->Length = this->Storage->GetNumberOfValues(); +#else this->Array.assign( vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()), vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst())); +#endif } catch (...) { From dfae8f5e3e00519169c4c02ab9e29546562777c8 Mon Sep 17 00:00:00 2001 From: Christopher Sewell Date: Wed, 18 Jan 2017 11:47:52 -0700 Subject: [PATCH 2/3] Fixing merge issue with contour_tree --- examples/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index dd49bb824..95edf8f5c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -24,6 +24,7 @@ set(CMAKE_PREFIX_PATH ${VTKm_BINARY_DIR}/${VTKm_INSTALL_CONFIG_DIR}) add_subdirectory(clipping) +add_subdirectory(contour_tree) add_subdirectory(demo) add_subdirectory(dynamic_dispatcher) add_subdirectory(hello_world) From 835073dae2cb8a5fe898e57ac2328f1de9927a4a Mon Sep 17 00:00:00 2001 From: Li-Ta Lo - 194699 Date: Mon, 13 Feb 2017 11:45:17 -0700 Subject: [PATCH 3/3] clean up with custom allocator --- .../ArrayManagerExecutionThrustDevice.h | 116 ++++-------------- 1 file changed, 21 insertions(+), 95 deletions(-) diff --git a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h index 526a62431..3dea48334 100644 --- a/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h +++ b/vtkm/cont/cuda/internal/ArrayManagerExecutionThrustDevice.h @@ -56,6 +56,21 @@ template { // no-op } + +#ifdef VTKM_USE_UNIFIED_MEMORY + thrust::device_ptr allocate(std::size_t num) + { + T* temp; + cudaError_t r = cudaMallocManaged(&(temp), num*sizeof(T)); + if (r == cudaErrorMemoryAllocation) + throw std::bad_alloc(); + return thrust::device_ptr(temp); + } + void deallocate(thrust::device_ptr p, std::size_t vtkmNotUsed(num)) + { + cudaFree(p.get()); + } +#endif }; /// \c ArrayManagerExecutionThrustDevice provides an implementation for a \c @@ -77,13 +92,9 @@ public: typedef vtkm::exec::cuda::internal::ConstArrayPortalFromThrust< T > PortalConstType; VTKM_CONT -#ifdef VTKM_USE_UNIFIED_MEMORY - ArrayManagerExecutionThrustDevice(StorageType *storage) - : Storage(storage), Pointer(0), Length(0) -#else ArrayManagerExecutionThrustDevice(StorageType *storage) : Storage(storage), Array() -#endif + { } @@ -98,11 +109,7 @@ public: /// VTKM_CONT vtkm::Id GetNumberOfValues() const { -#ifdef VTKM_USE_UNIFIED_MEMORY - return this->Length; -#else return static_cast(this->Array.size()); -#endif } /// Allocates the appropriate size of the array and copies the given data @@ -119,15 +126,8 @@ public: { // The data in this->Array should already be valid. } - -#ifdef VTKM_USE_UNIFIED_MEMORY - ::thrust::cuda::pointer first(this->Pointer); - ::thrust::cuda::pointer last(this->Pointer + this->Length); - return PortalType(first, last); -#else return PortalConstType(this->Array.data(), this->Array.data() + static_cast(this->Array.size())); -#endif } /// Workaround for nvcc 7.5 compiler warning bug. @@ -152,15 +152,8 @@ public: { // The data in this->Array should already be valid. } - -#ifdef VTKM_USE_UNIFIED_MEMORY - ::thrust::cuda::pointer first(this->Pointer); - ::thrust::cuda::pointer last(this->Pointer + this->Length); - return PortalType(first, last); -#else return PortalType(this->Array.data(), this->Array.data() + static_cast(this->Array.size())); -#endif } /// Workaround for nvcc 7.5 compiler warning bug. @@ -180,43 +173,19 @@ public: { // Resize to 0 first so that you don't have to copy data when resizing // to a larger size. -#ifdef VTKM_USE_UNIFIED_MEMORY - if (this->Pointer) { cudaFree(this->Pointer); this->Pointer = 0; this->Length = 0; } -#else this->Array.clear(); -#endif } try { -#ifdef VTKM_USE_UNIFIED_MEMORY - if (numberOfValues != this->GetNumberOfValues()) - { - ValueType* temp; - cudaError_t r = cudaMallocManaged(&(temp), numberOfValues*sizeof(ValueType)); - if (r == cudaErrorMemoryAllocation) throw std::bad_alloc(); - if (numberOfValues <= this->Length) ::thrust::copy(this->Pointer, this->Pointer + numberOfValues, temp); - if (this->Pointer) { cudaFree(this->Pointer); this->Pointer = 0; this->Length = 0; } - this->Pointer = temp; - - this->Length = numberOfValues; - } -#else this->Array.resize(static_cast(numberOfValues)); -#endif } catch (std::bad_alloc error) { throw vtkm::cont::ErrorControlBadAllocation(error.what()); } -#ifdef VTKM_USE_UNIFIED_MEMORY - ::thrust::cuda::pointer first(this->Pointer); - ::thrust::cuda::pointer last(this->Pointer + this->Length); - return PortalType(first, last); -#else return PortalType(this->Array.data(), this->Array.data() + static_cast(this->Array.size())); -#endif } /// Workaround for nvcc 7.5 compiler warning bug. @@ -233,22 +202,17 @@ public: VTKM_CONT void RetrieveOutputData(StorageType *storage) const { -#ifdef VTKM_USE_UNIFIED_MEMORY - storage->Allocate(this->Length); -#else storage->Allocate(static_cast(this->Array.size())); -#endif try { #ifdef VTKM_USE_UNIFIED_MEMORY cudaDeviceSynchronize(); - ::thrust::copy(this->Pointer, this->Pointer + this->Length, vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal())); -#else +#endif ::thrust::copy( this->Array.data(), this->Array.data() + static_cast(this->Array.size()), vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal())); -#endif + } catch (...) { @@ -263,14 +227,10 @@ public: template VTKM_CONT void CopyInto(IteratorTypeControl dest) const { -#ifdef VTKM_USE_UNIFIED_MEMORY - ::thrust::copy(this->Pointer, this->Pointer + this->Length, dest); -#else ::thrust::copy( this->Array.data(), this->Array.data() + static_cast(this->Array.size()), dest); -#endif } /// Resizes the device vector. @@ -279,27 +239,8 @@ public: { // The operation will succeed even if this assertion fails, but this // is still supposed to be a precondition to Shrink. -#ifdef VTKM_USE_UNIFIED_MEMORY - VTKM_ASSERT(numberOfValues <= static_cast(this->Length)); - - try - { - ValueType* temp; - cudaError_t r = cudaMallocManaged(&(temp), numberOfValues*sizeof(ValueType)); - if (r == cudaErrorMemoryAllocation) throw std::bad_alloc(); - if (this->Length > 0) ::thrust::copy(this->Pointer, this->Pointer + numberOfValues, temp); - if (this->Pointer) cudaFree(this->Pointer); - this->Pointer = temp; - this->Length = numberOfValues; - } - catch (std::bad_alloc error) - { - throw vtkm::cont::ErrorControlBadAllocation(error.what()); - } -#else VTKM_ASSERT(numberOfValues <= static_cast(this->Array.size())); this->Array.resize(static_cast(numberOfValues)); -#endif } @@ -307,12 +248,8 @@ public: /// VTKM_CONT void ReleaseResources() { -#ifdef VTKM_USE_UNIFIED_MEMORY - if (this->Pointer) { cudaFree(this->Pointer); this->Pointer = 0; this->Length = 0; } -#else this->Array.clear(); this->Array.shrink_to_fit(); -#endif } private: @@ -324,31 +261,20 @@ private: StorageType *Storage; -#ifdef VTKM_USE_UNIFIED_MEMORY - ValueType *Pointer; - vtkm::Id Length; -#else ::thrust::system::cuda::vector > Array; -#endif + VTKM_CONT void CopyToExecution() { try { -#ifdef VTKM_USE_UNIFIED_MEMORY - cudaError_t r = cudaMallocManaged(&(this->Pointer), (this->Storage->GetNumberOfValues())*sizeof(ValueType)); - if (r == cudaErrorMemoryAllocation) throw std::bad_alloc(); - ::thrust::copy(vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()), - vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst()), - this->Pointer); - this->Length = this->Storage->GetNumberOfValues(); -#else + this->Array.assign( vtkm::cont::ArrayPortalToIteratorBegin(this->Storage->GetPortalConst()), vtkm::cont::ArrayPortalToIteratorEnd(this->Storage->GetPortalConst())); -#endif + } catch (...) {