mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-09-08 21:33:55 +00:00
new unified memory effort with the new Thrust device
This commit is contained in:
commit
b470175f98
@ -108,6 +108,18 @@ option(VTKm_USE_DOUBLE_PRECISION
|
||||
)
|
||||
option(VTKm_USE_64BIT_IDS "Use 64-bit indices." ON)
|
||||
|
||||
if (VTKm_ENABLE_CUDA)
|
||||
option(VTKm_USE_UNIFIED_MEMORY
|
||||
"Use CUDA unified memory"
|
||||
OFF
|
||||
)
|
||||
endif (VTKm_ENABLE_CUDA)
|
||||
|
||||
if (VTKm_USE_UNIFIED_MEMORY)
|
||||
set(CMAKE_CXX_FLAGS "-DVTKM_USE_UNIFIED_MEMORY ${CMAKE_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
|
||||
option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON)
|
||||
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
|
||||
|
||||
|
@ -35,4 +35,5 @@ add_subdirectory(tetrahedra)
|
||||
if(VTKm_ENABLE_RENDERING)
|
||||
add_subdirectory(rendering)
|
||||
endif()
|
||||
add_subdirectory(unified_memory)
|
||||
|
||||
|
36
examples/unified_memory/CMakeLists.txt
Normal file
36
examples/unified_memory/CMakeLists.txt
Normal file
@ -0,0 +1,36 @@
|
||||
##=============================================================================
|
||||
##
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
##
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2015 Sandia Corporation.
|
||||
## Copyright 2015 UT-Battelle, LLC.
|
||||
## Copyright 2015 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##
|
||||
##=============================================================================
|
||||
|
||||
#Find the VTK-m package
|
||||
find_package(VTKm REQUIRED QUIET
|
||||
OPTIONAL_COMPONENTS Serial CUDA TBB OpenGL GLUT
|
||||
)
|
||||
|
||||
if(VTKm_CUDA_FOUND)
|
||||
vtkm_disable_troublesome_thrust_warnings()
|
||||
# Cuda compiles do not respect target_include_directories
|
||||
cuda_include_directories(${VTKm_INCLUDE_DIRS})
|
||||
cuda_add_executable(UnifiedMemory_CUDA UnifiedMemory.cu)
|
||||
target_link_libraries(UnifiedMemory_CUDA PRIVATE ${VTKm_LIBRARIES})
|
||||
target_compile_options(UnifiedMemory_CUDA PRIVATE ${VTKm_COMPILE_OPTIONS})
|
||||
endif()
|
||||
|
223
examples/unified_memory/UnifiedMemory.cu
Normal file
223
examples/unified_memory/UnifiedMemory.cu
Normal file
@ -0,0 +1,223 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2014 Sandia Corporation.
|
||||
// Copyright 2014 UT-Battelle, LLC.
|
||||
// Copyright 2014 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_CUDA
|
||||
|
||||
#include <vtkm/cont/ArrayHandleStreaming.h>
|
||||
#include <vtkm/worklet/DispatcherStreamingMapField.h>
|
||||
#include <vtkm/filter/MarchingCubes.h>
|
||||
#include <vtkm/worklet/DispatcherMapField.h>
|
||||
|
||||
#include <vtkm/Math.h>
|
||||
#include <vtkm/cont/ArrayHandleCounting.h>
|
||||
#include <vtkm/cont/CellSetExplicit.h>
|
||||
#include <vtkm/cont/DataSet.h>
|
||||
#include <vtkm/cont/Timer.h>
|
||||
|
||||
|
||||
namespace {
|
||||
|
||||
// Define the tangle field for the input data
|
||||
class TangleField : public vtkm::worklet::WorkletMapField
|
||||
{
|
||||
public:
|
||||
typedef void ControlSignature(FieldIn<IdType> vertexId, FieldOut<Scalar> v);
|
||||
typedef void ExecutionSignature(_1, _2);
|
||||
typedef _1 InputDomain;
|
||||
|
||||
const vtkm::Id xdim, ydim, zdim;
|
||||
const vtkm::Float32 xmin, ymin, zmin, xmax, ymax, zmax;
|
||||
const vtkm::Id cellsPerLayer;
|
||||
|
||||
VTKM_CONT
|
||||
TangleField(const vtkm::Id3 dims, const vtkm::Float32 mins[3], const vtkm::Float32 maxs[3]) : xdim(dims[0]), ydim(dims[1]), zdim(dims[2]),
|
||||
xmin(mins[0]), ymin(mins[1]), zmin(mins[2]), xmax(maxs[0]), ymax(maxs[1]), zmax(maxs[2]), cellsPerLayer((xdim) * (ydim)) { };
|
||||
|
||||
VTKM_EXEC
|
||||
void operator()(const vtkm::Id &vertexId, vtkm::Float32 &v) const
|
||||
{
|
||||
const vtkm::Id x = vertexId % (xdim);
|
||||
const vtkm::Id y = (vertexId / (xdim)) % (ydim);
|
||||
const vtkm::Id z = vertexId / cellsPerLayer;
|
||||
|
||||
const vtkm::Float32 fx = static_cast<vtkm::Float32>(x) / static_cast<vtkm::Float32>(xdim-1);
|
||||
const vtkm::Float32 fy = static_cast<vtkm::Float32>(y) / static_cast<vtkm::Float32>(xdim-1);
|
||||
const vtkm::Float32 fz = static_cast<vtkm::Float32>(z) / static_cast<vtkm::Float32>(xdim-1);
|
||||
|
||||
const vtkm::Float32 xx = 3.0f*(xmin+(xmax-xmin)*(fx));
|
||||
const vtkm::Float32 yy = 3.0f*(ymin+(ymax-ymin)*(fy));
|
||||
const vtkm::Float32 zz = 3.0f*(zmin+(zmax-zmin)*(fz));
|
||||
|
||||
v = (xx*xx*xx*xx - 5.0f*xx*xx + yy*yy*yy*yy - 5.0f*yy*yy + zz*zz*zz*zz - 5.0f*zz*zz + 11.8f) * 0.2f + 0.5f;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Construct an input data set using the tangle field worklet
|
||||
vtkm::cont::DataSet MakeIsosurfaceTestDataSet(vtkm::Id3 dims)
|
||||
{
|
||||
vtkm::cont::DataSet dataSet;
|
||||
|
||||
const vtkm::Id3 vdims(dims[0] + 1, dims[1] + 1, dims[2] + 1);
|
||||
|
||||
vtkm::Float32 mins[3] = {-1.0f, -1.0f, -1.0f};
|
||||
vtkm::Float32 maxs[3] = {1.0f, 1.0f, 1.0f};
|
||||
|
||||
vtkm::cont::ArrayHandle<vtkm::Float32> fieldArray;
|
||||
vtkm::cont::ArrayHandleCounting<vtkm::Id> vertexCountImplicitArray(0, 1, vdims[0]*vdims[1]*vdims[2]);
|
||||
vtkm::worklet::DispatcherMapField<TangleField> tangleFieldDispatcher(TangleField(vdims, mins, maxs));
|
||||
tangleFieldDispatcher.Invoke(vertexCountImplicitArray, fieldArray);
|
||||
|
||||
vtkm::Vec<vtkm::FloatDefault,3> origin(0.0f, 0.0f, 0.0f);
|
||||
vtkm::Vec<vtkm::FloatDefault,3> spacing(
|
||||
1.0f/static_cast<vtkm::FloatDefault>(dims[0]),
|
||||
1.0f/static_cast<vtkm::FloatDefault>(dims[2]),
|
||||
1.0f/static_cast<vtkm::FloatDefault>(dims[1]));
|
||||
|
||||
vtkm::cont::ArrayHandleUniformPointCoordinates
|
||||
coordinates(vdims, origin, spacing);
|
||||
dataSet.AddCoordinateSystem(
|
||||
vtkm::cont::CoordinateSystem("coordinates", coordinates));
|
||||
|
||||
dataSet.AddField(vtkm::cont::Field("nodevar", vtkm::cont::Field::ASSOC_POINTS, fieldArray));
|
||||
|
||||
static const vtkm::IdComponent ndim = 3;
|
||||
vtkm::cont::CellSetStructured<ndim> cellSet("cells");
|
||||
cellSet.SetPointDimensions(vdims);
|
||||
dataSet.AddCellSet(cellSet);
|
||||
|
||||
return dataSet;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace worklet
|
||||
{
|
||||
class SineWorklet : public vtkm::worklet::WorkletMapField
|
||||
{
|
||||
public:
|
||||
typedef void ControlSignature(FieldIn<>, FieldOut<>);
|
||||
typedef _2 ExecutionSignature(_1, WorkIndex);
|
||||
|
||||
VTKM_EXEC
|
||||
vtkm::Float32 operator()(vtkm::Int64 x, vtkm::Id& index) const {
|
||||
return (vtkm::Sin(1.0*x));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Run a simple worklet, and compute an isosurface
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
vtkm::Int64 N = 1024*1024*1024;
|
||||
if (argc > 1) N = N*atoi(argv[1]);
|
||||
else N = N*4;
|
||||
std::cout << "Testing streaming worklet with size " << N << std::endl;
|
||||
|
||||
vtkm::cont::ArrayHandle<vtkm::Int64> input;
|
||||
vtkm::cont::ArrayHandle<vtkm::Float32> output;
|
||||
std::vector<vtkm::Int64> data(N);
|
||||
for (vtkm::Int64 i=0; i<N; i++) data[i] = i;
|
||||
input = vtkm::cont::make_ArrayHandle(data);
|
||||
|
||||
typedef vtkm::cont::DeviceAdapterAlgorithm<VTKM_DEFAULT_DEVICE_ADAPTER_TAG> DeviceAlgorithms;
|
||||
vtkm::worklet::SineWorklet sineWorklet;
|
||||
|
||||
#ifdef VTKM_USE_UNIFIED_MEMORY
|
||||
std::cout << "Testing with unified memory" << std::endl;
|
||||
|
||||
vtkm::worklet::DispatcherMapField<vtkm::worklet::SineWorklet>
|
||||
dispatcher(sineWorklet);
|
||||
|
||||
vtkm::cont::Timer<> timer;
|
||||
|
||||
dispatcher.Invoke(input, output);
|
||||
std::cout << output.GetPortalConstControl().Get(output.GetNumberOfValues()-1) << std::endl;
|
||||
|
||||
vtkm::Float64 elapsedTime = timer.GetElapsedTime();
|
||||
std::cout << "Time: " << elapsedTime << std::endl;
|
||||
|
||||
#else
|
||||
|
||||
vtkm::worklet::DispatcherStreamingMapField<vtkm::worklet::SineWorklet>
|
||||
dispatcher(sineWorklet);
|
||||
vtkm::Id NBlocks = N/(1024*1024*1024);
|
||||
NBlocks *= 2;
|
||||
dispatcher.SetNumberOfBlocks(NBlocks);
|
||||
std::cout << "Testing with streaming (without unified memory) with " << NBlocks << " blocks" << std::endl;
|
||||
|
||||
vtkm::cont::Timer<> timer;
|
||||
|
||||
dispatcher.Invoke(input, output);
|
||||
std::cout << output.GetPortalConstControl().Get(output.GetNumberOfValues()-1) << std::endl;
|
||||
|
||||
vtkm::Float64 elapsedTime = timer.GetElapsedTime();
|
||||
std::cout << "Time: " << elapsedTime << std::endl;
|
||||
|
||||
#endif
|
||||
|
||||
int dim = 128;
|
||||
if (argc > 2) dim = atoi(argv[2]);
|
||||
std::cout << "Testing Marching Cubes with size " << dim << "x" << dim << "x" << dim << std::endl;
|
||||
|
||||
vtkm::Id3 dims(dim, dim, dim);
|
||||
vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::Float32,3> > verticesArray, normalsArray;
|
||||
vtkm::cont::ArrayHandle<vtkm::Float32> scalarsArray;
|
||||
vtkm::cont::DataSet dataSet = MakeIsosurfaceTestDataSet(dims);
|
||||
|
||||
vtkm::filter::MarchingCubes filter;
|
||||
filter.SetGenerateNormals(true);
|
||||
filter.SetMergeDuplicatePoints( false );
|
||||
filter.SetIsoValue( 0.5 );
|
||||
vtkm::filter::ResultDataSet result =
|
||||
filter.Execute( dataSet, dataSet.GetField("nodevar") );
|
||||
|
||||
filter.MapFieldOntoOutput(result, dataSet.GetField("nodevar"));
|
||||
|
||||
//need to extract vertices, normals, and scalars
|
||||
vtkm::cont::DataSet& outputData = result.GetDataSet();
|
||||
|
||||
typedef vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32,3> > VertType;
|
||||
vtkm::cont::CoordinateSystem coords = outputData.GetCoordinateSystem();
|
||||
|
||||
verticesArray = coords.GetData().Cast<VertType>();
|
||||
normalsArray = outputData.GetField("normals").GetData().Cast<VertType>();
|
||||
scalarsArray = outputData.GetField("nodevar").GetData().Cast< vtkm::cont::ArrayHandle<vtkm::Float32> >();
|
||||
|
||||
std::cout << "Number of output vertices: " << verticesArray.GetNumberOfValues() << std::endl;
|
||||
|
||||
std::cout << "vertices: ";
|
||||
vtkm::cont::printSummary_ArrayHandle(verticesArray, std::cout);
|
||||
std::cout << std::endl;
|
||||
std::cout << "normals: ";
|
||||
vtkm::cont::printSummary_ArrayHandle(normalsArray, std::cout);
|
||||
std::cout << std::endl;
|
||||
std::cout << "scalars: ";
|
||||
vtkm::cont::printSummary_ArrayHandle(scalarsArray, std::cout);
|
||||
std::cout << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -221,6 +221,7 @@ private:
|
||||
dispatcher.Invoke( this->InputHandle,
|
||||
cellSet,
|
||||
result);
|
||||
//result.SyncControlArray();
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
@ -257,6 +258,7 @@ private:
|
||||
dispatcher.Invoke( dinput,
|
||||
cellSet,
|
||||
result);
|
||||
//result.SyncControlArray();
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
@ -301,6 +303,7 @@ private:
|
||||
dispatcher.Invoke( this->InputHandle,
|
||||
cellSet,
|
||||
result);
|
||||
//result.SyncControlArray();
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
@ -336,6 +339,7 @@ private:
|
||||
dispatcher.Invoke( dinput,
|
||||
cellSet,
|
||||
result);
|
||||
//result.SyncControlArray();
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
@ -385,6 +389,7 @@ private:
|
||||
dispatcher.Invoke( dinput,
|
||||
cellSet,
|
||||
result);
|
||||
//result.SyncControlArray();
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
@ -418,6 +423,7 @@ private:
|
||||
dispatcher.Invoke( this->InputHandle,
|
||||
cellSet,
|
||||
result);
|
||||
//result.SyncControlArray();
|
||||
|
||||
return timer.GetElapsedTime();
|
||||
}
|
||||
|
@ -175,7 +175,12 @@ public:
|
||||
try
|
||||
{
|
||||
ValueType *tmp;
|
||||
#ifdef VTKM_USE_UNIFIED_MEMORY
|
||||
VTKM_CUDA_CALL(cudaMallocManaged(&tmp, bufferSize));
|
||||
cudaMemAdvise(tmp, bufferSize, cudaMemAdviseSetPreferredLocation, 0);
|
||||
#else
|
||||
VTKM_CUDA_CALL(cudaMalloc(&tmp, bufferSize));
|
||||
#endif
|
||||
this->Begin = PointerType(tmp);
|
||||
}
|
||||
catch (const std::exception &error)
|
||||
@ -210,6 +215,9 @@ public:
|
||||
storage->Allocate(this->GetNumberOfValues());
|
||||
try
|
||||
{
|
||||
#ifdef VTKM_USE_UNIFIED_MEMORY
|
||||
cudaDeviceSynchronize();
|
||||
#endif
|
||||
::thrust::copy(this->Begin, this->End,
|
||||
vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal()));
|
||||
}
|
||||
@ -267,6 +275,7 @@ private:
|
||||
PointerType End;
|
||||
PointerType Capacity;
|
||||
|
||||
|
||||
VTKM_CONT
|
||||
void CopyToExecution()
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user