new unified memory effort with the new Thrust device

This commit is contained in:
Li-Ta Lo - 194699 2017-03-09 14:51:45 -07:00
commit b470175f98
6 changed files with 287 additions and 0 deletions

@ -108,6 +108,18 @@ option(VTKm_USE_DOUBLE_PRECISION
)
option(VTKm_USE_64BIT_IDS "Use 64-bit indices." ON)
if (VTKm_ENABLE_CUDA)
option(VTKm_USE_UNIFIED_MEMORY
"Use CUDA unified memory"
OFF
)
endif (VTKm_ENABLE_CUDA)
if (VTKm_USE_UNIFIED_MEMORY)
set(CMAKE_CXX_FLAGS "-DVTKM_USE_UNIFIED_MEMORY ${CMAKE_CXX_FLAGS}")
endif()
option(BUILD_SHARED_LIBS "Build VTK-m with shared libraries" ON)
set(VTKm_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})

@ -35,4 +35,5 @@ add_subdirectory(tetrahedra)
if(VTKm_ENABLE_RENDERING)
add_subdirectory(rendering)
endif()
add_subdirectory(unified_memory)

@ -0,0 +1,36 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2015 Sandia Corporation.
## Copyright 2015 UT-Battelle, LLC.
## Copyright 2015 Los Alamos National Security.
##
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##
##=============================================================================
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET
OPTIONAL_COMPONENTS Serial CUDA TBB OpenGL GLUT
)
if(VTKm_CUDA_FOUND)
vtkm_disable_troublesome_thrust_warnings()
# Cuda compiles do not respect target_include_directories
cuda_include_directories(${VTKm_INCLUDE_DIRS})
cuda_add_executable(UnifiedMemory_CUDA UnifiedMemory.cu)
target_link_libraries(UnifiedMemory_CUDA PRIVATE ${VTKm_LIBRARIES})
target_compile_options(UnifiedMemory_CUDA PRIVATE ${VTKm_COMPILE_OPTIONS})
endif()

@ -0,0 +1,223 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_CUDA
#include <vtkm/cont/ArrayHandleStreaming.h>
#include <vtkm/worklet/DispatcherStreamingMapField.h>
#include <vtkm/filter/MarchingCubes.h>
#include <vtkm/worklet/DispatcherMapField.h>
#include <vtkm/Math.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/CellSetExplicit.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/Timer.h>
namespace {
// Define the tangle field for the input data
class TangleField : public vtkm::worklet::WorkletMapField
{
public:
typedef void ControlSignature(FieldIn<IdType> vertexId, FieldOut<Scalar> v);
typedef void ExecutionSignature(_1, _2);
typedef _1 InputDomain;
const vtkm::Id xdim, ydim, zdim;
const vtkm::Float32 xmin, ymin, zmin, xmax, ymax, zmax;
const vtkm::Id cellsPerLayer;
VTKM_CONT
TangleField(const vtkm::Id3 dims, const vtkm::Float32 mins[3], const vtkm::Float32 maxs[3]) : xdim(dims[0]), ydim(dims[1]), zdim(dims[2]),
xmin(mins[0]), ymin(mins[1]), zmin(mins[2]), xmax(maxs[0]), ymax(maxs[1]), zmax(maxs[2]), cellsPerLayer((xdim) * (ydim)) { };
VTKM_EXEC
void operator()(const vtkm::Id &vertexId, vtkm::Float32 &v) const
{
const vtkm::Id x = vertexId % (xdim);
const vtkm::Id y = (vertexId / (xdim)) % (ydim);
const vtkm::Id z = vertexId / cellsPerLayer;
const vtkm::Float32 fx = static_cast<vtkm::Float32>(x) / static_cast<vtkm::Float32>(xdim-1);
const vtkm::Float32 fy = static_cast<vtkm::Float32>(y) / static_cast<vtkm::Float32>(xdim-1);
const vtkm::Float32 fz = static_cast<vtkm::Float32>(z) / static_cast<vtkm::Float32>(xdim-1);
const vtkm::Float32 xx = 3.0f*(xmin+(xmax-xmin)*(fx));
const vtkm::Float32 yy = 3.0f*(ymin+(ymax-ymin)*(fy));
const vtkm::Float32 zz = 3.0f*(zmin+(zmax-zmin)*(fz));
v = (xx*xx*xx*xx - 5.0f*xx*xx + yy*yy*yy*yy - 5.0f*yy*yy + zz*zz*zz*zz - 5.0f*zz*zz + 11.8f) * 0.2f + 0.5f;
}
};
// Construct an input data set using the tangle field worklet
vtkm::cont::DataSet MakeIsosurfaceTestDataSet(vtkm::Id3 dims)
{
vtkm::cont::DataSet dataSet;
const vtkm::Id3 vdims(dims[0] + 1, dims[1] + 1, dims[2] + 1);
vtkm::Float32 mins[3] = {-1.0f, -1.0f, -1.0f};
vtkm::Float32 maxs[3] = {1.0f, 1.0f, 1.0f};
vtkm::cont::ArrayHandle<vtkm::Float32> fieldArray;
vtkm::cont::ArrayHandleCounting<vtkm::Id> vertexCountImplicitArray(0, 1, vdims[0]*vdims[1]*vdims[2]);
vtkm::worklet::DispatcherMapField<TangleField> tangleFieldDispatcher(TangleField(vdims, mins, maxs));
tangleFieldDispatcher.Invoke(vertexCountImplicitArray, fieldArray);
vtkm::Vec<vtkm::FloatDefault,3> origin(0.0f, 0.0f, 0.0f);
vtkm::Vec<vtkm::FloatDefault,3> spacing(
1.0f/static_cast<vtkm::FloatDefault>(dims[0]),
1.0f/static_cast<vtkm::FloatDefault>(dims[2]),
1.0f/static_cast<vtkm::FloatDefault>(dims[1]));
vtkm::cont::ArrayHandleUniformPointCoordinates
coordinates(vdims, origin, spacing);
dataSet.AddCoordinateSystem(
vtkm::cont::CoordinateSystem("coordinates", coordinates));
dataSet.AddField(vtkm::cont::Field("nodevar", vtkm::cont::Field::ASSOC_POINTS, fieldArray));
static const vtkm::IdComponent ndim = 3;
vtkm::cont::CellSetStructured<ndim> cellSet("cells");
cellSet.SetPointDimensions(vdims);
dataSet.AddCellSet(cellSet);
return dataSet;
}
}
namespace vtkm
{
namespace worklet
{
class SineWorklet : public vtkm::worklet::WorkletMapField
{
public:
typedef void ControlSignature(FieldIn<>, FieldOut<>);
typedef _2 ExecutionSignature(_1, WorkIndex);
VTKM_EXEC
vtkm::Float32 operator()(vtkm::Int64 x, vtkm::Id& index) const {
return (vtkm::Sin(1.0*x));
}
};
}
}
// Run a simple worklet, and compute an isosurface
int main(int argc, char* argv[])
{
vtkm::Int64 N = 1024*1024*1024;
if (argc > 1) N = N*atoi(argv[1]);
else N = N*4;
std::cout << "Testing streaming worklet with size " << N << std::endl;
vtkm::cont::ArrayHandle<vtkm::Int64> input;
vtkm::cont::ArrayHandle<vtkm::Float32> output;
std::vector<vtkm::Int64> data(N);
for (vtkm::Int64 i=0; i<N; i++) data[i] = i;
input = vtkm::cont::make_ArrayHandle(data);
typedef vtkm::cont::DeviceAdapterAlgorithm<VTKM_DEFAULT_DEVICE_ADAPTER_TAG> DeviceAlgorithms;
vtkm::worklet::SineWorklet sineWorklet;
#ifdef VTKM_USE_UNIFIED_MEMORY
std::cout << "Testing with unified memory" << std::endl;
vtkm::worklet::DispatcherMapField<vtkm::worklet::SineWorklet>
dispatcher(sineWorklet);
vtkm::cont::Timer<> timer;
dispatcher.Invoke(input, output);
std::cout << output.GetPortalConstControl().Get(output.GetNumberOfValues()-1) << std::endl;
vtkm::Float64 elapsedTime = timer.GetElapsedTime();
std::cout << "Time: " << elapsedTime << std::endl;
#else
vtkm::worklet::DispatcherStreamingMapField<vtkm::worklet::SineWorklet>
dispatcher(sineWorklet);
vtkm::Id NBlocks = N/(1024*1024*1024);
NBlocks *= 2;
dispatcher.SetNumberOfBlocks(NBlocks);
std::cout << "Testing with streaming (without unified memory) with " << NBlocks << " blocks" << std::endl;
vtkm::cont::Timer<> timer;
dispatcher.Invoke(input, output);
std::cout << output.GetPortalConstControl().Get(output.GetNumberOfValues()-1) << std::endl;
vtkm::Float64 elapsedTime = timer.GetElapsedTime();
std::cout << "Time: " << elapsedTime << std::endl;
#endif
int dim = 128;
if (argc > 2) dim = atoi(argv[2]);
std::cout << "Testing Marching Cubes with size " << dim << "x" << dim << "x" << dim << std::endl;
vtkm::Id3 dims(dim, dim, dim);
vtkm::cont::ArrayHandle<vtkm::Vec<vtkm::Float32,3> > verticesArray, normalsArray;
vtkm::cont::ArrayHandle<vtkm::Float32> scalarsArray;
vtkm::cont::DataSet dataSet = MakeIsosurfaceTestDataSet(dims);
vtkm::filter::MarchingCubes filter;
filter.SetGenerateNormals(true);
filter.SetMergeDuplicatePoints( false );
filter.SetIsoValue( 0.5 );
vtkm::filter::ResultDataSet result =
filter.Execute( dataSet, dataSet.GetField("nodevar") );
filter.MapFieldOntoOutput(result, dataSet.GetField("nodevar"));
//need to extract vertices, normals, and scalars
vtkm::cont::DataSet& outputData = result.GetDataSet();
typedef vtkm::cont::ArrayHandle< vtkm::Vec<vtkm::Float32,3> > VertType;
vtkm::cont::CoordinateSystem coords = outputData.GetCoordinateSystem();
verticesArray = coords.GetData().Cast<VertType>();
normalsArray = outputData.GetField("normals").GetData().Cast<VertType>();
scalarsArray = outputData.GetField("nodevar").GetData().Cast< vtkm::cont::ArrayHandle<vtkm::Float32> >();
std::cout << "Number of output vertices: " << verticesArray.GetNumberOfValues() << std::endl;
std::cout << "vertices: ";
vtkm::cont::printSummary_ArrayHandle(verticesArray, std::cout);
std::cout << std::endl;
std::cout << "normals: ";
vtkm::cont::printSummary_ArrayHandle(normalsArray, std::cout);
std::cout << std::endl;
std::cout << "scalars: ";
vtkm::cont::printSummary_ArrayHandle(scalarsArray, std::cout);
std::cout << std::endl;
return 0;
}

@ -221,6 +221,7 @@ private:
dispatcher.Invoke( this->InputHandle,
cellSet,
result);
//result.SyncControlArray();
return timer.GetElapsedTime();
}
@ -257,6 +258,7 @@ private:
dispatcher.Invoke( dinput,
cellSet,
result);
//result.SyncControlArray();
return timer.GetElapsedTime();
}
@ -301,6 +303,7 @@ private:
dispatcher.Invoke( this->InputHandle,
cellSet,
result);
//result.SyncControlArray();
return timer.GetElapsedTime();
}
@ -336,6 +339,7 @@ private:
dispatcher.Invoke( dinput,
cellSet,
result);
//result.SyncControlArray();
return timer.GetElapsedTime();
}
@ -385,6 +389,7 @@ private:
dispatcher.Invoke( dinput,
cellSet,
result);
//result.SyncControlArray();
return timer.GetElapsedTime();
}
@ -418,6 +423,7 @@ private:
dispatcher.Invoke( this->InputHandle,
cellSet,
result);
//result.SyncControlArray();
return timer.GetElapsedTime();
}

@ -175,7 +175,12 @@ public:
try
{
ValueType *tmp;
#ifdef VTKM_USE_UNIFIED_MEMORY
VTKM_CUDA_CALL(cudaMallocManaged(&tmp, bufferSize));
cudaMemAdvise(tmp, bufferSize, cudaMemAdviseSetPreferredLocation, 0);
#else
VTKM_CUDA_CALL(cudaMalloc(&tmp, bufferSize));
#endif
this->Begin = PointerType(tmp);
}
catch (const std::exception &error)
@ -210,6 +215,9 @@ public:
storage->Allocate(this->GetNumberOfValues());
try
{
#ifdef VTKM_USE_UNIFIED_MEMORY
cudaDeviceSynchronize();
#endif
::thrust::copy(this->Begin, this->End,
vtkm::cont::ArrayPortalToIteratorBegin(storage->GetPortal()));
}
@ -267,6 +275,7 @@ private:
PointerType End;
PointerType Capacity;
VTKM_CONT
void CopyToExecution()
{