Address reviewer comments and suggestions.

This commit is contained in:
Dave Pugmire 2022-08-11 13:43:49 -04:00
parent 7e945bb145
commit 3920806a66
6 changed files with 44 additions and 41 deletions

@ -18,10 +18,6 @@
#include <cstdlib>
#include <iostream>
int rank = 0;
int numRanks = 1;
struct Options
{
public:
@ -38,8 +34,6 @@ public:
std::string MapField = "";
int IsoLevels = 1;
std::vector<double> IsoValues;
int Rank = 0;
int NumRanks = 1;
std::string ThreadMode = "serial";
bool SyncMemAlloc = true;
int NumTasks = 0;
@ -172,8 +166,7 @@ public:
if ((!this->Tangle && (this->DataFile == "" || this->ThreadMode == "")) || this->Field == "")
{
if (this->Rank == 0)
std::cerr << "Error in options" << std::endl;
std::cerr << "Error in options" << std::endl;
return false;
}
@ -218,12 +211,12 @@ int main(int argc, char** argv)
{
if (opts.SyncMemAlloc)
{
CudaAllocator::ForceSyncMemoryAllocator();
vtkm::cont::cuda::internal::CudaAllocator::ForceSyncMemoryAllocator();
std::cout << " Task: Sync memory alloc = ON" << std::endl;
}
else
{
CudaAllocator::ForceAsyncMemoryAllocator();
vtkm::cont::cuda::internal::CudaAllocator::ForceAsyncMemoryAllocator();
std::cout << " Task: Sync memory alloc = OFF" << std::endl;
}
}

@ -48,7 +48,7 @@ static bool ManagedMemoryEnabled = false;
static bool HardwareSupportsManagedMemory = false;
// True if using synchronous memory allocator. Managed memory must be off to use this.
static bool UseSyncMemoryAlloc = true;
static thread_local bool UseSyncMemoryAlloc = true;
// Avoid overhead of cudaMemAdvise and cudaMemPrefetchAsync for small buffers.
// This value should be > 0 or else these functions will error out.
@ -106,11 +106,6 @@ void CudaAllocator::ForceManagedMemoryOn()
}
}
bool CudaAllocator::UsingSyncMemoryAllocator()
{
return UseSyncMemoryAlloc;
}
void CudaAllocator::ForceSyncMemoryAllocator()
{
UseSyncMemoryAlloc = true;
@ -190,22 +185,21 @@ void* CudaAllocator::Allocate(std::size_t numBytes)
}
void* ptr = nullptr;
if (ManagedMemoryEnabled)
#if CUDART_VERSION >= 11030
if (!UseSyncMemoryAlloc)
{
VTKM_CUDA_CALL(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
}
else
#endif
if (ManagedMemoryEnabled)
{
VTKM_CUDA_CALL(cudaMallocManaged(&ptr, numBytes));
}
else
{
if (UseSyncMemoryAlloc)
{
VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
}
else
{
#if CUDART_VERSION >= 11030
VTKM_CUDA_CALL(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
#endif
}
VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
}
{
@ -251,6 +245,8 @@ void CudaAllocator::Free(void* ptr)
{
#if CUDART_VERSION >= 11030
VTKM_CUDA_CALL(cudaFreeAsync(ptr, cudaStreamPerThread));
#else
VTKM_CUDA_CALL(cudaFree(ptr));
#endif
}
}

@ -40,11 +40,6 @@ struct VTKM_CONT_EXPORT CudaAllocator
/// VTK-m will ignore the request and continue to use unmanaged memory (aka cudaMalloc).
static VTKM_CONT void ForceManagedMemoryOn();
static VTKM_CONT bool UsingSyncMemoryAllocator();
static VTKM_CONT bool UsingAsyncMemoryAllocator()
{
return !CudaAllocator::UsingSyncMemoryAllocator();
}
static VTKM_CONT void ForceSyncMemoryAllocator();
static VTKM_CONT void ForceAsyncMemoryAllocator();

@ -20,12 +20,17 @@ namespace vtkm
{
namespace filter
{
namespace
{
void RunFilter(NewFilter* self,
vtkm::filter::DataSetQueue& input,
vtkm::filter::DataSetQueue& output)
NewFilter::~NewFilter() = default;
void NewFilter::RunFilter(NewFilter* self,
vtkm::filter::DataSetQueue& input,
vtkm::filter::DataSetQueue& output)
{
#ifdef VTKM_CUDA
vtkm::cont::cuda::internal::CudaAllocator::ForceSyncMemoryAllocator();
#endif
std::pair<vtkm::Id, vtkm::cont::DataSet> task;
while (input.GetTask(task))
{
@ -35,9 +40,7 @@ void RunFilter(NewFilter* self,
vtkm::cont::Algorithm::Synchronize();
}
} // anonymous namespace
NewFilter::~NewFilter() = default;
bool NewFilter::CanThread() const
{
@ -61,8 +64,11 @@ vtkm::cont::PartitionedDataSet NewFilter::DoExecutePartitions(
std::vector<std::future<void>> futures(static_cast<std::size_t>(numThreads));
for (std::size_t i = 0; i < static_cast<std::size_t>(numThreads); i++)
{
auto f = std::async(
std::launch::async, RunFilter, this, std::ref(inputQueue), std::ref(outputQueue));
auto f = std::async(std::launch::async,
vtkm::filter::NewFilter::RunFilter,
this,
std::ref(inputQueue),
std::ref(outputQueue));
futures[i] = std::move(f);
}

@ -17,6 +17,7 @@
#include <vtkm/cont/PartitionedDataSet.h>
#include <vtkm/filter/FieldSelection.h>
#include <vtkm/filter/TaskQueue.h>
#include <vtkm/filter/vtkm_filter_core_export.h>
namespace vtkm
@ -228,6 +229,11 @@ public:
VTKM_CONT
void SetThreadsPerGPU(vtkm::Id numThreads) { this->NumThreadsPerGPU = numThreads; }
VTKM_CONT
vtkm::Id SetThreadsPerCPU() const { return this->NumThreadsPerCPU; }
VTKM_CONT
vtkm::Id SetThreadsPerGPU() const { return this->NumThreadsPerGPU; }
VTKM_CONT
bool GetRunMultiThreadedFilter() const
{
@ -440,6 +446,11 @@ private:
}
}
VTKM_CONT
static void RunFilter(NewFilter* self,
vtkm::filter::DataSetQueue& input,
vtkm::filter::DataSetQueue& output);
VTKM_CONT
virtual vtkm::Id DetermineNumberOfThreads(const vtkm::cont::PartitionedDataSet& input);

@ -103,7 +103,9 @@ public:
//Insert them back in the same order.
std::pair<vtkm::Id, vtkm::cont::DataSet> task;
while (this->GetTask(task))
{
dataSets[static_cast<std::size_t>(task.first)] = std::move(task.second);
}
pds.AppendPartitions(dataSets);
}