mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-10-05 01:49:02 +00:00
Address reviewer comments and suggestions.
This commit is contained in:
parent
7e945bb145
commit
3920806a66
@ -18,10 +18,6 @@
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
|
||||
int rank = 0;
|
||||
int numRanks = 1;
|
||||
|
||||
|
||||
struct Options
|
||||
{
|
||||
public:
|
||||
@ -38,8 +34,6 @@ public:
|
||||
std::string MapField = "";
|
||||
int IsoLevels = 1;
|
||||
std::vector<double> IsoValues;
|
||||
int Rank = 0;
|
||||
int NumRanks = 1;
|
||||
std::string ThreadMode = "serial";
|
||||
bool SyncMemAlloc = true;
|
||||
int NumTasks = 0;
|
||||
@ -172,8 +166,7 @@ public:
|
||||
|
||||
if ((!this->Tangle && (this->DataFile == "" || this->ThreadMode == "")) || this->Field == "")
|
||||
{
|
||||
if (this->Rank == 0)
|
||||
std::cerr << "Error in options" << std::endl;
|
||||
std::cerr << "Error in options" << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -218,12 +211,12 @@ int main(int argc, char** argv)
|
||||
{
|
||||
if (opts.SyncMemAlloc)
|
||||
{
|
||||
CudaAllocator::ForceSyncMemoryAllocator();
|
||||
vtkm::cont::cuda::internal::CudaAllocator::ForceSyncMemoryAllocator();
|
||||
std::cout << " Task: Sync memory alloc = ON" << std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
CudaAllocator::ForceAsyncMemoryAllocator();
|
||||
vtkm::cont::cuda::internal::CudaAllocator::ForceAsyncMemoryAllocator();
|
||||
std::cout << " Task: Sync memory alloc = OFF" << std::endl;
|
||||
}
|
||||
}
|
||||
|
@ -48,7 +48,7 @@ static bool ManagedMemoryEnabled = false;
|
||||
static bool HardwareSupportsManagedMemory = false;
|
||||
|
||||
// True if using synchronous memory allocator. Managed memory must be off to use this.
|
||||
static bool UseSyncMemoryAlloc = true;
|
||||
static thread_local bool UseSyncMemoryAlloc = true;
|
||||
|
||||
// Avoid overhead of cudaMemAdvise and cudaMemPrefetchAsync for small buffers.
|
||||
// This value should be > 0 or else these functions will error out.
|
||||
@ -106,11 +106,6 @@ void CudaAllocator::ForceManagedMemoryOn()
|
||||
}
|
||||
}
|
||||
|
||||
bool CudaAllocator::UsingSyncMemoryAllocator()
|
||||
{
|
||||
return UseSyncMemoryAlloc;
|
||||
}
|
||||
|
||||
void CudaAllocator::ForceSyncMemoryAllocator()
|
||||
{
|
||||
UseSyncMemoryAlloc = true;
|
||||
@ -190,22 +185,21 @@ void* CudaAllocator::Allocate(std::size_t numBytes)
|
||||
}
|
||||
|
||||
void* ptr = nullptr;
|
||||
if (ManagedMemoryEnabled)
|
||||
#if CUDART_VERSION >= 11030
|
||||
if (!UseSyncMemoryAlloc)
|
||||
{
|
||||
VTKM_CUDA_CALL(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
|
||||
}
|
||||
|
||||
else
|
||||
#endif
|
||||
if (ManagedMemoryEnabled)
|
||||
{
|
||||
VTKM_CUDA_CALL(cudaMallocManaged(&ptr, numBytes));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (UseSyncMemoryAlloc)
|
||||
{
|
||||
VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
|
||||
}
|
||||
else
|
||||
{
|
||||
#if CUDART_VERSION >= 11030
|
||||
VTKM_CUDA_CALL(cudaMallocAsync(&ptr, numBytes, cudaStreamPerThread));
|
||||
#endif
|
||||
}
|
||||
VTKM_CUDA_CALL(cudaMalloc(&ptr, numBytes));
|
||||
}
|
||||
|
||||
{
|
||||
@ -251,6 +245,8 @@ void CudaAllocator::Free(void* ptr)
|
||||
{
|
||||
#if CUDART_VERSION >= 11030
|
||||
VTKM_CUDA_CALL(cudaFreeAsync(ptr, cudaStreamPerThread));
|
||||
#else
|
||||
VTKM_CUDA_CALL(cudaFree(ptr));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -40,11 +40,6 @@ struct VTKM_CONT_EXPORT CudaAllocator
|
||||
/// VTK-m will ignore the request and continue to use unmanaged memory (aka cudaMalloc).
|
||||
static VTKM_CONT void ForceManagedMemoryOn();
|
||||
|
||||
static VTKM_CONT bool UsingSyncMemoryAllocator();
|
||||
static VTKM_CONT bool UsingAsyncMemoryAllocator()
|
||||
{
|
||||
return !CudaAllocator::UsingSyncMemoryAllocator();
|
||||
}
|
||||
static VTKM_CONT void ForceSyncMemoryAllocator();
|
||||
static VTKM_CONT void ForceAsyncMemoryAllocator();
|
||||
|
||||
|
@ -20,12 +20,17 @@ namespace vtkm
|
||||
{
|
||||
namespace filter
|
||||
{
|
||||
namespace
|
||||
{
|
||||
void RunFilter(NewFilter* self,
|
||||
vtkm::filter::DataSetQueue& input,
|
||||
vtkm::filter::DataSetQueue& output)
|
||||
|
||||
NewFilter::~NewFilter() = default;
|
||||
|
||||
void NewFilter::RunFilter(NewFilter* self,
|
||||
vtkm::filter::DataSetQueue& input,
|
||||
vtkm::filter::DataSetQueue& output)
|
||||
{
|
||||
#ifdef VTKM_CUDA
|
||||
vtkm::cont::cuda::internal::CudaAllocator::ForceSyncMemoryAllocator();
|
||||
#endif
|
||||
|
||||
std::pair<vtkm::Id, vtkm::cont::DataSet> task;
|
||||
while (input.GetTask(task))
|
||||
{
|
||||
@ -35,9 +40,7 @@ void RunFilter(NewFilter* self,
|
||||
|
||||
vtkm::cont::Algorithm::Synchronize();
|
||||
}
|
||||
} // anonymous namespace
|
||||
|
||||
NewFilter::~NewFilter() = default;
|
||||
|
||||
bool NewFilter::CanThread() const
|
||||
{
|
||||
@ -61,8 +64,11 @@ vtkm::cont::PartitionedDataSet NewFilter::DoExecutePartitions(
|
||||
std::vector<std::future<void>> futures(static_cast<std::size_t>(numThreads));
|
||||
for (std::size_t i = 0; i < static_cast<std::size_t>(numThreads); i++)
|
||||
{
|
||||
auto f = std::async(
|
||||
std::launch::async, RunFilter, this, std::ref(inputQueue), std::ref(outputQueue));
|
||||
auto f = std::async(std::launch::async,
|
||||
vtkm::filter::NewFilter::RunFilter,
|
||||
this,
|
||||
std::ref(inputQueue),
|
||||
std::ref(outputQueue));
|
||||
futures[i] = std::move(f);
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <vtkm/cont/PartitionedDataSet.h>
|
||||
|
||||
#include <vtkm/filter/FieldSelection.h>
|
||||
#include <vtkm/filter/TaskQueue.h>
|
||||
#include <vtkm/filter/vtkm_filter_core_export.h>
|
||||
|
||||
namespace vtkm
|
||||
@ -228,6 +229,11 @@ public:
|
||||
VTKM_CONT
|
||||
void SetThreadsPerGPU(vtkm::Id numThreads) { this->NumThreadsPerGPU = numThreads; }
|
||||
|
||||
VTKM_CONT
|
||||
vtkm::Id SetThreadsPerCPU() const { return this->NumThreadsPerCPU; }
|
||||
VTKM_CONT
|
||||
vtkm::Id SetThreadsPerGPU() const { return this->NumThreadsPerGPU; }
|
||||
|
||||
VTKM_CONT
|
||||
bool GetRunMultiThreadedFilter() const
|
||||
{
|
||||
@ -440,6 +446,11 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
static void RunFilter(NewFilter* self,
|
||||
vtkm::filter::DataSetQueue& input,
|
||||
vtkm::filter::DataSetQueue& output);
|
||||
|
||||
VTKM_CONT
|
||||
virtual vtkm::Id DetermineNumberOfThreads(const vtkm::cont::PartitionedDataSet& input);
|
||||
|
||||
|
@ -103,7 +103,9 @@ public:
|
||||
//Insert them back in the same order.
|
||||
std::pair<vtkm::Id, vtkm::cont::DataSet> task;
|
||||
while (this->GetTask(task))
|
||||
{
|
||||
dataSets[static_cast<std::size_t>(task.first)] = std::move(task.second);
|
||||
}
|
||||
|
||||
pds.AppendPartitions(dataSets);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user