From cb34cbbaf22daf06b5531597d12e212bca2c4b0a Mon Sep 17 00:00:00 2001 From: Kenneth Moreland Date: Thu, 23 Jul 2020 19:34:45 -0600 Subject: [PATCH] Make BenchmarkArrayTransfer actually benchmark transfers Previously, most of the benchmarks just measured time spent reading or writing the array on the device. The transfer only happened on the first iteration and was then cached on the device. This change clears out the array every iteration so that the array has to be transferred afresh. --- benchmarking/BenchmarkArrayTransfer.cxx | 57 +++++++++++++++++-------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/benchmarking/BenchmarkArrayTransfer.cxx b/benchmarking/BenchmarkArrayTransfer.cxx index 88f07a88d..0deef0572 100644 --- a/benchmarking/BenchmarkArrayTransfer.cxx +++ b/benchmarking/BenchmarkArrayTransfer.cxx @@ -77,6 +77,14 @@ struct ReadWriteValues : vtkm::worklet::WorkletMapField } }; +// Takes a vector of data and creates a fresh ArrayHandle with memory just allocated +// in the control environment. +template +vtkm::cont::ArrayHandle CreateFreshArrayHandle(const std::vector& vec) +{ + return vtkm::cont::make_ArrayHandleMove(std::vector(vec)); +} + //------------- Benchmark functors ------------------------------------------- // Copies NumValues from control environment to execution environment and @@ -98,13 +106,17 @@ void BenchContToExecRead(benchmark::State& state) } std::vector vec(static_cast(numValues), 2); - ArrayType array = vtkm::cont::make_ArrayHandle(vec, vtkm::CopyFlag::On); vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; + + // Make a fresh array each iteration to force a copy from control to execution each time. + // (Prevents unified memory devices from caching data.) + ArrayType array = CreateFreshArrayHandle(vec); + timer.Start(); invoker(ReadValues{}, array); timer.Stop(); @@ -182,18 +194,25 @@ void BenchContToExecReadWrite(benchmark::State& state) } std::vector vec(static_cast(numValues), 2); - ArrayType array = vtkm::cont::make_ArrayHandle(vec, vtkm::CopyFlag::On); vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; + + // Make a fresh array each iteration to force a copy from control to execution each time. + // (Prevents unified memory devices from caching data.) + ArrayType array = CreateFreshArrayHandle(vec); + timer.Start(); invoker(ReadWriteValues{}, array); timer.Stop(); state.SetIterationTime(timer.GetElapsedTime()); + + // Remove data from execution environment so it has to be transferred again. + array.ReleaseResourcesExecution(); } const int64_t iterations = static_cast(state.iterations()); @@ -224,20 +243,22 @@ void BenchRoundTripRead(benchmark::State& state) } std::vector vec(static_cast(numValues), 2); - ArrayType array = vtkm::cont::make_ArrayHandle(vec, vtkm::CopyFlag::On); vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; - // Ensure data is in control before we start: - array.ReleaseResourcesExecution(); + + // Make a fresh array each iteration to force a copy from control to execution each time. + // (Prevents unified memory devices from caching data.) + ArrayType array = CreateFreshArrayHandle(vec); timer.Start(); invoker(ReadValues{}, array); // Copy back to host and read: + // (Note, this probably does not copy. The array exists in both control and execution for read.) auto portal = array.ReadPortal(); for (vtkm::Id i = 0; i < numValues; ++i) { @@ -277,21 +298,23 @@ void BenchRoundTripReadWrite(benchmark::State& state) } std::vector vec(static_cast(numValues)); - ArrayType array = vtkm::cont::make_ArrayHandle(vec, vtkm::CopyFlag::On); vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; - // Ensure data is in control before we start: - array.ReleaseResourcesExecution(); + + // Make a fresh array each iteration to force a copy from control to execution each time. + // (Prevents unified memory devices from caching data.) + ArrayType array = CreateFreshArrayHandle(vec); timer.Start(); // Do work on device: invoker(ReadWriteValues{}, array); + // Copy back to host and read/write: auto portal = array.WritePortal(); for (vtkm::Id i = 0; i < numValues; ++i) { @@ -330,14 +353,14 @@ void BenchExecToContRead(benchmark::State& state) state.SetLabel(desc.str()); } - ArrayType array; - array.Allocate(numValues); - vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; + ArrayType array; + array.Allocate(numValues); + // Time the copy: timer.Start(); @@ -383,14 +406,14 @@ void BenchExecToContWrite(benchmark::State& state) state.SetLabel(desc.str()); } - ArrayType array; - array.Allocate(numValues); - vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; + ArrayType array; + array.Allocate(numValues); + timer.Start(); // Allocate/write data on device @@ -435,14 +458,14 @@ void BenchExecToContReadWrite(benchmark::State& state) state.SetLabel(desc.str()); } - ArrayType array; - array.Allocate(numValues); - vtkm::cont::Invoker invoker{ device }; vtkm::cont::Timer timer{ device }; for (auto _ : state) { (void)_; + ArrayType array; + array.Allocate(numValues); + timer.Start(); // Allocate/write data on device