From 760c5856f06cd9155f29a7946bf4ad873aee8f1e Mon Sep 17 00:00:00 2001 From: "David C. Lonie" Date: Thu, 6 Jul 2017 16:08:40 -0400 Subject: [PATCH] Add BenchmarkArrayTransfer. This will let us measure performance while tuning CUDA managed memory hints. --- vtkm/benchmarking/BenchmarkArrayTransfer.cxx | 560 +++++++++++++++++++ vtkm/benchmarking/CMakeLists.txt | 1 + 2 files changed, 561 insertions(+) create mode 100644 vtkm/benchmarking/BenchmarkArrayTransfer.cxx diff --git a/vtkm/benchmarking/BenchmarkArrayTransfer.cxx b/vtkm/benchmarking/BenchmarkArrayTransfer.cxx new file mode 100644 index 000000000..bab6119ab --- /dev/null +++ b/vtkm/benchmarking/BenchmarkArrayTransfer.cxx @@ -0,0 +1,560 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2017 Sandia Corporation. +// Copyright 2017 UT-Battelle, LLC. +// Copyright 2017 Los Alamos National Security. +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. 
+//============================================================================
+
+// NOTE(review): include targets reconstructed from the symbols used in this
+// file -- confirm against the upstream header list.
+#include <vtkm/TypeTraits.h>
+
+#include <vtkm/benchmarking/Benchmarker.h>
+
+#include <vtkm/cont/ArrayHandle.h>
+#include <vtkm/cont/DeviceAdapterAlgorithm.h>
+#include <vtkm/cont/Timer.h>
+
+#include <vtkm/exec/FunctorBase.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// 256 MB of floats:
+const vtkm::Id ARRAY_SIZE = 256 * 1024 * 1024 / 4;
+
+namespace vtkm
+{
+namespace benchmarking
+{
+
+// Collection of array-transfer benchmarks, parameterized on the device
+// adapter tag that is passed to the ArrayHandle Prepare* calls and used to
+// select the algorithm and timer implementations.
+template <typename DeviceAdapter>
+struct BenchmarkArrayTransfer
+{
+  using Algo = vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapter>;
+  using StorageTag = vtkm::cont::StorageTagBasic;
+  using Timer = vtkm::cont::Timer<DeviceAdapter>;
+
+  //------------- Functors for benchmarks --------------------------------------
+
+  // Reads all values in ArrayHandle.
+  //
+  // PortalType must provide a nested ValueType and a Get(vtkm::Id) method.
+  // Each call compares one element against MinValue so that the read has an
+  // observable use.
+  template <typename PortalType>
+  struct ReadValues : vtkm::exec::FunctorBase
+  {
+    using ValueType = typename PortalType::ValueType;
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType>;
+
+    PortalType Portal;
+    const ValueType MinValue;
+
+    VTKM_CONT
+    ReadValues(const PortalType& portal, const ValueType& minValue)
+      : Portal(portal)
+      , MinValue(minValue)
+    {
+    }
+
+    // Execution-side overload: reports through RaiseError.
+    VTKM_EXEC
+    void operator()(vtkm::Id i) const
+    {
+      if (this->Portal.Get(i) < this->MinValue)
+      {
+        // We don't really do anything with this, we just need to do *something*
+        // to prevent the compiler from optimizing out the array accesses.
+        this->RaiseError("Unexpected value.");
+      }
+    }
+
+    // unused int argument is simply to distinguish this method from the
+    // VTKM_EXEC overload (VTKM_EXEC_CONT won't work here because of the
+    // RaiseError call).
+    VTKM_CONT
+    void operator()(vtkm::Id i, int) const
+    {
+      if (this->Portal.Get(i) < this->MinValue)
+      {
+        // We don't really do anything with this, we just need to do *something*
+        // to prevent the compiler from optimizing out the array accesses.
+        std::cerr << "Unexpected value.\n";
+      }
+    }
+  };
+
+  // Writes values to ArrayHandle.
+  template <typename PortalType>
+  struct WriteValues : vtkm::exec::FunctorBase
+  {
+    using ValueType = typename PortalType::ValueType;
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType>;
+
+    PortalType Portal;
+
+    VTKM_CONT
+    WriteValues(const PortalType& portal)
+      : Portal(portal)
+    {
+    }
+
+    // Stores the index itself (converted to ValueType) at position i.
+    VTKM_EXEC_CONT
+    void operator()(vtkm::Id i) const { this->Portal.Set(i, static_cast<ValueType>(i)); }
+  };
+
+  // Reads and writes values to ArrayHandle.
+  template <typename PortalType>
+  struct ReadWriteValues : vtkm::exec::FunctorBase
+  {
+    using ValueType = typename PortalType::ValueType;
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType>;
+
+    PortalType Portal;
+
+    VTKM_CONT
+    ReadWriteValues(const PortalType& portal)
+      : Portal(portal)
+    {
+    }
+
+    // Adds the index (converted to ValueType) to the element at position i,
+    // so every element is both loaded and stored.
+    VTKM_EXEC_CONT
+    void operator()(vtkm::Id i) const
+    {
+      ValueType val = this->Portal.Get(i);
+      val += static_cast<ValueType>(i);
+      this->Portal.Set(i, val);
+    }
+  };
+
+  //------------- Benchmark functors -------------------------------------------
+
+  // Copies NumValues from control environment to execution environment and
+  // accesses them as read-only.
+  template <typename ValueType>
+  struct BenchContToExecRead
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::PortalConst;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchContToExecRead(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Control --> Execution (read-only): " << this->NumValues << " values ("
+          << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType))) << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // zero-filled input is built before the timer is created, so only
+    // PrepareForInput() and the scheduled read are inside the timed region.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      std::vector<ValueType> vec(this->NumValues, ValueTypeTraits::ZeroInitialization());
+      ArrayType array = vtkm::cont::make_ArrayHandle(vec);
+
+      // Time the copy:
+      Timer timer;
+      ReadValues<PortalType> functor(array.PrepareForInput(DeviceAdapter()),
+                                     ValueTypeTraits::ZeroInitialization());
+      Algo::Schedule(functor, this->NumValues);
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(ContToExecRead, BenchContToExecRead, ARRAY_SIZE);
+
+  // Writes values to ArrayHandle in execution environment. There is no actual
+  // copy between control/execution in this case.
+  template <typename ValueType>
+  struct BenchContToExecWrite
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchContToExecWrite(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Control --> Execution (write-only): " << this->NumValues << " values ("
+          << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType))) << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // array starts out empty; PrepareForOutput() and the scheduled write are
+    // both inside the timed region.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      ArrayType array;
+
+      // Time the write:
+      Timer timer;
+      WriteValues<PortalType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
+      Algo::Schedule(functor, this->NumValues);
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(ContToExecWrite, BenchContToExecWrite, ARRAY_SIZE);
+
+  // Copies NumValues from control environment to execution environment and
+  // both reads and writes them.
+  template <typename ValueType>
+  struct BenchContToExecReadWrite
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchContToExecReadWrite(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Control --> Execution (read-write): " << this->NumValues << " values ("
+          << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType))) << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // zero-filled input is built before the timer is created, so only
+    // PrepareForInPlace() and the scheduled read-modify-write are timed.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      std::vector<ValueType> vec(this->NumValues, ValueTypeTraits::ZeroInitialization());
+      ArrayType array = vtkm::cont::make_ArrayHandle(vec);
+
+      // Time the copy:
+      Timer timer;
+      ReadWriteValues<PortalType> functor(array.PrepareForInPlace(DeviceAdapter()));
+      Algo::Schedule(functor, this->NumValues);
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(ContToExecReadWrite, BenchContToExecReadWrite, ARRAY_SIZE);
+
+  // Copies NumValues from control environment to execution environment and
+  // back, then accesses them as read-only.
+  template <typename ValueType>
+  struct BenchRoundTripRead
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalContType = typename ArrayType::PortalConstControl;
+    using PortalExecType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::PortalConst;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchRoundTripRead(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Control --> Execution --> Control (read-only): " << this->NumValues
+          << " values (" << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType)))
+          << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // timed region covers the scheduled read through the execution portal
+    // followed by a serial read of every element through the control portal.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      std::vector<ValueType> vec(this->NumValues, ValueTypeTraits::ZeroInitialization());
+      ArrayType array = vtkm::cont::make_ArrayHandle(vec);
+
+      // Ensure data is in control before we start:
+      array.ReleaseResourcesExecution();
+
+      // Time the copy:
+      Timer timer;
+
+      // Copy to device:
+      ReadValues<PortalExecType> functor(array.PrepareForInput(DeviceAdapter()),
+                                         ValueTypeTraits::ZeroInitialization());
+      Algo::Schedule(functor, this->NumValues);
+
+      // Copy back to host and read:
+      ReadValues<PortalContType> cFunctor(array.GetPortalConstControl(),
+                                          ValueTypeTraits::ZeroInitialization());
+      for (vtkm::Id i = 0; i < this->NumValues; ++i)
+      {
+        // The extra int selects the VTKM_CONT overload of ReadValues.
+        cFunctor(i, 0);
+      }
+
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(RoundTripRead, BenchRoundTripRead, ARRAY_SIZE);
+
+  // Copies NumValues from control environment to execution environment and
+  // back, then reads and writes them in-place.
+  template <typename ValueType>
+  struct BenchRoundTripReadWrite
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalContType = typename ArrayType::PortalControl;
+    using PortalExecType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchRoundTripReadWrite(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Control --> Execution --> Control (read-write): " << this->NumValues
+          << " values (" << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType)))
+          << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // timed region covers the scheduled read-modify-write through the
+    // execution portal followed by a serial read-modify-write of every
+    // element through the control portal.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      std::vector<ValueType> vec(this->NumValues, ValueTypeTraits::ZeroInitialization());
+      ArrayType array = vtkm::cont::make_ArrayHandle(vec);
+
+      // Ensure data is in control before we start:
+      array.ReleaseResourcesExecution();
+
+      // Time the copy:
+      Timer timer;
+
+      // Do work on device:
+      ReadWriteValues<PortalExecType> functor(array.PrepareForInPlace(DeviceAdapter()));
+      Algo::Schedule(functor, this->NumValues);
+
+      // Read and write the values on the host:
+      ReadWriteValues<PortalContType> cFunctor(array.GetPortalControl());
+      for (vtkm::Id i = 0; i < this->NumValues; ++i)
+      {
+        cFunctor(i);
+      }
+
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(RoundTripReadWrite, BenchRoundTripReadWrite, ARRAY_SIZE);
+
+  // Write NumValues to device allocated memory and copies them back to control
+  // for reading.
+  template <typename ValueType>
+  struct BenchExecToContRead
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalContType = typename ArrayType::PortalConstControl;
+    using PortalExecType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchExecToContRead(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Execution --> Control (read-only on control): " << this->NumValues
+          << " values (" << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType)))
+          << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // timed region covers PrepareForOutput(), the scheduled write, and a
+    // serial read of every element through the control portal.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      ArrayType array;
+
+      // Time the copy:
+      Timer timer;
+
+      // Allocate/write data on device
+      WriteValues<PortalExecType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
+      Algo::Schedule(functor, this->NumValues);
+
+      // Read back on host:
+      ReadValues<PortalContType> cFunctor(array.GetPortalConstControl(),
+                                          ValueTypeTraits::ZeroInitialization());
+      for (vtkm::Id i = 0; i < this->NumValues; ++i)
+      {
+        // The extra int selects the VTKM_CONT overload of ReadValues.
+        cFunctor(i, 0);
+      }
+
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(ExecToContRead, BenchExecToContRead, ARRAY_SIZE);
+
+  // Write NumValues to device allocated memory and copies them back to control
+  // and overwrites them.
+  template <typename ValueType>
+  struct BenchExecToContWrite
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalContType = typename ArrayType::PortalControl;
+    using PortalExecType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchExecToContWrite(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Execution --> Control (write-only on control): " << this->NumValues
+          << " values (" << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType)))
+          << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // timed region covers PrepareForOutput(), the scheduled write, and a
+    // serial overwrite of every element through the control portal.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      ArrayType array;
+
+      // Time the copy:
+      Timer timer;
+
+      // Allocate/write data on device
+      WriteValues<PortalExecType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
+      Algo::Schedule(functor, this->NumValues);
+
+      // Overwrite on host:
+      WriteValues<PortalContType> cFunctor(array.GetPortalControl());
+      for (vtkm::Id i = 0; i < this->NumValues; ++i)
+      {
+        cFunctor(i);
+      }
+
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(ExecToContWrite, BenchExecToContWrite, ARRAY_SIZE);
+
+  // Write NumValues to device allocated memory and copies them back to control
+  // for reading and writing.
+  template <typename ValueType>
+  struct BenchExecToContReadWrite
+  {
+    using ArrayType = vtkm::cont::ArrayHandle<ValueType, StorageTag>;
+    using PortalContType = typename ArrayType::PortalControl;
+    using PortalExecType = typename ArrayType::template ExecutionTypes<DeviceAdapter>::Portal;
+    using ValueTypeTraits = vtkm::TypeTraits<ValueType>;
+
+    vtkm::Id NumValues;
+
+    VTKM_CONT
+    BenchExecToContReadWrite(vtkm::Id numValues)
+      : NumValues(numValues)
+    {
+    }
+
+    // Label for this benchmark: value count plus payload size in bytes.
+    VTKM_CONT
+    std::string Description() const
+    {
+      std::ostringstream out;
+      out << "Copying from Execution --> Control (read-write on control): " << this->NumValues
+          << " values (" << (this->NumValues * static_cast<vtkm::Id>(sizeof(ValueType)))
+          << " bytes)";
+      return out.str();
+    }
+
+    // Runs one iteration and returns the time reported by the Timer. The
+    // timed region covers PrepareForOutput(), the scheduled write, and a
+    // serial read-modify-write of every element through the control portal.
+    VTKM_CONT
+    vtkm::Float64 operator()()
+    {
+      ArrayType array;
+
+      // Time the copy:
+      Timer timer;
+
+      // Allocate/write data on device
+      WriteValues<PortalExecType> functor(array.PrepareForOutput(this->NumValues, DeviceAdapter()));
+      Algo::Schedule(functor, this->NumValues);
+
+      // Read and modify on host:
+      ReadWriteValues<PortalContType> cFunctor(array.GetPortalControl());
+      for (vtkm::Id i = 0; i < this->NumValues; ++i)
+      {
+        cFunctor(i);
+      }
+
+      return timer.GetElapsedTime();
+    }
+  };
+  VTKM_MAKE_BENCHMARK(ExecToContReadWrite, BenchExecToContReadWrite, ARRAY_SIZE);
+
+  //----------- Benchmark caller -----------------------------------------------
+
+  // Value types each benchmark is run with.
+  // NOTE(review): element type inferred from the "256 MB of floats" sizing of
+  // ARRAY_SIZE -- confirm.
+  using TestTypes = vtkm::ListTagBase<vtkm::Float32>;
+
+  // Runs every benchmark declared above, in declaration order, and always
+  // returns true.
+  static VTKM_CONT bool Run()
+  {
+    VTKM_RUN_BENCHMARK(ContToExecRead, TestTypes());
+    VTKM_RUN_BENCHMARK(ContToExecWrite, TestTypes());
+    VTKM_RUN_BENCHMARK(ContToExecReadWrite, TestTypes());
+    VTKM_RUN_BENCHMARK(RoundTripRead, TestTypes());
+    VTKM_RUN_BENCHMARK(RoundTripReadWrite, TestTypes());
+    VTKM_RUN_BENCHMARK(ExecToContRead, TestTypes());
+    VTKM_RUN_BENCHMARK(ExecToContWrite, TestTypes());
+    VTKM_RUN_BENCHMARK(ExecToContReadWrite, TestTypes());
+
+    return true;
+  }
+};
+}
+} // end namespace vtkm::benchmarking
+
+// Runs the benchmark suite on the default device adapter and maps the result
+// of Run() onto the process exit status.
+int main(int, char* [])
+{
+  using DeviceAdapter = VTKM_DEFAULT_DEVICE_ADAPTER_TAG;
+  using Benchmarks = vtkm::benchmarking::BenchmarkArrayTransfer<DeviceAdapter>;
+  bool result = Benchmarks::Run();
+  return result ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/vtkm/benchmarking/CMakeLists.txt b/vtkm/benchmarking/CMakeLists.txt index a9ee0e54b..f1482f703 100644 --- a/vtkm/benchmarking/CMakeLists.txt +++ b/vtkm/benchmarking/CMakeLists.txt @@ -19,6 +19,7 @@ ##============================================================================ set(benchmark_srcs + BenchmarkArrayTransfer.cxx BenchmarkDeviceAdapter.cxx BenchmarkFieldAlgorithms.cxx BenchmarkTopologyAlgorithms.cxx