diff --git a/vtkm/benchmarking/BenchmarkCopySpeeds.cxx b/vtkm/benchmarking/BenchmarkCopySpeeds.cxx new file mode 100644 index 000000000..64fddbb12 --- /dev/null +++ b/vtkm/benchmarking/BenchmarkCopySpeeds.cxx @@ -0,0 +1,190 @@ +//============================================================================ +// Copyright (c) Kitware, Inc. +// All rights reserved. +// See LICENSE.txt for details. +// This software is distributed WITHOUT ANY WARRANTY; without even +// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +// PURPOSE. See the above copyright notice for more information. +// +// Copyright 2017 National Technology & Engineering Solutions of Sandia, LLC (NTESS). +// Copyright 2017 UT-Battelle, LLC. +// Copyright 2017 Los Alamos National Security. +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National +// Laboratory (LANL), the U.S. Government retains certain rights in +// this software. +//============================================================ + +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include + +#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB +#include +#endif // TBB + +// For the TBB implementation, the number of threads can be customized using a +// "NumThreads [numThreads]" argument. 
+ +namespace vtkm +{ +namespace benchmarking +{ + +/* Copy sizes in bytes swept by the benchmark: the loop in BenchmarkValueType starts at COPY_SIZE_MIN and left-shifts the size by COPY_SIZE_INC until it exceeds COPY_SIZE_MAX. */ const vtkm::UInt64 COPY_SIZE_MIN = (1 << 10); // 1 KiB +const vtkm::UInt64 COPY_SIZE_MAX = (1 << 29); // 512 MiB +const vtkm::UInt64 COPY_SIZE_INC = 1; // Used as 'size <<= INC' + +/* Width in characters of each of the two table columns written by PrintRow and PrintDivider. */ const size_t COL_WIDTH = 32; + +/* Benchmark functor that times copying an array of ValueType from Source to Destination. NOTE(review): template parameter lists and cast target types look stripped from this extract, so ValueType and the device used by Algo are presumably template parameters; confirm against the original file. */ template +struct MeasureCopySpeed +{ + using Algo = vtkm::cont::DeviceAdapterAlgorithm; + + vtkm::cont::ArrayHandle Source; + vtkm::cont::ArrayHandle Destination; + vtkm::UInt64 NumBytes; + + /* Records the requested size and allocates Source with bytes / sizeof(ValueType) values; the integer division drops any remainder. Destination is not allocated here. */ VTKM_CONT + MeasureCopySpeed(vtkm::UInt64 bytes) + : NumBytes(bytes) + { + vtkm::Id numValues = static_cast(bytes / sizeof(ValueType)); + this->Source.Allocate(numValues); + } + + /* Runs one Algo::Copy from Source to Destination and returns the elapsed time reported by the Timer created just before the call. */ VTKM_CONT vtkm::Float64 operator()() + { + vtkm::cont::Timer timer; + Algo::Copy(this->Source, this->Destination); + return timer.GetElapsedTime(); + } + + /* Returns a one-line description giving the requested size, the size actually held by Source, and the value type name, followed by a newline. */ VTKM_CONT std::string Description() const + { + vtkm::UInt64 actualSize = + static_cast(this->Source.GetNumberOfValues() * sizeof(ValueType)); + std::ostringstream out; + out << "Copying " << HumanSize(static_cast(this->NumBytes)) + << " (actual=" << HumanSize(static_cast(actualSize)) << ") of " + << vtkm::testing::TypeName::Name() << "\n"; + return out.str(); + } +}; + +/* Writes one table row to out: label and data are each padded to COL_WIDTH with std::setw and framed by '|' separators; std::endl ends the row and flushes the stream. */ void PrintRow(std::ostream& out, const std::string& label, const std::string& data) +{ + out << "| " << std::setw(COL_WIDTH) << label << " | " << std::setw(COL_WIDTH) << data << " |" + << std::endl; +} + +/* Writes a horizontal rule to out made of COL_WIDTH dashes per column, using the same separator layout as PrintRow. */ void PrintDivider(std::ostream& out) +{ + const std::string fillStr(COL_WIDTH, '-'); + + out << "|-" << fillStr << "-|-" << fillStr << "-|" << std::endl; +} + +/* Prints a table of copy throughput for one value type to std::cout: a header row with the type name and the device adapter name, a divider, then one row per size from COPY_SIZE_MIN to COPY_SIZE_MAX. Each row shows the size divided by the mean of the samples gathered by the Benchmarker, formatted with HumanSize and suffixed with "/s". If vtkm::cont::ErrorBadAllocation is thrown inside the try block, the row shows "[allocation too large]" instead. */ template +void BenchmarkValueType() +{ + PrintRow(std::cout, + vtkm::testing::TypeName::Name(), + vtkm::cont::DeviceAdapterTraits::GetName()); + + PrintDivider(std::cout); + + Benchmarker bench(15, 100); + for (vtkm::UInt64 size = COPY_SIZE_MIN; size <= COPY_SIZE_MAX; size <<= COPY_SIZE_INC) + { + MeasureCopySpeed functor(size); + bench.Reset(); + + std::string speedStr; + + try + { + bench.GatherSamples(functor); + vtkm::Float64 speed = static_cast(size) / 
stats::Mean(bench.GetSamples()); + speedStr = HumanSize(speed) + std::string("/s"); + } + /* NOTE(review): the caught exception object e is not used in this handler. */ catch (vtkm::cont::ErrorBadAllocation& e) + { + speedStr = "[allocation too large]"; + } + + PrintRow(std::cout, HumanSize(static_cast(size)), speedStr); + } + + std::cout << "\n"; +} +} +} // end namespace vtkm::benchmarking + +/* Entry point. When called with exactly two arguments and the first is "NumThreads", a build where VTKM_DEVICE_ADAPTER equals VTKM_DEVICE_ADAPTER_TBB parses the second argument into numThreads and passes it to tbb::task_scheduler_init; other builds print a warning to std::cerr and ignore the request. It then calls BenchmarkValueType once per listed value type. NOTE(review): the stream extraction into numThreads is not checked for failure, and the template arguments of the BenchmarkValueType calls are missing from this extract. */ int main(int argc, char* argv[]) +{ + using namespace vtkm::benchmarking; + +#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB + int numThreads = tbb::task_scheduler_init::automatic; +#endif // TBB + + if (argc == 3) + { + if (std::string(argv[1]) == "NumThreads") + { +#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB + std::istringstream parse(argv[2]); + parse >> numThreads; + std::cout << "Selected " << numThreads << " TBB threads." << std::endl; +#else + std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl; +#endif // TBB + } + } + +#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB + // Must not be destroyed as long as benchmarks are running: + tbb::task_scheduler_init init(numThreads); +#endif // TBB + + BenchmarkValueType(); + BenchmarkValueType>(); + BenchmarkValueType>(); + BenchmarkValueType>(); + + BenchmarkValueType(); + BenchmarkValueType>(); + + BenchmarkValueType(); + BenchmarkValueType>(); + + BenchmarkValueType(); + BenchmarkValueType>(); + + BenchmarkValueType(); + BenchmarkValueType>(); + + BenchmarkValueType>(); + BenchmarkValueType>(); + BenchmarkValueType>(); + BenchmarkValueType>(); +} diff --git a/vtkm/benchmarking/CMakeLists.txt b/vtkm/benchmarking/CMakeLists.txt index ecf44b7d9..a50453bbb 100644 --- a/vtkm/benchmarking/CMakeLists.txt +++ b/vtkm/benchmarking/CMakeLists.txt @@ -20,6 +20,7 @@ set(benchmark_srcs BenchmarkArrayTransfer.cxx + BenchmarkCopySpeeds.cxx BenchmarkDeviceAdapter.cxx BenchmarkFieldAlgorithms.cxx BenchmarkTopologyAlgorithms.cxx