//============================================================================ // Copyright (c) Kitware, Inc. // All rights reserved. // See LICENSE.txt for details. // This software is distributed WITHOUT ANY WARRANTY; without even // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. See the above copyright notice for more information. // // Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS). // Copyright 2014 UT-Battelle, LLC. // Copyright 2014 Los Alamos National Security. // // Under the terms of Contract DE-NA0003525 with NTESS, // the U.S. Government retains certain rights in this software. // // Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National // Laboratory (LANL), the U.S. Government retains certain rights in // this software. //============================================================================ #include "Benchmarker.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VTKM_ENABLE_TBB #include #endif #ifdef VTKM_ENABLE_OPENMP #include #endif // This benchmark has a number of commandline options to customize its behavior. // See The BenchDevAlgoConfig documentations for details. // For the TBB implementation, the number of threads can be customized using a // "NumThreads [numThreads]" argument. namespace vtkm { namespace benchmarking { enum BenchmarkName { COPY = 1, COPY_IF = 1 << 1, LOWER_BOUNDS = 1 << 2, REDUCE = 1 << 3, REDUCE_BY_KEY = 1 << 4, SCAN_INCLUSIVE = 1 << 5, SCAN_EXCLUSIVE = 1 << 6, SORT = 1 << 7, SORT_BY_KEY = 1 << 8, STABLE_SORT_INDICES = 1 << 9, STABLE_SORT_INDICES_UNIQUE = 1 << 10, UNIQUE = 1 << 11, UPPER_BOUNDS = 1 << 12, ALL = COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | SCAN_INCLUSIVE | SCAN_EXCLUSIVE | SORT | SORT_BY_KEY | STABLE_SORT_INDICES | STABLE_SORT_INDICES_UNIQUE | UNIQUE | UPPER_BOUNDS }; /// Configuration options. Can be modified using via command line args as /// described below: struct BenchDevAlgoConfig { /// Benchmarks to run. Possible values: /// Copy, CopyIf, LowerBounds, Reduce, ReduceByKey, ScanInclusive, /// ScanExclusive, Sort, SortByKey, StableSortIndices, StableSortIndicesUnique, /// Unique, UpperBounds, or All. (Default: All). // Zero is for parsing, will change to 'all' in main if needed. int BenchmarkFlags{ 0 }; /// ValueTypes to test. /// CLI arg: "TypeList [Base|Extended]" (Base is default). bool ExtendedTypeList{ false }; /// Run benchmarks using the same number of bytes for all arrays. /// CLI arg: "FixBytes [n|off]" (n is the number of bytes, default: 2097152, ie. 2MiB) /// @note FixBytes and FixSizes are not mutually exclusive. If both are /// specified, both will run. bool TestArraySizeBytes{ true }; vtkm::UInt64 ArraySizeBytes{ 1 << 21 }; /// Run benchmarks using the same number of values for all arrays. /// CLI arg: "FixSizes [n|off]" (n is the number of values, default: off) /// @note FixBytes and FixSizes are not mutually exclusive. If both are /// specified, both will run. bool TestArraySizeValues{ false }; vtkm::UInt64 ArraySizeValues{ 1 << 21 }; /// If true, operations like "Unique" will test with a wider range of unique /// values (5%, 10%, 15%, 20%, 25%, 30%, 35%, 40%, 45%, 50%, 75%, 100% /// unique). If false (default), the range is limited to 5%, 25%, 50%, 75%, /// 100%. /// CLI arg: "DetailedOutputRange" enables the extended range. bool DetailedOutputRangeScaling{ false }; // Internal: The benchmarking code will set this depending on execution phase: bool DoByteSizes{ false }; // Compute the number of values for an array with the given type: template VTKM_CONT vtkm::Id ComputeSize() { return this->DoByteSizes ? static_cast(this->ArraySizeBytes / static_cast(sizeof(T))) : static_cast(this->ArraySizeValues); } }; // Share a global instance of the config (only way to get it into the benchmark // functors): static BenchDevAlgoConfig Config = BenchDevAlgoConfig(); struct BaseTypes : vtkm::ListTagBase, vtkm::Float32, vtkm::Vec, vtkm::Float64, vtkm::Vec> { }; struct ExtendedTypes : vtkm::ListTagBase, vtkm::Int32, vtkm::Int64, vtkm::Pair, vtkm::Pair, vtkm::Pair, vtkm::Pair, vtkm::Float32, vtkm::Vec, vtkm::Float64, vtkm::Vec> { }; static const std::string DIVIDER(40, '-'); /// This class runs a series of micro-benchmarks to measure /// performance of the parallel primitives provided by each /// device adapter class BenchmarkDeviceAdapter { using StorageTag = vtkm::cont::StorageTagBasic; using IdArrayHandle = vtkm::cont::ArrayHandle; using Algorithm = vtkm::cont::Algorithm; using Timer = vtkm::cont::Timer; public: // Various kernels used by the different benchmarks to accelerate // initialization of data template struct FillTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; PortalType Output; VTKM_CONT FillTestValueKernel(PortalType out) : Output(out) { } VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i, Value())); } }; template struct FillScaledTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; PortalType Output; const vtkm::Id IdScale; VTKM_CONT FillScaledTestValueKernel(vtkm::Id id_scale, PortalType out) : Output(out) , IdScale(id_scale) { } VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i * IdScale, Value())); } }; template struct FillModuloTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; PortalType Output; const vtkm::Id Modulus; VTKM_CONT FillModuloTestValueKernel(vtkm::Id modulus, PortalType out) : Output(out) , Modulus(modulus) { } VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i % Modulus, Value())); } }; template struct FillBinaryTestValueKernel : vtkm::exec::FunctorBase { using ValueArrayHandle = vtkm::cont::ArrayHandle; PortalType Output; const vtkm::Id Modulus; VTKM_CONT FillBinaryTestValueKernel(vtkm::Id modulus, PortalType out) : Output(out) , Modulus(modulus) { } VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, i % Modulus == 0 ? TestValue(vtkm::Id(1), Value()) : Value()); } }; private: template struct BenchCopy { using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle ValueHandle_src; ValueArrayHandle ValueHandle_dst; std::mt19937 Rng; VTKM_CONT BenchCopy() { vtkm::Id arraySize = Config.ComputeSize(); this->ValueHandle_src.Allocate(arraySize); auto portal = this->ValueHandle_src.GetPortalControl(); for (vtkm::Id i = 0; i < portal.GetNumberOfValues(); ++i) { portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); } } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::Copy(ValueHandle_src, ValueHandle_dst); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "Copy " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(Copy, BenchCopy); template struct BenchCopyIf { using ValueArrayHandle = vtkm::cont::ArrayHandle; const vtkm::Id PERCENT_VALID; const vtkm::Id N_VALID; ValueArrayHandle ValueHandle, OutHandle; IdArrayHandle StencilHandle; VTKM_CONT BenchCopyIf(vtkm::Id percent_valid) : PERCENT_VALID(percent_valid) , N_VALID((Config.ComputeSize() * percent_valid) / 100) { vtkm::Id arraySize = Config.ComputeSize(); vtkm::Id modulo = arraySize / N_VALID; auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); auto sHPortal = StencilHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillBinaryTestValueKernel(modulo, sHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::CopyIf(ValueHandle, StencilHandle, OutHandle); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "CopyIf on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ") with " << PERCENT_VALID << "% valid values"; return description.str(); } }; VTKM_MAKE_BENCHMARK(CopyIf5, BenchCopyIf, 5); VTKM_MAKE_BENCHMARK(CopyIf10, BenchCopyIf, 10); VTKM_MAKE_BENCHMARK(CopyIf15, BenchCopyIf, 15); VTKM_MAKE_BENCHMARK(CopyIf20, BenchCopyIf, 20); VTKM_MAKE_BENCHMARK(CopyIf25, BenchCopyIf, 25); VTKM_MAKE_BENCHMARK(CopyIf30, BenchCopyIf, 30); VTKM_MAKE_BENCHMARK(CopyIf35, BenchCopyIf, 35); VTKM_MAKE_BENCHMARK(CopyIf40, BenchCopyIf, 40); VTKM_MAKE_BENCHMARK(CopyIf45, BenchCopyIf, 45); VTKM_MAKE_BENCHMARK(CopyIf50, BenchCopyIf, 50); VTKM_MAKE_BENCHMARK(CopyIf75, BenchCopyIf, 75); VTKM_MAKE_BENCHMARK(CopyIf100, BenchCopyIf, 100); template struct BenchLowerBounds { using ValueArrayHandle = vtkm::cont::ArrayHandle; const vtkm::Id N_VALS; const vtkm::Id PERCENT_VALUES; ValueArrayHandle InputHandle, ValueHandle; IdArrayHandle OutHandle; VTKM_CONT BenchLowerBounds(vtkm::Id value_percent) : N_VALS((Config.ComputeSize() * value_percent) / 100) , PERCENT_VALUES(value_percent) { vtkm::Id arraySize = Config.ComputeSize(); auto iHPortal = InputHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(iHPortal), arraySize); auto vHPortal = ValueHandle.PrepareForOutput(N_VALS, DeviceAdapter()); Algorithm::Schedule(FillScaledTestValueKernel(2, vHPortal), N_VALS); } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::LowerBounds(InputHandle, ValueHandle, OutHandle); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "LowerBounds on " << arraySize << " input values (" << "(" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ") (" << PERCENT_VALUES << "% configuration)"; return description.str(); } }; VTKM_MAKE_BENCHMARK(LowerBounds5, BenchLowerBounds, 5); VTKM_MAKE_BENCHMARK(LowerBounds10, BenchLowerBounds, 10); VTKM_MAKE_BENCHMARK(LowerBounds15, BenchLowerBounds, 15); VTKM_MAKE_BENCHMARK(LowerBounds20, BenchLowerBounds, 20); VTKM_MAKE_BENCHMARK(LowerBounds25, BenchLowerBounds, 25); VTKM_MAKE_BENCHMARK(LowerBounds30, BenchLowerBounds, 30); VTKM_MAKE_BENCHMARK(LowerBounds35, BenchLowerBounds, 35); VTKM_MAKE_BENCHMARK(LowerBounds40, BenchLowerBounds, 40); VTKM_MAKE_BENCHMARK(LowerBounds45, BenchLowerBounds, 45); VTKM_MAKE_BENCHMARK(LowerBounds50, BenchLowerBounds, 50); VTKM_MAKE_BENCHMARK(LowerBounds75, BenchLowerBounds, 75); VTKM_MAKE_BENCHMARK(LowerBounds100, BenchLowerBounds, 100); template struct BenchReduce { using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle InputHandle; // We don't actually use this, but we need it to prevent sufficiently // smart compilers from optimizing the Reduce call out. Value Result; VTKM_CONT BenchReduce() { vtkm::Id arraySize = Config.ComputeSize(); auto iHPortal = this->InputHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(iHPortal), arraySize); this->Result = Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits::ZeroInitialization()); } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Value tmp = Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits::ZeroInitialization()); vtkm::Float64 time = timer.GetElapsedTime(); if (tmp != this->Result) { this->Result = tmp; } return time; } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "Reduce on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(Reduce, BenchReduce); template struct BenchReduceByKey { using ValueArrayHandle = vtkm::cont::ArrayHandle; const vtkm::Id N_KEYS; const vtkm::Id PERCENT_KEYS; ValueArrayHandle ValueHandle, ValuesOut; IdArrayHandle KeyHandle, KeysOut; VTKM_CONT BenchReduceByKey(vtkm::Id key_percent) : N_KEYS((Config.ComputeSize() * key_percent) / 100) , PERCENT_KEYS(key_percent) { vtkm::Id arraySize = Config.ComputeSize(); auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); auto kHPortal = KeyHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillModuloTestValueKernel(N_KEYS, kHPortal), arraySize); Algorithm::SortByKey(KeyHandle, ValueHandle); } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::ReduceByKey(KeyHandle, ValueHandle, KeysOut, ValuesOut, vtkm::Add()); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "ReduceByKey on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ") with " << N_KEYS << " (" << PERCENT_KEYS << "%) distinct vtkm::Id keys"; return description.str(); } }; VTKM_MAKE_BENCHMARK(ReduceByKey5, BenchReduceByKey, 5); VTKM_MAKE_BENCHMARK(ReduceByKey10, BenchReduceByKey, 10); VTKM_MAKE_BENCHMARK(ReduceByKey15, BenchReduceByKey, 15); VTKM_MAKE_BENCHMARK(ReduceByKey20, BenchReduceByKey, 20); VTKM_MAKE_BENCHMARK(ReduceByKey25, BenchReduceByKey, 25); VTKM_MAKE_BENCHMARK(ReduceByKey30, BenchReduceByKey, 30); VTKM_MAKE_BENCHMARK(ReduceByKey35, BenchReduceByKey, 35); VTKM_MAKE_BENCHMARK(ReduceByKey40, BenchReduceByKey, 40); VTKM_MAKE_BENCHMARK(ReduceByKey45, BenchReduceByKey, 45); VTKM_MAKE_BENCHMARK(ReduceByKey50, BenchReduceByKey, 50); VTKM_MAKE_BENCHMARK(ReduceByKey75, BenchReduceByKey, 75); VTKM_MAKE_BENCHMARK(ReduceByKey100, BenchReduceByKey, 100); template struct BenchScanInclusive { using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle ValueHandle, OutHandle; VTKM_CONT BenchScanInclusive() { vtkm::Id arraySize = Config.ComputeSize(); auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::ScanInclusive(ValueHandle, OutHandle); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "ScanInclusive on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(ScanInclusive, BenchScanInclusive); template struct BenchScanExclusive { using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle ValueHandle, OutHandle; VTKM_CONT BenchScanExclusive() { vtkm::Id arraySize = Config.ComputeSize(); auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(vHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::ScanExclusive(ValueHandle, OutHandle); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "ScanExclusive on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(ScanExclusive, BenchScanExclusive); template struct BenchSort { using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle ValueHandle; std::mt19937 Rng; VTKM_CONT BenchSort() { this->ValueHandle.Allocate(Config.ComputeSize()); auto portal = this->ValueHandle.GetPortalControl(); for (vtkm::Id i = 0; i < portal.GetNumberOfValues(); ++i) { portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); } } VTKM_CONT vtkm::Float64 operator()() { ValueArrayHandle array; Algorithm::Copy(this->ValueHandle, array); Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::Sort(array); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "Sort on " << arraySize << " random values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(Sort, BenchSort); template struct BenchSortByKey { using ValueArrayHandle = vtkm::cont::ArrayHandle; std::mt19937 Rng; vtkm::Id N_KEYS; vtkm::Id PERCENT_KEYS; ValueArrayHandle ValueHandle; IdArrayHandle KeyHandle; VTKM_CONT BenchSortByKey(vtkm::Id percent_key) : N_KEYS((Config.ComputeSize() * percent_key) / 100) , PERCENT_KEYS(percent_key) { vtkm::Id arraySize = Config.ComputeSize(); this->ValueHandle.Allocate(arraySize); auto portal = this->ValueHandle.GetPortalControl(); for (vtkm::Id i = 0; i < portal.GetNumberOfValues(); ++i) { portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); } auto kHPortal = KeyHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillModuloTestValueKernel(N_KEYS, kHPortal), arraySize); } VTKM_CONT vtkm::Float64 operator()() { IdArrayHandle keys; ValueArrayHandle values; Algorithm::Copy(this->KeyHandle, keys); Algorithm::Copy(this->ValueHandle, values); Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::SortByKey(keys, values); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "SortByKey on " << arraySize << " random values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ") with " << N_KEYS << " (" << PERCENT_KEYS << "%) different vtkm::Id keys"; return description.str(); } }; VTKM_MAKE_BENCHMARK(SortByKey5, BenchSortByKey, 5); VTKM_MAKE_BENCHMARK(SortByKey10, BenchSortByKey, 10); VTKM_MAKE_BENCHMARK(SortByKey15, BenchSortByKey, 15); VTKM_MAKE_BENCHMARK(SortByKey20, BenchSortByKey, 20); VTKM_MAKE_BENCHMARK(SortByKey25, BenchSortByKey, 25); VTKM_MAKE_BENCHMARK(SortByKey30, BenchSortByKey, 30); VTKM_MAKE_BENCHMARK(SortByKey35, BenchSortByKey, 35); VTKM_MAKE_BENCHMARK(SortByKey40, BenchSortByKey, 40); VTKM_MAKE_BENCHMARK(SortByKey45, BenchSortByKey, 45); VTKM_MAKE_BENCHMARK(SortByKey50, BenchSortByKey, 50); VTKM_MAKE_BENCHMARK(SortByKey75, BenchSortByKey, 75); VTKM_MAKE_BENCHMARK(SortByKey100, BenchSortByKey, 100); template struct BenchStableSortIndices { using SSI = vtkm::worklet::StableSortIndices; using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle ValueHandle; std::mt19937 Rng; VTKM_CONT BenchStableSortIndices() { vtkm::Id arraySize = Config.ComputeSize(); this->ValueHandle.Allocate(arraySize); auto portal = this->ValueHandle.GetPortalControl(); for (vtkm::Id i = 0; i < portal.GetNumberOfValues(); ++i) { portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value())); } } VTKM_CONT vtkm::Float64 operator()() { vtkm::Id arraySize = Config.ComputeSize(); vtkm::cont::ArrayHandle indices; Algorithm::Copy(vtkm::cont::ArrayHandleIndex(arraySize), indices); Timer timer{ DeviceAdapter() }; timer.Start(); SSI::Sort(ValueHandle, indices); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "StableSortIndices::Sort on " << arraySize << " random values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(StableSortIndices, BenchStableSortIndices); template struct BenchStableSortIndicesUnique { using SSI = vtkm::worklet::StableSortIndices; using IndexArrayHandle = typename SSI::IndexArrayType; using ValueArrayHandle = vtkm::cont::ArrayHandle; const vtkm::Id N_VALID; const vtkm::Id PERCENT_VALID; ValueArrayHandle ValueHandle; IndexArrayHandle IndexHandle; VTKM_CONT BenchStableSortIndicesUnique(vtkm::Id percent_valid) : N_VALID((Config.ComputeSize() * percent_valid) / 100) , PERCENT_VALID(percent_valid) { vtkm::Id arraySize = Config.ComputeSize(); auto vHPortal = this->ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillModuloTestValueKernel(N_VALID, vHPortal), arraySize); this->IndexHandle = SSI::Sort(this->ValueHandle); } VTKM_CONT vtkm::Float64 operator()() { IndexArrayHandle indices; Algorithm::Copy(this->IndexHandle, indices); Timer timer{ DeviceAdapter() }; timer.Start(); SSI::Unique(this->ValueHandle, indices); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "StableSortIndices::Unique on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ") with " << this->N_VALID << " (" << PERCENT_VALID << "%) valid values"; return description.str(); } }; VTKM_MAKE_BENCHMARK(StableSortIndicesUnique5, BenchStableSortIndicesUnique, 5); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique10, BenchStableSortIndicesUnique, 10); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique15, BenchStableSortIndicesUnique, 15); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique20, BenchStableSortIndicesUnique, 20); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique25, BenchStableSortIndicesUnique, 25); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique30, BenchStableSortIndicesUnique, 30); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique35, BenchStableSortIndicesUnique, 35); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique40, BenchStableSortIndicesUnique, 40); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique45, BenchStableSortIndicesUnique, 45); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique50, BenchStableSortIndicesUnique, 50); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique75, BenchStableSortIndicesUnique, 75); VTKM_MAKE_BENCHMARK(StableSortIndicesUnique100, BenchStableSortIndicesUnique, 100); template struct BenchUnique { using ValueArrayHandle = vtkm::cont::ArrayHandle; const vtkm::Id N_VALID; const vtkm::Id PERCENT_VALID; ValueArrayHandle ValueHandle; VTKM_CONT BenchUnique(vtkm::Id percent_valid) : N_VALID((Config.ComputeSize() * percent_valid) / 100) , PERCENT_VALID(percent_valid) { vtkm::Id arraySize = Config.ComputeSize(); auto vHPortal = ValueHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillModuloTestValueKernel(N_VALID, vHPortal), arraySize); Algorithm::Sort(ValueHandle); } VTKM_CONT vtkm::Float64 operator()() { ValueArrayHandle array; Algorithm::Copy(this->ValueHandle, array); Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::Unique(array); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "Unique on " << arraySize << " values (" << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ") with " << N_VALID << " (" << PERCENT_VALID << "%) valid values"; return description.str(); } }; VTKM_MAKE_BENCHMARK(Unique5, BenchUnique, 5); VTKM_MAKE_BENCHMARK(Unique10, BenchUnique, 10); VTKM_MAKE_BENCHMARK(Unique15, BenchUnique, 15); VTKM_MAKE_BENCHMARK(Unique20, BenchUnique, 20); VTKM_MAKE_BENCHMARK(Unique25, BenchUnique, 25); VTKM_MAKE_BENCHMARK(Unique30, BenchUnique, 30); VTKM_MAKE_BENCHMARK(Unique35, BenchUnique, 35); VTKM_MAKE_BENCHMARK(Unique40, BenchUnique, 40); VTKM_MAKE_BENCHMARK(Unique45, BenchUnique, 45); VTKM_MAKE_BENCHMARK(Unique50, BenchUnique, 50); VTKM_MAKE_BENCHMARK(Unique75, BenchUnique, 75); VTKM_MAKE_BENCHMARK(Unique100, BenchUnique, 100); template struct BenchUpperBounds { using ValueArrayHandle = vtkm::cont::ArrayHandle; const vtkm::Id N_VALS; const vtkm::Id PERCENT_VALS; ValueArrayHandle InputHandle, ValueHandle; IdArrayHandle OutHandle; VTKM_CONT BenchUpperBounds(vtkm::Id percent_vals) : N_VALS((Config.ComputeSize() * percent_vals) / 100) , PERCENT_VALS(percent_vals) { vtkm::Id arraySize = Config.ComputeSize(); auto iHPortal = InputHandle.PrepareForOutput(arraySize, DeviceAdapter()); Algorithm::Schedule(FillTestValueKernel(iHPortal), arraySize); auto vHPortal = ValueHandle.PrepareForOutput(N_VALS, DeviceAdapter()); Algorithm::Schedule(FillScaledTestValueKernel(2, vHPortal), N_VALS); } VTKM_CONT vtkm::Float64 operator()() { vtkm::cont::Timer timer{ DeviceAdapter() }; timer.Start(); Algorithm::UpperBounds(InputHandle, ValueHandle, OutHandle); return timer.GetElapsedTime(); } VTKM_CONT std::string Description() const { vtkm::Id arraySize = Config.ComputeSize(); std::stringstream description; description << "UpperBounds on " << arraySize << " input and " << N_VALS << " (" << PERCENT_VALS << "%) values (input array size: " << vtkm::cont::GetHumanReadableSize(static_cast(arraySize) * sizeof(Value)) << ")"; return description.str(); } }; VTKM_MAKE_BENCHMARK(UpperBounds5, BenchUpperBounds, 5); VTKM_MAKE_BENCHMARK(UpperBounds10, BenchUpperBounds, 10); VTKM_MAKE_BENCHMARK(UpperBounds15, BenchUpperBounds, 15); VTKM_MAKE_BENCHMARK(UpperBounds20, BenchUpperBounds, 20); VTKM_MAKE_BENCHMARK(UpperBounds25, BenchUpperBounds, 25); VTKM_MAKE_BENCHMARK(UpperBounds30, BenchUpperBounds, 30); VTKM_MAKE_BENCHMARK(UpperBounds35, BenchUpperBounds, 35); VTKM_MAKE_BENCHMARK(UpperBounds40, BenchUpperBounds, 40); VTKM_MAKE_BENCHMARK(UpperBounds45, BenchUpperBounds, 45); VTKM_MAKE_BENCHMARK(UpperBounds50, BenchUpperBounds, 50); VTKM_MAKE_BENCHMARK(UpperBounds75, BenchUpperBounds, 75); VTKM_MAKE_BENCHMARK(UpperBounds100, BenchUpperBounds, 100); public: static VTKM_CONT int Run(vtkm::cont::DeviceAdapterId id) { // Run fixed bytes / size tests: for (int sizeType = 0; sizeType < 2; ++sizeType) { if (sizeType == 0 && Config.TestArraySizeBytes) { std::cout << DIVIDER << "\nTesting fixed array byte sizes\n"; Config.DoByteSizes = true; if (!Config.ExtendedTypeList) { RunInternal(id); } else { RunInternal(id); } } if (sizeType == 1 && Config.TestArraySizeValues) { std::cout << DIVIDER << "\nTesting fixed array element counts\n"; Config.DoByteSizes = false; if (!Config.ExtendedTypeList) { RunInternal(id); } else { RunInternal(id); } } } return 0; } template static VTKM_CONT void RunInternal(vtkm::cont::DeviceAdapterId id) { if (Config.BenchmarkFlags & COPY) { std::cout << DIVIDER << "\nBenchmarking Copy\n"; VTKM_RUN_BENCHMARK(Copy, ValueTypes(), id); } if (Config.BenchmarkFlags & COPY_IF) { std::cout << "\n" << DIVIDER << "\nBenchmarking CopyIf\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf10, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf15, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf20, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf30, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf35, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf40, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf45, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf50, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf75, ValueTypes(), id); VTKM_RUN_BENCHMARK(CopyIf100, ValueTypes(), id); } } if (Config.BenchmarkFlags & LOWER_BOUNDS) { std::cout << DIVIDER << "\nBenchmarking LowerBounds\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds10, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds35, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds40, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds45, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds50, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds75, ValueTypes(), id); VTKM_RUN_BENCHMARK(LowerBounds100, ValueTypes(), id); } } if (Config.BenchmarkFlags & REDUCE) { std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n"; VTKM_RUN_BENCHMARK(Reduce, ValueTypes(), id); } if (Config.BenchmarkFlags & REDUCE_BY_KEY) { std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey35, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey40, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey45, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey50, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey75, ValueTypes(), id); VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes(), id); } } if (Config.BenchmarkFlags & SCAN_INCLUSIVE) { std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n"; VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes(), id); } if (Config.BenchmarkFlags & SCAN_EXCLUSIVE) { std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n"; VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes(), id); } if (Config.BenchmarkFlags & SORT) { std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n"; VTKM_RUN_BENCHMARK(Sort, ValueTypes(), id); } if (Config.BenchmarkFlags & SORT_BY_KEY) { std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey30, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey35, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey40, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey45, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey50, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey75, ValueTypes(), id); VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes(), id); } } if (Config.BenchmarkFlags & STABLE_SORT_INDICES) { std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Sort\n"; VTKM_RUN_BENCHMARK(StableSortIndices, ValueTypes(), id); } if (Config.BenchmarkFlags & STABLE_SORT_INDICES_UNIQUE) { std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Unique\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique10, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique15, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique20, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique30, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique35, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique40, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique45, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique50, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique75, ValueTypes(), id); VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes(), id); } } if (Config.BenchmarkFlags & UNIQUE) { std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(Unique5, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique10, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique15, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique20, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique25, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique30, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique35, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique40, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique45, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique50, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique75, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(Unique5, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique25, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique50, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique75, ValueTypes(), id); VTKM_RUN_BENCHMARK(Unique100, ValueTypes(), id); } } if (Config.BenchmarkFlags & UPPER_BOUNDS) { std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n"; if (Config.DetailedOutputRangeScaling) { VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds35, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds40, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds45, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes(), id); } else { VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds50, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds75, ValueTypes(), id); VTKM_RUN_BENCHMARK(UpperBounds100, ValueTypes(), id); } } } }; #undef ARRAY_SIZE } } // namespace vtkm::benchmarking int main(int argc, char* argv[]) { auto opt = vtkm::cont::InitializeOptions::RequireDevice; auto initConfig = vtkm::cont::Initialize(argc, argv, opt); int numThreads{ 0 }; #ifdef VTKM_ENABLE_TBB if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB()) { numThreads = tbb::task_scheduler_init::automatic; } #endif #ifdef VTKM_ENABLE_OPENMP if (initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP()) { numThreads = omp_get_max_threads(); } #endif vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config; for (size_t i = 0; i < initConfig.Arguments.size(); ++i) { std::string arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); if (arg == "copy") { config.BenchmarkFlags |= vtkm::benchmarking::COPY; } else if (arg == "copyif") { config.BenchmarkFlags |= vtkm::benchmarking::COPY_IF; } else if (arg == "lowerbounds") { config.BenchmarkFlags |= vtkm::benchmarking::LOWER_BOUNDS; } else if (arg == "reduce") { config.BenchmarkFlags |= vtkm::benchmarking::REDUCE; } else if (arg == "reducebykey") { config.BenchmarkFlags |= vtkm::benchmarking::REDUCE_BY_KEY; } else if (arg == "scaninclusive") { config.BenchmarkFlags |= vtkm::benchmarking::SCAN_INCLUSIVE; } else if (arg == "scanexclusive") { config.BenchmarkFlags |= vtkm::benchmarking::SCAN_EXCLUSIVE; } else if (arg == "sort") { config.BenchmarkFlags |= vtkm::benchmarking::SORT; } else if (arg == "sortbykey") { config.BenchmarkFlags |= vtkm::benchmarking::SORT_BY_KEY; } else if (arg == "stablesortindices") { config.BenchmarkFlags |= vtkm::benchmarking::STABLE_SORT_INDICES; } else if (arg == "stablesortindicesunique") { config.BenchmarkFlags |= vtkm::benchmarking::STABLE_SORT_INDICES_UNIQUE; } else if (arg == "unique") { config.BenchmarkFlags |= vtkm::benchmarking::UNIQUE; } else if (arg == "upperbounds") { config.BenchmarkFlags |= vtkm::benchmarking::UPPER_BOUNDS; } else if (arg == "typelist") { ++i; arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); if (arg == "base") { config.ExtendedTypeList = false; } else if (arg == "extended") { config.ExtendedTypeList = true; } else { std::cerr << "Unrecognized TypeList: " << initConfig.Arguments[i] << std::endl; return 1; } } else if (arg == "fixbytes") { ++i; arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); if (arg == "off") { config.TestArraySizeBytes = false; } else { std::istringstream parse(arg); config.TestArraySizeBytes = true; parse >> config.ArraySizeBytes; } } else if (arg == "fixsizes") { ++i; arg = initConfig.Arguments[i]; std::transform(arg.begin(), arg.end(), arg.begin(), [](char c) { return static_cast(std::tolower(static_cast(c))); }); if (arg == "off") { config.TestArraySizeValues = false; } else { std::istringstream parse(arg); config.TestArraySizeValues = true; parse >> config.ArraySizeValues; } } else if (arg == "detailedoutputrange") { config.DetailedOutputRangeScaling = true; } else if (arg == "numthreads") { ++i; if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB() || initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP()) { std::istringstream parse(initConfig.Arguments[i]); parse >> numThreads; std::cout << "Selected " << numThreads << " " << initConfig.Device.GetName() << " threads." << std::endl; } else { std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl; } } else { std::cerr << "Unrecognized benchmark: " << initConfig.Arguments[i] << std::endl; return 1; } } #ifdef VTKM_ENABLE_TBB // Must not be destroyed as long as benchmarks are running: if (initConfig.Device == vtkm::cont::DeviceAdapterTagTBB()) { tbb::task_scheduler_init init(numThreads); } #endif #ifdef VTKM_ENABLE_OPENMP if (initConfig.Device == vtkm::cont::DeviceAdapterTagOpenMP()) { omp_set_num_threads(numThreads); } #endif // TBB if (config.BenchmarkFlags == 0) { config.BenchmarkFlags = vtkm::benchmarking::ALL; } //now actually execute the benchmarks return vtkm::benchmarking::BenchmarkDeviceAdapter::Run(initConfig.Device); }