vtk-m/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx

//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/TypeTraits.h>
#include <vtkm/benchmarking/Benchmarker.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleConstant.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/ArrayHandlePermutation.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <vtkm/cont/ArrayPortalToIterators.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/StorageBasic.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/internal/DeviceAdapterError.h>
#include <vtkm/cont/testing/Testing.h>
#include <vtkm/worklet/StableSortIndices.h>
#include <algorithm>
#include <cmath>
#include <random>
#include <string>
#include <utility>
#include <vtkm/internal/Windows.h>
namespace vtkm
{
namespace benchmarking
{
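// Problem size shared by every benchmark below: 1 << 21 (about 2.1 million)
// values per array.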
#define ARRAY_SIZE (1 << 21)
const static std::string DIVIDER(40, '-');
enum BenchmarkName
{
COPY = 1,
COPY_IF = 1 << 1,
LOWER_BOUNDS = 1 << 2,
REDUCE = 1 << 3,
REDUCE_BY_KEY = 1 << 4,
SCAN_INCLUSIVE = 1 << 5,
SCAN_EXCLUSIVE = 1 << 6,
SORT = 1 << 7,
SORT_BY_KEY = 1 << 8,
STABLE_SORT_INDICES = 1 << 9,
STABLE_SORT_INDICES_UNIQUE = 1 << 10,
UNIQUE = 1 << 11,
UPPER_BOUNDS = 1 << 12,
ALL = COPY | COPY_IF | LOWER_BOUNDS | REDUCE | REDUCE_BY_KEY | SCAN_INCLUSIVE | SCAN_EXCLUSIVE |
SORT |
SORT_BY_KEY |
STABLE_SORT_INDICES |
STABLE_SORT_INDICES_UNIQUE |
UNIQUE |
UPPER_BOUNDS
};
/// This class runs a series of micro-benchmarks to measure
/// performance of the parallel primitives provided by each
/// device adapter.
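///
/// A minimal usage sketch (the adapter tag and the benchmark mask here are
/// illustrative; any device adapter tag and OR-ed BenchmarkName flags work):
///
///   using Bench = BenchmarkDeviceAdapter<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>;
///   Bench::Run(COPY | SORT);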
template <class DeviceAdapterTag>
class BenchmarkDeviceAdapter
{
typedef vtkm::cont::StorageTagBasic StorageTag;
typedef vtkm::cont::ArrayHandle<vtkm::Id, StorageTag> IdArrayHandle;
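// Algorithm resolves to this adapter's implementations of the parallel
// primitives (Copy, Sort, Reduce, ...). The Timer starts when constructed,
// and GetElapsedTime() is expected to wait for queued device work to finish
// before reporting seconds, so each timed region below covers one completed
// algorithm call.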
typedef vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag> Algorithm;
typedef vtkm::cont::Timer<DeviceAdapterTag> Timer;
public:
// Various kernels used by the different benchmarks to accelerate
// initialization of data
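// Each kernel writes one test value per index. A typical invocation, as in
// the benchmark constructors below, fills an ARRAY_SIZE-element handle in a
// single untimed pass (here `handle` stands for any basic ArrayHandle):
//
//   Algorithm::Schedule(
//     FillTestValueKernel<Value>(handle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
//     ARRAY_SIZE);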
template <typename Value>
struct FillTestValueKernel : vtkm::exec::FunctorBase
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
typedef typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal PortalType;
PortalType Output;
VTKM_CONT
FillTestValueKernel(PortalType out)
: Output(out)
{
}
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i, Value())); }
};
template <typename Value>
struct FillScaledTestValueKernel : vtkm::exec::FunctorBase
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
typedef typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal PortalType;
PortalType Output;
const vtkm::Id IdScale;
VTKM_CONT
FillScaledTestValueKernel(vtkm::Id id_scale, PortalType out)
: Output(out)
, IdScale(id_scale)
{
}
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i * IdScale, Value())); }
};
template <typename Value>
struct FillModuloTestValueKernel : vtkm::exec::FunctorBase
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
typedef typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal PortalType;
PortalType Output;
const vtkm::Id Modulus;
VTKM_CONT
FillModuloTestValueKernel(vtkm::Id modulus, PortalType out)
: Output(out)
, Modulus(modulus)
{
}
VTKM_EXEC void operator()(vtkm::Id i) const { Output.Set(i, TestValue(i % Modulus, Value())); }
};
template <typename Value>
struct FillBinaryTestValueKernel : vtkm::exec::FunctorBase
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
typedef typename ValueArrayHandle::template ExecutionTypes<DeviceAdapterTag>::Portal PortalType;
PortalType Output;
const vtkm::Id Modulus;
VTKM_CONT
FillBinaryTestValueKernel(vtkm::Id modulus, PortalType out)
: Output(out)
, Modulus(modulus)
{
}
VTKM_EXEC void operator()(vtkm::Id i) const
{
Output.Set(i, i % Modulus == 0 ? TestValue(vtkm::Id(1), Value()) : Value());
}
};
private:
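// Every benchmark functor below follows the same protocol: the constructor
// does untimed setup (the harness constructs the functor once, then invokes
// it repeatedly), operator() times a single algorithm call and returns the
// elapsed seconds, and Description() labels the measurement in the output.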
template <typename Value>
struct BenchCopy
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
ValueArrayHandle ValueHandle_src;
ValueArrayHandle ValueHandle_dst;
std::mt19937 Rng;
VTKM_CONT
BenchCopy()
{
ValueHandle_src.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
ValueHandle_dst.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
}
VTKM_CONT
vtkm::Float64 operator()()
{
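// Refill the source with fresh random values on each run; the timer is
// constructed afterwards, so only the Copy below is measured.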
for (vtkm::Id i = 0; i < ValueHandle_src.GetNumberOfValues(); ++i)
{
ValueHandle_src.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
}
Timer timer;
Algorithm::Copy(ValueHandle_src, ValueHandle_dst);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Copy " << ARRAY_SIZE << " values";
return description.str();
}
};
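// VTKM_MAKE_BENCHMARK generates a maker struct whose operator(), templated
// on the value type, constructs this functor for that type;
// VTKM_RUN_BENCHMARK (used in Run() below) then invokes it across a type list.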
VTKM_MAKE_BENCHMARK(Copy, BenchCopy);
template <typename Value>
struct BenchCopyIf
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
const vtkm::Id N_VALID;
ValueArrayHandle ValueHandle, OutHandle;
IdArrayHandle StencilHandle;
VTKM_CONT
BenchCopyIf(vtkm::Id percent_valid)
: N_VALID((ARRAY_SIZE * percent_valid) / 100)
{
vtkm::Id modulo = ARRAY_SIZE / N_VALID;
Algorithm::Schedule(
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Algorithm::Schedule(FillBinaryTestValueKernel<vtkm::Id>(
modulo, StencilHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Algorithm::CopyIf(ValueHandle, StencilHandle, OutHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "CopyIf on " << ARRAY_SIZE << " "
2017-05-18 14:29:41 +00:00
<< " values with " << OutHandle.GetNumberOfValues() << " valid values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(CopyIf5, BenchCopyIf, 5);
VTKM_MAKE_BENCHMARK(CopyIf10, BenchCopyIf, 10);
VTKM_MAKE_BENCHMARK(CopyIf15, BenchCopyIf, 15);
VTKM_MAKE_BENCHMARK(CopyIf20, BenchCopyIf, 20);
VTKM_MAKE_BENCHMARK(CopyIf25, BenchCopyIf, 25);
VTKM_MAKE_BENCHMARK(CopyIf30, BenchCopyIf, 30);
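// The maker struct carries the constructor argument, so each percentage of
// valid entries (5% through 30% of ARRAY_SIZE) needs its own benchmark name.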
template <typename Value>
struct BenchLowerBounds
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
const vtkm::Id N_VALS;
ValueArrayHandle InputHandle, ValueHandle;
IdArrayHandle OutHandle;
VTKM_CONT
BenchLowerBounds(vtkm::Id value_percent)
: N_VALS((ARRAY_SIZE * value_percent) / 100)
{
Algorithm::Schedule(
FillTestValueKernel<Value>(InputHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Algorithm::Schedule(FillScaledTestValueKernel<Value>(
2, ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())),
N_VALS);
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Algorithm::LowerBounds(InputHandle, ValueHandle, OutHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "LowerBounds on " << ARRAY_SIZE << " input and " << N_VALS << " values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(LowerBounds5, BenchLowerBounds, 5);
VTKM_MAKE_BENCHMARK(LowerBounds10, BenchLowerBounds, 10);
VTKM_MAKE_BENCHMARK(LowerBounds15, BenchLowerBounds, 15);
VTKM_MAKE_BENCHMARK(LowerBounds20, BenchLowerBounds, 20);
VTKM_MAKE_BENCHMARK(LowerBounds25, BenchLowerBounds, 25);
VTKM_MAKE_BENCHMARK(LowerBounds30, BenchLowerBounds, 30);
template <typename Value>
struct BenchReduce
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
ValueArrayHandle InputHandle;
// We don't actually use this, but we need it to prevent sufficiently
// smart compilers from optimizing the Reduce call out.
Value Result;
VTKM_CONT
BenchReduce()
{
Algorithm::Schedule(
FillTestValueKernel<Value>(InputHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
this->Result =
Algorithm::Reduce(this->InputHandle, vtkm::TypeTraits<Value>::ZeroInitialization());
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Value tmp = Algorithm::Reduce(InputHandle, vtkm::TypeTraits<Value>::ZeroInitialization());
vtkm::Float64 time = timer.GetElapsedTime();
if (tmp != this->Result)
{
this->Result = tmp;
}
return time;
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Reduce on " << ARRAY_SIZE << " values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(Reduce, BenchReduce);
template <typename Value>
struct BenchReduceByKey
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
const vtkm::Id N_KEYS;
ValueArrayHandle ValueHandle, ValuesOut;
IdArrayHandle KeyHandle, KeysOut;
VTKM_CONT
BenchReduceByKey(vtkm::Id key_percent)
: N_KEYS((ARRAY_SIZE * key_percent) / 100)
{
Algorithm::Schedule(
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id>(
N_KEYS, KeyHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Algorithm::SortByKey(KeyHandle, ValueHandle);
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Algorithm::ReduceByKey(KeyHandle, ValueHandle, KeysOut, ValuesOut, vtkm::Add());
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "ReduceByKey on " << ARRAY_SIZE << " values with " << N_KEYS
<< " distinct vtkm::Id keys";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(ReduceByKey5, BenchReduceByKey, 5);
VTKM_MAKE_BENCHMARK(ReduceByKey10, BenchReduceByKey, 10);
VTKM_MAKE_BENCHMARK(ReduceByKey15, BenchReduceByKey, 15);
VTKM_MAKE_BENCHMARK(ReduceByKey20, BenchReduceByKey, 20);
VTKM_MAKE_BENCHMARK(ReduceByKey25, BenchReduceByKey, 25);
VTKM_MAKE_BENCHMARK(ReduceByKey30, BenchReduceByKey, 30);
VTKM_MAKE_BENCHMARK(ReduceByKey100, BenchReduceByKey, 100);
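// At 100 percent, N_KEYS == ARRAY_SIZE and i % N_KEYS == i, so every value
// gets its own key and ReduceByKey combines essentially nothing.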
template <typename Value>
struct BenchScanInclusive
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
ValueArrayHandle ValueHandle, OutHandle;
VTKM_CONT
BenchScanInclusive()
{
Algorithm::Schedule(
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Algorithm::ScanInclusive(ValueHandle, OutHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "ScanInclusive on " << ARRAY_SIZE << " values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(ScanInclusive, BenchScanInclusive);
template <typename Value>
struct BenchScanExclusive
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
ValueArrayHandle ValueHandle, OutHandle;
VTKM_CONT
BenchScanExclusive()
{
Algorithm::Schedule(
FillTestValueKernel<Value>(ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Algorithm::ScanExclusive(ValueHandle, OutHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "ScanExclusive on " << ARRAY_SIZE << " values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(ScanExclusive, BenchScanExclusive);
template <typename Value>
struct BenchSort
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
ValueArrayHandle ValueHandle;
std::mt19937 Rng;
VTKM_CONT
BenchSort() { ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag()); }
VTKM_CONT
vtkm::Float64 operator()()
{
for (vtkm::Id i = 0; i < ValueHandle.GetNumberOfValues(); ++i)
{
ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
}
Timer timer;
Algorithm::Sort(ValueHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Sort on " << ARRAY_SIZE << " random values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(Sort, BenchSort);
template <typename Value>
struct BenchSortByKey
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
std::mt19937 Rng;
vtkm::Id N_KEYS;
ValueArrayHandle ValueHandle;
IdArrayHandle KeyHandle;
VTKM_CONT
BenchSortByKey(vtkm::Id percent_key)
: N_KEYS((ARRAY_SIZE * percent_key) / 100)
{
ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag());
}
VTKM_CONT
vtkm::Float64 operator()()
{
for (vtkm::Id i = 0; i < ValueHandle.GetNumberOfValues(); ++i)
{
ValueHandle.GetPortalControl().Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
}
Algorithm::Schedule(FillModuloTestValueKernel<vtkm::Id>(
N_KEYS, KeyHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Timer timer;
Algorithm::SortByKey(ValueHandle, KeyHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "SortByKey on " << ARRAY_SIZE << " random values with " << N_KEYS
<< " different vtkm::Id keys";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(SortByKey5, BenchSortByKey, 5);
VTKM_MAKE_BENCHMARK(SortByKey10, BenchSortByKey, 10);
VTKM_MAKE_BENCHMARK(SortByKey15, BenchSortByKey, 15);
VTKM_MAKE_BENCHMARK(SortByKey20, BenchSortByKey, 20);
VTKM_MAKE_BENCHMARK(SortByKey25, BenchSortByKey, 25);
VTKM_MAKE_BENCHMARK(SortByKey30, BenchSortByKey, 30);
VTKM_MAKE_BENCHMARK(SortByKey100, BenchSortByKey, 100);
template <typename Value>
struct BenchStableSortIndices
{
using SSI = vtkm::worklet::StableSortIndices<DeviceAdapterTag>;
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
ValueArrayHandle ValueHandle;
std::mt19937 Rng;
VTKM_CONT
BenchStableSortIndices()
{
this->ValueHandle.Allocate(ARRAY_SIZE);
auto portal = this->ValueHandle.GetPortalControl();
for (vtkm::Id i = 0; i < portal.GetNumberOfValues(); ++i)
{
portal.Set(vtkm::Id(i), TestValue(vtkm::Id(Rng()), Value()));
}
}
VTKM_CONT
vtkm::Float64 operator()()
{
vtkm::cont::ArrayHandle<vtkm::Id> indices;
Algorithm::Copy(vtkm::cont::ArrayHandleIndex(ARRAY_SIZE), indices);
Timer timer;
SSI::Sort(ValueHandle, indices);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "StableSortIndices::Sort on " << ARRAY_SIZE << " random values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(StableSortIndices, BenchStableSortIndices);
template <typename Value>
struct BenchStableSortIndicesUnique
{
using SSI = vtkm::worklet::StableSortIndices<DeviceAdapterTag>;
using IndexArrayHandle = typename SSI::IndexArrayType;
using ValueArrayHandle = vtkm::cont::ArrayHandle<Value, StorageTag>;
const vtkm::Id N_VALID;
ValueArrayHandle ValueHandle;
VTKM_CONT
BenchStableSortIndicesUnique(vtkm::Id percent_valid)
: N_VALID((ARRAY_SIZE * percent_valid) / 100)
{
Algorithm::Schedule(
FillModuloTestValueKernel<Value>(
N_VALID, this->ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
}
VTKM_CONT
vtkm::Float64 operator()()
{
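// Unique filters the index array in place, so a freshly sorted index array
// is rebuilt (untimed) on every run; only the Unique call is measured.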
IndexArrayHandle indices = SSI::Sort(this->ValueHandle);
Timer timer;
SSI::Unique(this->ValueHandle, indices);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "StableSortIndices::Unique on " << ARRAY_SIZE << " values with "
<< this->N_VALID << " valid values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique5, BenchStableSortIndicesUnique, 5);
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique10, BenchStableSortIndicesUnique, 10);
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique15, BenchStableSortIndicesUnique, 15);
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique20, BenchStableSortIndicesUnique, 20);
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique25, BenchStableSortIndicesUnique, 25);
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique30, BenchStableSortIndicesUnique, 30);
VTKM_MAKE_BENCHMARK(StableSortIndicesUnique100, BenchStableSortIndicesUnique, 100);
template <typename Value>
struct BenchUnique
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
const vtkm::Id N_VALID;
ValueArrayHandle ValueHandle;
VTKM_CONT
BenchUnique(vtkm::Id percent_valid)
: N_VALID((ARRAY_SIZE * percent_valid) / 100)
{
}
VTKM_CONT
vtkm::Float64 operator()()
{
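// Unique compacts ValueHandle in place, so the refill and sort are redone
// on every run; the timer brackets only the Unique call.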
Algorithm::Schedule(FillModuloTestValueKernel<Value>(
N_VALID, ValueHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Algorithm::Sort(ValueHandle);
Timer timer;
Algorithm::Unique(ValueHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "Unique on " << ARRAY_SIZE << " values with "
<< ValueHandle.GetNumberOfValues() << " valid values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(Unique5, BenchUnique, 5);
VTKM_MAKE_BENCHMARK(Unique10, BenchUnique, 10);
VTKM_MAKE_BENCHMARK(Unique15, BenchUnique, 15);
VTKM_MAKE_BENCHMARK(Unique20, BenchUnique, 20);
VTKM_MAKE_BENCHMARK(Unique25, BenchUnique, 25);
VTKM_MAKE_BENCHMARK(Unique30, BenchUnique, 30);
template <typename Value>
struct BenchUpperBounds
{
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
const vtkm::Id N_VALS;
ValueArrayHandle InputHandle, ValueHandle;
IdArrayHandle OutHandle;
VTKM_CONT
BenchUpperBounds(vtkm::Id percent_vals)
: N_VALS((ARRAY_SIZE * percent_vals) / 100)
{
Algorithm::Schedule(
FillTestValueKernel<Value>(InputHandle.PrepareForOutput(ARRAY_SIZE, DeviceAdapterTag())),
ARRAY_SIZE);
Algorithm::Schedule(FillScaledTestValueKernel<Value>(
2, ValueHandle.PrepareForOutput(N_VALS, DeviceAdapterTag())),
N_VALS);
}
VTKM_CONT
vtkm::Float64 operator()()
{
Timer timer;
Algorithm::UpperBounds(InputHandle, ValueHandle, OutHandle);
return timer.GetElapsedTime();
}
VTKM_CONT
std::string Description() const
{
std::stringstream description;
description << "UpperBounds on " << ARRAY_SIZE << " input and " << N_VALS << " values";
return description.str();
}
};
VTKM_MAKE_BENCHMARK(UpperBounds5, BenchUpperBounds, 5);
VTKM_MAKE_BENCHMARK(UpperBounds10, BenchUpperBounds, 10);
VTKM_MAKE_BENCHMARK(UpperBounds15, BenchUpperBounds, 15);
VTKM_MAKE_BENCHMARK(UpperBounds20, BenchUpperBounds, 20);
VTKM_MAKE_BENCHMARK(UpperBounds25, BenchUpperBounds, 25);
VTKM_MAKE_BENCHMARK(UpperBounds30, BenchUpperBounds, 30);
public:
struct ValueTypes : vtkm::ListTagBase<vtkm::UInt8,
vtkm::UInt32,
vtkm::Int32,
vtkm::Int64,
vtkm::Vec<vtkm::Int32, 2>,
vtkm::Vec<vtkm::UInt8, 4>,
vtkm::Float32,
vtkm::Float64,
vtkm::Vec<vtkm::Float64, 3>,
vtkm::Vec<vtkm::Float32, 4>>
{
};
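// The type list spans 1-byte integers through 3- and 4-component vectors,
// so each primitive is measured across a representative range of value sizes.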
static VTKM_CONT int Run(int benchmarks)
{
std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n";
if (benchmarks & COPY)
{
std::cout << DIVIDER << "\nBenchmarking Copy\n";
VTKM_RUN_BENCHMARK(Copy, ValueTypes());
}
if (benchmarks & COPY_IF)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking CopyIf\n";
VTKM_RUN_BENCHMARK(CopyIf5, ValueTypes());
VTKM_RUN_BENCHMARK(CopyIf10, ValueTypes());
VTKM_RUN_BENCHMARK(CopyIf15, ValueTypes());
VTKM_RUN_BENCHMARK(CopyIf20, ValueTypes());
VTKM_RUN_BENCHMARK(CopyIf25, ValueTypes());
VTKM_RUN_BENCHMARK(CopyIf30, ValueTypes());
}
if (benchmarks & LOWER_BOUNDS)
{
std::cout << DIVIDER << "\nBenchmarking LowerBounds\n";
VTKM_RUN_BENCHMARK(LowerBounds5, ValueTypes());
VTKM_RUN_BENCHMARK(LowerBounds10, ValueTypes());
VTKM_RUN_BENCHMARK(LowerBounds15, ValueTypes());
VTKM_RUN_BENCHMARK(LowerBounds20, ValueTypes());
VTKM_RUN_BENCHMARK(LowerBounds25, ValueTypes());
VTKM_RUN_BENCHMARK(LowerBounds30, ValueTypes());
}
if (benchmarks & REDUCE)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n";
VTKM_RUN_BENCHMARK(Reduce, ValueTypes());
}
if (benchmarks & REDUCE_BY_KEY)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n";
VTKM_RUN_BENCHMARK(ReduceByKey5, ValueTypes());
VTKM_RUN_BENCHMARK(ReduceByKey10, ValueTypes());
VTKM_RUN_BENCHMARK(ReduceByKey15, ValueTypes());
VTKM_RUN_BENCHMARK(ReduceByKey20, ValueTypes());
VTKM_RUN_BENCHMARK(ReduceByKey25, ValueTypes());
VTKM_RUN_BENCHMARK(ReduceByKey30, ValueTypes());
VTKM_RUN_BENCHMARK(ReduceByKey100, ValueTypes());
}
if (benchmarks & SCAN_INCLUSIVE)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n";
VTKM_RUN_BENCHMARK(ScanInclusive, ValueTypes());
}
if (benchmarks & SCAN_EXCLUSIVE)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n";
VTKM_RUN_BENCHMARK(ScanExclusive, ValueTypes());
}
if (benchmarks & SORT)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n";
VTKM_RUN_BENCHMARK(Sort, ValueTypes());
}
if (benchmarks & SORT_BY_KEY)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n";
VTKM_RUN_BENCHMARK(SortByKey5, ValueTypes());
VTKM_RUN_BENCHMARK(SortByKey10, ValueTypes());
VTKM_RUN_BENCHMARK(SortByKey15, ValueTypes());
VTKM_RUN_BENCHMARK(SortByKey20, ValueTypes());
VTKM_RUN_BENCHMARK(SortByKey25, ValueTypes());
VTKM_RUN_BENCHMARK(SortByKey30, ValueTypes());
VTKM_RUN_BENCHMARK(SortByKey100, ValueTypes());
}
if (benchmarks & STABLE_SORT_INDICES)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Sort\n";
VTKM_RUN_BENCHMARK(StableSortIndices, ValueTypes());
}
if (benchmarks & STABLE_SORT_INDICES_UNIQUE)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking StableSortIndices::Unique\n";
VTKM_RUN_BENCHMARK(StableSortIndicesUnique5, ValueTypes());
VTKM_RUN_BENCHMARK(StableSortIndicesUnique10, ValueTypes());
VTKM_RUN_BENCHMARK(StableSortIndicesUnique15, ValueTypes());
VTKM_RUN_BENCHMARK(StableSortIndicesUnique20, ValueTypes());
VTKM_RUN_BENCHMARK(StableSortIndicesUnique25, ValueTypes());
VTKM_RUN_BENCHMARK(StableSortIndicesUnique30, ValueTypes());
VTKM_RUN_BENCHMARK(StableSortIndicesUnique100, ValueTypes());
}
if (benchmarks & UNIQUE)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n";
VTKM_RUN_BENCHMARK(Unique5, ValueTypes());
VTKM_RUN_BENCHMARK(Unique10, ValueTypes());
VTKM_RUN_BENCHMARK(Unique15, ValueTypes());
VTKM_RUN_BENCHMARK(Unique20, ValueTypes());
VTKM_RUN_BENCHMARK(Unique25, ValueTypes());
VTKM_RUN_BENCHMARK(Unique30, ValueTypes());
}
if (benchmarks & UPPER_BOUNDS)
{
std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n";
VTKM_RUN_BENCHMARK(UpperBounds5, ValueTypes());
VTKM_RUN_BENCHMARK(UpperBounds10, ValueTypes());
VTKM_RUN_BENCHMARK(UpperBounds15, ValueTypes());
VTKM_RUN_BENCHMARK(UpperBounds20, ValueTypes());
VTKM_RUN_BENCHMARK(UpperBounds25, ValueTypes());
VTKM_RUN_BENCHMARK(UpperBounds30, ValueTypes());
}
return 0;
}
};
#undef ARRAY_SIZE
}
} // namespace vtkm::benchmarking
int main(int argc, char* argv[])
{
int benchmarks = 0;
if (argc < 2)
{
benchmarks = vtkm::benchmarking::ALL;
}
else
{
for (int i = 1; i < argc; ++i)
{
std::string arg = argv[i];
std::transform(arg.begin(), arg.end(), arg.begin(), ::tolower);
if (arg == "copy")
{
benchmarks |= vtkm::benchmarking::COPY;
}
else if (arg == "copyif")
{
benchmarks |= vtkm::benchmarking::COPY_IF;
}
else if (arg == "lowerbounds")
{
benchmarks |= vtkm::benchmarking::LOWER_BOUNDS;
}
else if (arg == "reduce")
{
benchmarks |= vtkm::benchmarking::REDUCE;
}
else if (arg == "reducebykey")
{
benchmarks |= vtkm::benchmarking::REDUCE_BY_KEY;
}
else if (arg == "scaninclusive")
{
benchmarks |= vtkm::benchmarking::SCAN_INCLUSIVE;
}
else if (arg == "scanexclusive")
{
benchmarks |= vtkm::benchmarking::SCAN_EXCLUSIVE;
}
else if (arg == "sort")
{
benchmarks |= vtkm::benchmarking::SORT;
}
else if (arg == "sortbykey")
{
benchmarks |= vtkm::benchmarking::SORT_BY_KEY;
}
else if (arg == "stablesortindices")
{
benchmarks |= vtkm::benchmarking::STABLE_SORT_INDICES;
}
else if (arg == "stablesortindicesunique")
{
benchmarks |= vtkm::benchmarking::STABLE_SORT_INDICES_UNIQUE;
}
else if (arg == "unique")
{
benchmarks |= vtkm::benchmarking::UNIQUE;
}
else if (arg == "upperbounds")
{
benchmarks |= vtkm::benchmarking::UPPER_BOUNDS;
}
else
{
std::cout << "Unrecognized benchmark: " << argv[i] << std::endl;
return 1;
}
}
}
// Now actually execute the benchmarks.
return vtkm::benchmarking::BenchmarkDeviceAdapter<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>::Run(
benchmarks);
}