//============================================================================ // Copyright (c) Kitware, Inc. // All rights reserved. // See LICENSE.txt for details. // // This software is distributed WITHOUT ANY WARRANTY; without even // the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR // PURPOSE. See the above copyright notice for more information. //============================================================================ #include #include #include #include #include #include #include #include #include #include #include #include "Benchmarker.h" #include #include #include #include namespace { //============================================================================== // Benchmark Parameters #define ARRAY_SIZE (1 << 22) #define CUBE_SIZE 256 using ValueTypes = vtkm::List; using InterpValueTypes = vtkm::List; //============================================================================== // Worklets and helpers // Hold configuration state (e.g. active device) vtkm::cont::InitializeResult Config; template class BlackScholes : public vtkm::worklet::WorkletMapField { T Riskfree; T Volatility; public: using ControlSignature = void(FieldIn, FieldIn, FieldIn, FieldOut, FieldOut); using ExecutionSignature = void(_1, _2, _3, _4, _5); BlackScholes(T risk, T volatility) : Riskfree(risk) , Volatility(volatility) { } VTKM_EXEC T CumulativeNormalDistribution(T d) const { const vtkm::Float32 A1 = 0.31938153f; const vtkm::Float32 A2 = -0.356563782f; const vtkm::Float32 A3 = 1.781477937f; const vtkm::Float32 A4 = -1.821255978f; const vtkm::Float32 A5 = 1.330274429f; const vtkm::Float32 RSQRT2PI = 0.39894228040143267793994605993438f; const vtkm::Float32 df = static_cast(d); const vtkm::Float32 K = 1.0f / (1.0f + 0.2316419f * vtkm::Abs(df)); vtkm::Float32 cnd = RSQRT2PI * vtkm::Exp(-0.5f * df * df) * (K * (A1 + K * (A2 + K * (A3 + K * (A4 + K * A5))))); if (df > 0.0f) { cnd = 1.0f - cnd; } return static_cast(cnd); } template VTKM_EXEC void operator()(const U& sp, const V& os, const W& oy, T& callResult, T& putResult) const { const T stockPrice = static_cast(sp); const T optionStrike = static_cast(os); const T optionYears = static_cast(oy); // Black-Scholes formula for both call and put const T sqrtYears = vtkm::Sqrt(optionYears); const T volMultSqY = this->Volatility * sqrtYears; const T d1 = (vtkm::Log(stockPrice / optionStrike) + (this->Riskfree + 0.5f * Volatility * Volatility) * optionYears) / (volMultSqY); const T d2 = d1 - volMultSqY; const T CNDD1 = CumulativeNormalDistribution(d1); const T CNDD2 = CumulativeNormalDistribution(d2); //Calculate Call and Put simultaneously T expRT = vtkm::Exp(-this->Riskfree * optionYears); callResult = stockPrice * CNDD1 - optionStrike * expRT * CNDD2; putResult = optionStrike * expRT * (1.0f - CNDD2) - stockPrice * (1.0f - CNDD1); } }; class Mag : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut); using ExecutionSignature = void(_1, _2); template VTKM_EXEC void operator()(const vtkm::Vec& vec, U& result) const { result = static_cast(vtkm::Magnitude(vec)); } }; class Square : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut); using ExecutionSignature = void(_1, _2); template VTKM_EXEC void operator()(T input, U& output) const { output = static_cast(input * input); } }; class Sin : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut); using ExecutionSignature = void(_1, _2); template VTKM_EXEC void operator()(T input, U& output) const { output = static_cast(vtkm::Sin(input)); } }; class Cos : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut); using ExecutionSignature = void(_1, _2); template VTKM_EXEC void operator()(T input, U& output) const { output = static_cast(vtkm::Cos(input)); } }; class FusedMath : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut); using ExecutionSignature = void(_1, _2); template VTKM_EXEC void operator()(const vtkm::Vec& vec, T& result) const { const T m = vtkm::Magnitude(vec); result = vtkm::Cos(vtkm::Sin(m) * vtkm::Sin(m)); } template VTKM_EXEC void operator()(const vtkm::Vec&, U&) const { this->RaiseError("Mixed types unsupported."); } }; class GenerateEdges : public vtkm::worklet::WorkletVisitCellsWithPoints { public: using ControlSignature = void(CellSetIn cellset, WholeArrayOut edgeIds); using ExecutionSignature = void(PointIndices, ThreadIndices, _2); using InputDomain = _1; template VTKM_EXEC void operator()(const ConnectivityInVec& connectivity, const ThreadIndicesType threadIndices, const IdPairTableType& edgeIds) const { const vtkm::Id writeOffset = (threadIndices.GetInputIndex() * 12); const vtkm::IdComponent edgeTable[24] = { 0, 1, 1, 2, 3, 2, 0, 3, 4, 5, 5, 6, 7, 6, 4, 7, 0, 4, 1, 5, 2, 6, 3, 7 }; for (vtkm::Id i = 0; i < 12; ++i) { const vtkm::Id offset = (i * 2); const vtkm::Id2 edge(connectivity[edgeTable[offset]], connectivity[edgeTable[offset + 1]]); edgeIds.Set(writeOffset + i, edge); } } }; class InterpolateField : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn interpolation_ids, FieldIn interpolation_weights, WholeArrayIn inputField, FieldOut output); using ExecutionSignature = void(_1, _2, _3, _4); using InputDomain = _1; template VTKM_EXEC void operator()(const vtkm::Id2& low_high, const WeightType& weight, const PortalType& inPortal, U& result) const { using T = typename PortalType::ValueType; this->DoIt(low_high, weight, inPortal, result, typename std::is_same::type{}); } template VTKM_EXEC void DoIt(const vtkm::Id2& low_high, const WeightType& weight, const PortalType& inPortal, typename PortalType::ValueType& result, std::true_type) const { //fetch the low / high values from inPortal result = vtkm::Lerp(inPortal.Get(low_high[0]), inPortal.Get(low_high[1]), weight); } template VTKM_EXEC void DoIt(const vtkm::Id2&, const WeightType&, const PortalType&, U&, std::false_type) const { //the inPortal and result need to be the same type so this version only //exists to generate code when using dynamic arrays this->RaiseError("Mixed types unsupported."); } }; class EvaluateImplicitFunction : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut, ExecObject); using ExecutionSignature = void(_1, _2, _3); template VTKM_EXEC void operator()(const VecType& point, ScalarType& val, const FunctionType& function) const { val = function.Value(point); } }; class Evaluate2ImplicitFunctions : public vtkm::worklet::WorkletMapField { public: using ControlSignature = void(FieldIn, FieldOut, ExecObject, ExecObject); using ExecutionSignature = void(_1, _2, _3, _4); template VTKM_EXEC void operator()(const VecType& point, ScalarType& val, const FType1& function1, const FType2& function2) const { val = function1.Value(point) + function2.Value(point); } }; struct PassThroughFunctor { template VTKM_EXEC_CONT T operator()(const T& x) const { return x; } }; template using ArrayHandlePassThrough = vtkm::cont::ArrayHandleTransform; template struct JunkArrayHandle : vtkm::cont::ArrayHandle { }; template using BMArrayHandleMultiplexer = vtkm::cont::ArrayHandleMultiplexer, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, JunkArrayHandle, ArrayHandlePassThrough>; template BMArrayHandleMultiplexer make_ArrayHandleMultiplexer0(const ArrayHandleType& array) { VTKM_IS_ARRAY_HANDLE(ArrayHandleType); return BMArrayHandleMultiplexer(array); } template BMArrayHandleMultiplexer make_ArrayHandleMultiplexerN(const ArrayHandleType& array) { VTKM_IS_ARRAY_HANDLE(ArrayHandleType); return BMArrayHandleMultiplexer(ArrayHandlePassThrough(array)); } //============================================================================== // Benchmark implementations: template struct BenchBlackScholesImpl { using ValueArrayHandle = vtkm::cont::ArrayHandle; ValueArrayHandle StockPrice; ValueArrayHandle OptionStrike; ValueArrayHandle OptionYears; ::benchmark::State& State; vtkm::Id ArraySize; vtkm::cont::Timer Timer; vtkm::cont::Invoker Invoker; VTKM_CONT BenchBlackScholesImpl(::benchmark::State& state) : State{ state } , ArraySize{ ARRAY_SIZE } , Timer{ Config.Device } , Invoker{ Config.Device } { { // Initialize arrays std::mt19937 rng; std::uniform_real_distribution price_range(Value(5.0f), Value(30.0f)); std::uniform_real_distribution strike_range(Value(1.0f), Value(100.0f)); std::uniform_real_distribution year_range(Value(0.25f), Value(10.0f)); this->StockPrice.Allocate(this->ArraySize); this->OptionStrike.Allocate(this->ArraySize); this->OptionYears.Allocate(this->ArraySize); auto stockPricePortal = this->StockPrice.WritePortal(); auto optionStrikePortal = this->OptionStrike.WritePortal(); auto optionYearsPortal = this->OptionYears.WritePortal(); for (vtkm::Id i = 0; i < this->ArraySize; ++i) { stockPricePortal.Set(i, price_range(rng)); optionStrikePortal.Set(i, strike_range(rng)); optionYearsPortal.Set(i, year_range(rng)); } } { // Configure label: const vtkm::Id numBytes = this->ArraySize * static_cast(sizeof(Value)); std::ostringstream desc; desc << "NumValues:" << this->ArraySize << " (" << vtkm::cont::GetHumanReadableSize(numBytes) << ")"; this->State.SetLabel(desc.str()); } } template VTKM_CONT void Run(const BenchArrayType& stockPrice, const BenchArrayType& optionStrike, const BenchArrayType& optionYears) { static constexpr Value RISKFREE = 0.02f; static constexpr Value VOLATILITY = 0.30f; BlackScholes worklet(RISKFREE, VOLATILITY); vtkm::cont::ArrayHandle callResultHandle; vtkm::cont::ArrayHandle putResultHandle; for (auto _ : this->State) { (void)_; this->Timer.Start(); this->Invoker( worklet, stockPrice, optionStrike, optionYears, callResultHandle, putResultHandle); this->Timer.Stop(); this->State.SetIterationTime(this->Timer.GetElapsedTime()); } const int64_t iterations = static_cast(this->State.iterations()); const int64_t numValues = static_cast(this->ArraySize); this->State.SetItemsProcessed(numValues * iterations); } }; template void BenchBlackScholesStatic(::benchmark::State& state) { BenchBlackScholesImpl impl{ state }; impl.Run(impl.StockPrice, impl.OptionStrike, impl.OptionYears); }; VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesStatic, ValueTypes); template void BenchBlackScholesMultiplexer0(::benchmark::State& state) { BenchBlackScholesImpl impl{ state }; impl.Run(make_ArrayHandleMultiplexer0(impl.StockPrice), make_ArrayHandleMultiplexer0(impl.OptionStrike), make_ArrayHandleMultiplexer0(impl.OptionYears)); }; VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesMultiplexer0, ValueTypes); template void BenchBlackScholesMultiplexerN(::benchmark::State& state) { BenchBlackScholesImpl impl{ state }; impl.Run(make_ArrayHandleMultiplexerN(impl.StockPrice), make_ArrayHandleMultiplexerN(impl.OptionStrike), make_ArrayHandleMultiplexerN(impl.OptionYears)); }; VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesMultiplexerN, ValueTypes); template struct BenchMathImpl { vtkm::cont::ArrayHandle> InputHandle; vtkm::cont::ArrayHandle TempHandle1; vtkm::cont::ArrayHandle TempHandle2; ::benchmark::State& State; vtkm::Id ArraySize; vtkm::cont::Timer Timer; vtkm::cont::Invoker Invoker; VTKM_CONT BenchMathImpl(::benchmark::State& state) : State{ state } , ArraySize{ ARRAY_SIZE } , Timer{ Config.Device } , Invoker{ Config.Device } { { // Initialize input std::mt19937 rng; std::uniform_real_distribution range; this->InputHandle.Allocate(this->ArraySize); auto portal = this->InputHandle.WritePortal(); for (vtkm::Id i = 0; i < this->ArraySize; ++i) { portal.Set(i, vtkm::Vec{ range(rng), range(rng), range(rng) }); } } } template VTKM_CONT void Run(const InputArrayType& inputHandle, const BenchArrayType& tempHandle1, const BenchArrayType& tempHandle2) { { // Configure label: const vtkm::Id numBytes = this->ArraySize * static_cast(sizeof(Value)); std::ostringstream desc; desc << "NumValues:" << this->ArraySize << " (" << vtkm::cont::GetHumanReadableSize(numBytes) << ")"; this->State.SetLabel(desc.str()); } for (auto _ : this->State) { (void)_; this->Timer.Start(); this->Invoker(Mag{}, inputHandle, tempHandle1); this->Invoker(Sin{}, tempHandle1, tempHandle2); this->Invoker(Square{}, tempHandle2, tempHandle1); this->Invoker(Cos{}, tempHandle1, tempHandle2); this->Timer.Stop(); this->State.SetIterationTime(this->Timer.GetElapsedTime()); } const int64_t iterations = static_cast(this->State.iterations()); const int64_t numValues = static_cast(this->ArraySize); this->State.SetItemsProcessed(numValues * iterations); } }; template void BenchMathStatic(::benchmark::State& state) { BenchMathImpl impl{ state }; impl.Run(impl.InputHandle, impl.TempHandle1, impl.TempHandle2); }; VTKM_BENCHMARK_TEMPLATES(BenchMathStatic, ValueTypes); template void BenchMathMultiplexer0(::benchmark::State& state) { BenchMathImpl impl{ state }; impl.Run(make_ArrayHandleMultiplexer0(impl.InputHandle), make_ArrayHandleMultiplexer0(impl.TempHandle1), make_ArrayHandleMultiplexer0(impl.TempHandle2)); }; VTKM_BENCHMARK_TEMPLATES(BenchMathMultiplexer0, ValueTypes); template void BenchMathMultiplexerN(::benchmark::State& state) { BenchMathImpl impl{ state }; impl.Run(make_ArrayHandleMultiplexerN(impl.InputHandle), make_ArrayHandleMultiplexerN(impl.TempHandle1), make_ArrayHandleMultiplexerN(impl.TempHandle2)); }; VTKM_BENCHMARK_TEMPLATES(BenchMathMultiplexerN, ValueTypes); template struct BenchFusedMathImpl { vtkm::cont::ArrayHandle> InputHandle; ::benchmark::State& State; vtkm::Id ArraySize; vtkm::cont::Timer Timer; vtkm::cont::Invoker Invoker; VTKM_CONT BenchFusedMathImpl(::benchmark::State& state) : State{ state } , ArraySize{ ARRAY_SIZE } , Timer{ Config.Device } , Invoker{ Config.Device } { { // Initialize input std::mt19937 rng; std::uniform_real_distribution range; this->InputHandle.Allocate(this->ArraySize); auto portal = this->InputHandle.WritePortal(); for (vtkm::Id i = 0; i < this->ArraySize; ++i) { portal.Set(i, vtkm::Vec{ range(rng), range(rng), range(rng) }); } } { // Configure label: const vtkm::Id numBytes = this->ArraySize * static_cast(sizeof(Value)); std::ostringstream desc; desc << "NumValues:" << this->ArraySize << " (" << vtkm::cont::GetHumanReadableSize(numBytes) << ")"; this->State.SetLabel(desc.str()); } } template VTKM_CONT void Run(const BenchArrayType& inputHandle) { vtkm::cont::ArrayHandle result; for (auto _ : this->State) { (void)_; this->Timer.Start(); this->Invoker(FusedMath{}, inputHandle, result); this->Timer.Stop(); this->State.SetIterationTime(this->Timer.GetElapsedTime()); } const int64_t iterations = static_cast(this->State.iterations()); const int64_t numValues = static_cast(this->ArraySize); this->State.SetItemsProcessed(numValues * iterations); } }; template void BenchFusedMathStatic(::benchmark::State& state) { BenchFusedMathImpl impl{ state }; impl.Run(impl.InputHandle); }; VTKM_BENCHMARK_TEMPLATES(BenchFusedMathStatic, ValueTypes); template void BenchFusedMathMultiplexer0(::benchmark::State& state) { BenchFusedMathImpl impl{ state }; impl.Run(make_ArrayHandleMultiplexer0(impl.InputHandle)); }; VTKM_BENCHMARK_TEMPLATES(BenchFusedMathMultiplexer0, ValueTypes); template void BenchFusedMathMultiplexerN(::benchmark::State& state) { BenchFusedMathImpl impl{ state }; impl.Run(make_ArrayHandleMultiplexerN(impl.InputHandle)); }; VTKM_BENCHMARK_TEMPLATES(BenchFusedMathMultiplexerN, ValueTypes); template struct BenchEdgeInterpImpl { vtkm::cont::ArrayHandle WeightHandle; vtkm::cont::ArrayHandle FieldHandle; vtkm::cont::ArrayHandle EdgePairHandle; ::benchmark::State& State; vtkm::Id CubeSize; vtkm::cont::Timer Timer; vtkm::cont::Invoker Invoker; VTKM_CONT BenchEdgeInterpImpl(::benchmark::State& state) : State{ state } , CubeSize{ CUBE_SIZE } , Timer{ Config.Device } , Invoker{ Config.Device } { { // Initialize arrays using CT = typename vtkm::VecTraits::ComponentType; std::mt19937 rng; std::uniform_real_distribution weight_range(0.0f, 1.0f); std::uniform_real_distribution field_range; //basically the core challenge is to generate an array whose //indexing pattern matches that of a edge based algorithm. // //So for this kind of problem we generate the 12 edges of each //cell and place them into array. vtkm::cont::CellSetStructured<3> cellSet; cellSet.SetPointDimensions(vtkm::Id3{ this->CubeSize, this->CubeSize, this->CubeSize }); const vtkm::Id numberOfEdges = cellSet.GetNumberOfCells() * 12; this->EdgePairHandle.Allocate(numberOfEdges); this->Invoker(GenerateEdges{}, cellSet, this->EdgePairHandle); { // Per-edge weights this->WeightHandle.Allocate(numberOfEdges); auto portal = this->WeightHandle.WritePortal(); for (vtkm::Id i = 0; i < numberOfEdges; ++i) { portal.Set(i, weight_range(rng)); } } { // Point field this->FieldHandle.Allocate(cellSet.GetNumberOfPoints()); auto portal = this->FieldHandle.WritePortal(); for (vtkm::Id i = 0; i < portal.GetNumberOfValues(); ++i) { portal.Set(i, field_range(rng)); } } } { // Configure label: const vtkm::Id numValues = this->FieldHandle.GetNumberOfValues(); const vtkm::Id numBytes = numValues * static_cast(sizeof(Value)); std::ostringstream desc; desc << "FieldValues:" << numValues << " (" << vtkm::cont::GetHumanReadableSize(numBytes) << ") | CubeSize: " << this->CubeSize; this->State.SetLabel(desc.str()); } } template VTKM_CONT void Run(const EdgePairArrayType& edgePairs, const WeightArrayType& weights, const FieldArrayType& field) { vtkm::cont::ArrayHandle result; for (auto _ : this->State) { (void)_; this->Timer.Start(); this->Invoker(InterpolateField{}, edgePairs, weights, field, result); this->Timer.Stop(); this->State.SetIterationTime(this->Timer.GetElapsedTime()); } } }; template void BenchEdgeInterpStatic(::benchmark::State& state) { BenchEdgeInterpImpl impl{ state }; impl.Run(impl.EdgePairHandle, impl.WeightHandle, impl.FieldHandle); }; VTKM_BENCHMARK_TEMPLATES(BenchEdgeInterpStatic, InterpValueTypes); struct ImplicitFunctionBenchData { vtkm::cont::ArrayHandle Points; vtkm::cont::ArrayHandle Result; vtkm::Sphere Sphere1; vtkm::Sphere Sphere2; }; static ImplicitFunctionBenchData MakeImplicitFunctionBenchData() { vtkm::Id count = ARRAY_SIZE; vtkm::FloatDefault bounds[6] = { -2.0f, 2.0f, -2.0f, 2.0f, -2.0f, 2.0f }; ImplicitFunctionBenchData data; data.Points.Allocate(count); data.Result.Allocate(count); std::default_random_engine rangen; std::uniform_real_distribution distx(bounds[0], bounds[1]); std::uniform_real_distribution disty(bounds[2], bounds[3]); std::uniform_real_distribution distz(bounds[4], bounds[5]); auto portal = data.Points.WritePortal(); for (vtkm::Id i = 0; i < count; ++i) { portal.Set(i, vtkm::make_Vec(distx(rangen), disty(rangen), distz(rangen))); } data.Sphere1 = vtkm::Sphere({ 0.22f, 0.33f, 0.44f }, 0.55f); data.Sphere2 = vtkm::Sphere({ 0.22f, 0.33f, 0.11f }, 0.77f); return data; } void BenchImplicitFunction(::benchmark::State& state) { using EvalWorklet = EvaluateImplicitFunction; const vtkm::cont::DeviceAdapterId device = Config.Device; auto data = MakeImplicitFunctionBenchData(); { std::ostringstream desc; desc << data.Points.GetNumberOfValues() << " points"; state.SetLabel(desc.str()); } EvalWorklet eval; vtkm::cont::Timer timer{ device }; vtkm::cont::Invoker invoker{ device }; for (auto _ : state) { (void)_; timer.Start(); invoker(eval, data.Points, data.Result, data.Sphere1); timer.Stop(); state.SetIterationTime(timer.GetElapsedTime()); } } VTKM_BENCHMARK(BenchImplicitFunction); void Bench2ImplicitFunctions(::benchmark::State& state) { using EvalWorklet = Evaluate2ImplicitFunctions; const vtkm::cont::DeviceAdapterId device = Config.Device; auto data = MakeImplicitFunctionBenchData(); { std::ostringstream desc; desc << data.Points.GetNumberOfValues() << " points"; state.SetLabel(desc.str()); } EvalWorklet eval; vtkm::cont::Timer timer{ device }; vtkm::cont::Invoker invoker{ device }; for (auto _ : state) { (void)_; timer.Start(); invoker(eval, data.Points, data.Result, data.Sphere1, data.Sphere2); timer.Stop(); state.SetIterationTime(timer.GetElapsedTime()); } } VTKM_BENCHMARK(Bench2ImplicitFunctions); } // end anon namespace int main(int argc, char* argv[]) { // Parse VTK-m options: auto opts = vtkm::cont::InitializeOptions::RequireDevice; std::vector args(argv, argv + argc); vtkm::bench::detail::InitializeArgs(&argc, args, opts); // Parse VTK-m options: Config = vtkm::cont::Initialize(argc, args.data(), opts); // This occurs when it is help if (opts == vtkm::cont::InitializeOptions::None) { std::cout << Config.Usage << std::endl; } else { vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device); } // handle benchmarking related args and run benchmarks: VTKM_EXECUTE_BENCHMARKS(argc, args.data()); }