From 238d4fa7594330486bc31c2023c05af6d0735cc0 Mon Sep 17 00:00:00 2001
From: Will Usher
Date: Mon, 6 Jul 2015 15:44:29 -0600
Subject: [PATCH] Adding micro benchmark suite

---
 CMake/VTKmMacros.cmake                       | 144 ++++++
 CMakeLists.txt                               |   1 +
 vtkm/CMakeLists.txt                          |   5 +
 vtkm/benchmarking/BenchmarkDeviceAdapter.cxx |  30 ++
 vtkm/benchmarking/BenchmarkDeviceAdapter.h   | 441 +++++++++++++++++++
 vtkm/benchmarking/CMakeLists.txt             |  35 ++
 6 files changed, 656 insertions(+)
 create mode 100644 vtkm/benchmarking/BenchmarkDeviceAdapter.cxx
 create mode 100644 vtkm/benchmarking/BenchmarkDeviceAdapter.h
 create mode 100644 vtkm/benchmarking/CMakeLists.txt

diff --git a/CMake/VTKmMacros.cmake b/CMake/VTKmMacros.cmake
index 02f97393e..23daf6e3b 100644
--- a/CMake/VTKmMacros.cmake
+++ b/CMake/VTKmMacros.cmake
@@ -388,6 +388,150 @@ function(vtkm_worklet_unit_tests device_adapter)
   set(CUDA_NVCC_FLAGS ${old_nvcc_flags})
 endfunction(vtkm_worklet_unit_tests)
 
+# Save the benchmarks to run with each device adapter
+# This is based on vtkm_save_worklet_unit_tests
+# Usage:
+#
+# vtkm_save_benchmarks( sources )
+#
+# notes: appends the absolute source paths, their .cu copies and the generated driver
+# to the vtkm_benchmarks_sources, _cu_sources and _drivers global properties
+function(vtkm_save_benchmarks)
+
+  #create the benchmarks driver when we are called, since
+  #the driver expects the files to be in the same
+  #directory as the test driver
+  #TODO: This is probably ok to use for benchmarks as well
+  create_test_sourcelist(bench_sources BenchmarkDriver.cxx ${ARGN})
+
+  #store the absolute path for the driver and all the test
+  #files
+  set(driver ${CMAKE_CURRENT_BINARY_DIR}/BenchmarkDriver.cxx)
+  set(cxx_sources)
+  set(cu_sources)
+
+  #we need to store the absolute source for the file so that
+  #we can properly compile it into the benchmark driver. At
+  #the same time we want to configure each file into the build
+  #directory as a .cu file so that we can compile it with cuda
+  #if needed
+  foreach(fname ${ARGN})
+    set(absPath)
+
+    get_filename_component(absPath ${fname} ABSOLUTE)
+    get_filename_component(file_name_only ${fname} NAME_WE)
+
+    set(cuda_file_name "${CMAKE_CURRENT_BINARY_DIR}/${file_name_only}.cu")
+    configure_file("${absPath}"
+                   "${cuda_file_name}"
+                   COPYONLY)
+    list(APPEND cxx_sources ${absPath})
+    list(APPEND cu_sources ${cuda_file_name})
+  endforeach()
+
+  #we create properties that hold all the benchmarks to build,
+  #but don't actually attempt to create an executable with them yet.
+  #That is done by each device adapter
+  set_property( GLOBAL APPEND
+                PROPERTY vtkm_benchmarks_sources ${cxx_sources})
+  set_property( GLOBAL APPEND
+                PROPERTY vtkm_benchmarks_cu_sources ${cu_sources})
+  set_property( GLOBAL APPEND
+                PROPERTY vtkm_benchmarks_drivers ${driver})
+
+endfunction(vtkm_save_benchmarks)
+
+# Build the benchmark executable for the given device adapter
+# Usage:
+#
+# vtkm_benchmarks( device_adapter )
+#
+# notes: will look for the vtkm_benchmarks_sources global
+# property to find what are the benchmarks that need to be
+# compiled for the given device adapter
+function(vtkm_benchmarks device_adapter)
+
+  set(benchmark_srcs)
+  get_property(benchmark_srcs GLOBAL
+               PROPERTY vtkm_benchmarks_sources )
+
+  set(benchmark_drivers)
+  get_property(benchmark_drivers GLOBAL
+               PROPERTY vtkm_benchmarks_drivers )
+
+  #detect if we are generating .cu files
+  set(is_cuda FALSE)
+  set(old_nvcc_flags ${CUDA_NVCC_FLAGS})
+  if("${device_adapter}" STREQUAL "VTKM_DEVICE_ADAPTER_CUDA")
+    set(is_cuda TRUE)
+    #if we are generating cu files need to setup four things.
+    #1. use the configured .cu files
+    #2. Explicitly set the cuda device adapter as a define this is currently
+    #   done as a work around since the cuda executable ignores compile
+    #   definitions
+    #3. Set BOOST_SP_DISABLE_THREADS to disable threading warnings
+    #4. Disable unused function warnings
+    #   the FindCUDA module and helper methods don't read target level
+    #   properties so we have to modify CUDA_NVCC_FLAGS instead of using
+    #   target and source level COMPILE_FLAGS and COMPILE_DEFINITIONS
+    get_property(benchmark_srcs GLOBAL PROPERTY vtkm_benchmarks_cu_sources )
+
+    list(APPEND CUDA_NVCC_FLAGS "-DVTKM_DEVICE_ADAPTER=${device_adapter}")
+    list(APPEND CUDA_NVCC_FLAGS "-DBOOST_SP_DISABLE_THREADS")
+    list(APPEND CUDA_NVCC_FLAGS "-w")
+  endif()
+
+
+  if(VTKm_ENABLE_BENCHMARKS AND VTKm_ENABLE_TESTING)
+    string(REPLACE "VTKM_DEVICE_ADAPTER_" "" device_type ${device_adapter})
+
+    vtkm_get_kit_name(kit) #NOTE(review): kit is not used below - confirm it can go
+
+    #inject the device adapter into the benchmark program name so each one is unique
+    set(benchmark_prog Benchmarks_${device_type})
+
+    if(is_cuda)
+      cuda_add_executable(${benchmark_prog} ${benchmark_drivers} ${benchmark_srcs})
+    else()
+      add_executable(${benchmark_prog} ${benchmark_drivers} ${benchmark_srcs})
+      if("${device_adapter}" STREQUAL "VTKM_DEVICE_ADAPTER_TBB")
+        target_link_libraries(${benchmark_prog} ${TBB_LIBRARIES})
+      endif()
+    endif()
+
+    if(MSVC)
+      #disable MSVC CRT and SCL warnings as they recommend using non standard
+      #c++ extensions
+      set_property(TARGET ${benchmark_prog}
+                   APPEND PROPERTY COMPILE_DEFINITIONS
+                   "_SCL_SECURE_NO_WARNINGS"
+                   "_CRT_SECURE_NO_WARNINGS"
+                   )
+
+      #enable large object support 2^32 addressable sections (COMPILE_FLAGS is a string)
+      set_property(TARGET ${benchmark_prog}
+                   APPEND_STRING PROPERTY COMPILE_FLAGS
+                   " /bigobj"
+                   )
+    endif()
+
+    #increase warning level if needed. Per the FindCUDA note above nvcc does not read
+    #target level flags, which keeps cuda's false positive unused function warnings out.
+    #COMPILE_FLAGS is a plain string: APPEND would put a ';' between /bigobj and these
+    if(VTKm_EXTRA_COMPILER_WARNINGS)
+      set_property(TARGET ${benchmark_prog}
+            APPEND_STRING PROPERTY COMPILE_FLAGS " ${CMAKE_CXX_FLAGS_WARN_EXTRA}" )
+    endif()
+
+    #set the device adapter on the executable
+    set_property(TARGET ${benchmark_prog}
+                 APPEND
+                 PROPERTY COMPILE_DEFINITIONS "VTKM_DEVICE_ADAPTER=${device_adapter}" )
+  endif()
+
+  set(CUDA_NVCC_FLAGS ${old_nvcc_flags})
+endfunction(vtkm_benchmarks)
+
 # The Thrust project is not as careful as the VTKm project in avoiding warnings
 # on shadow variables and unused arguments. With a real GCC compiler, you
 # can disable these warnings inline, but with something like nvcc, those
diff --git a/CMakeLists.txt b/CMakeLists.txt
index df60c1487..91d8bdae1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -56,6 +56,7 @@ include(CMake/VTKmCompilerExtras.cmake)
 option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
 option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
 option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
+option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
 
 option(VTKm_USE_DOUBLE_PRECISION
        "Use double precision for floating point calculations"
diff --git a/vtkm/CMakeLists.txt b/vtkm/CMakeLists.txt
index db1a920bc..3796750b8 100644
--- a/vtkm/CMakeLists.txt
+++ b/vtkm/CMakeLists.txt
@@ -53,3 +53,8 @@ add_subdirectory(exec)
 #-----------------------------------------------------------------------------
 #add the worklet folder
 add_subdirectory(worklet)
+
+#-----------------------------------------------------------------------------
+#add the benchmarking folder
+add_subdirectory(benchmarking)
+
diff --git a/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx b/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx
new file mode 100644
index 000000000..20a49e1a0
--- /dev/null
+++ b/vtkm/benchmarking/BenchmarkDeviceAdapter.cxx
@@ -0,0 +1,30 @@
+//============================================================================
+// Copyright (c) Kitware, Inc.
+// All rights reserved.
+// See LICENSE.txt for details.
+// This software is distributed WITHOUT ANY WARRANTY; without even
+// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+// PURPOSE. See the above copyright notice for more information.
+//
+// Copyright 2014 Sandia Corporation.
+// Copyright 2014 UT-Battelle, LLC.
+// Copyright 2014 Los Alamos National Security.
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+// Laboratory (LANL), the U.S. Government retains certain rights in
+// this software.
+//============================================================================
+
+#include
+
+#include
+
+int BenchmarkDeviceAdapter(int, char *[]) // named after this file; presumably the entry the generated BenchmarkDriver.cxx calls. Both arguments are ignored
+{
+  return vtkm::benchmarking::BenchmarkDeviceAdapter
+    ::Run(); // runs the whole benchmark suite and returns its status code
+}
+
diff --git a/vtkm/benchmarking/BenchmarkDeviceAdapter.h b/vtkm/benchmarking/BenchmarkDeviceAdapter.h
new file mode 100644
index 000000000..9309ab79f
--- /dev/null
+++ b/vtkm/benchmarking/BenchmarkDeviceAdapter.h
@@ -0,0 +1,441 @@
+//============================================================================
+// Copyright (c) Kitware, Inc.
+// All rights reserved.
+// See LICENSE.txt for details.
+// This software is distributed WITHOUT ANY WARRANTY; without even
+// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+// PURPOSE. See the above copyright notice for more information.
+//
+// Copyright 2014 Sandia Corporation.
+// Copyright 2014 UT-Battelle, LLC.
+// Copyright 2014 Los Alamos National Security.
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+// Laboratory (LANL), the U.S. Government retains certain rights in
+// this software.
+//============================================================================
+
+#ifndef vtk_m_benchmarking_BenchmarkDeviceAdapter_h
+#define vtk_m_benchmarking_BenchmarkDeviceAdapter_h
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#define NOMINMAX
+#include
+#undef NOMINMAX
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+namespace vtkm {
+namespace benchmarking {
+
+#define ARRAY_SIZE (1 << 20) // number of elements in every benchmark input; #undef'd at the end of this header
+const static std::string DIVIDER(40, '-'); // separator line printed between benchmark sections
+
+/// This class runs a series of micro-benchmarks to measure
+/// performance of the parallel primitives provided by each
+/// device adapter. Every Bench* functor is invoked once per type in ValueTypes
+/// and prints the elapsed time reported by Timer to std::cout.
+template
+struct BenchmarkDeviceAdapter {
+private:
+  typedef vtkm::cont::StorageTagBasic StorageTagBasic;
+  typedef vtkm::cont::StorageTagBasic StorageTag;
+
+  typedef vtkm::cont::ArrayHandle IdArrayHandle;
+
+  typedef vtkm::cont::DeviceAdapterAlgorithm
+    Algorithm;
+
+  typedef vtkm::cont::Timer Timer;
+
+  struct BenchLowerBounds { // times LowerBounds of 5%..30% as many query values against an ARRAY_SIZE input
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+
+      std::vector input(ARRAY_SIZE, Value());
+      for (size_t i = 0; i < input.size(); ++i){
+        input[i] = TestValue(vtkm::Id(i), Value());
+      }
+      ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input);
+
+      // We benchmark finding indices for the elements using various
+      // ratios of values to input from 5-30% of # of elements in input
+      for (size_t p = 5; p <= 30; p += 5){
+        size_t n_vals = (ARRAY_SIZE * p) / 100;
+        std::vector values(n_vals, Value());
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(2 * i), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        IdArrayHandle out_handle;
+        timer.Reset(); // only the algorithm call is timed, not the host-side setup
+        Algorithm::LowerBounds(input_handle, value_handle, out_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "LowerBounds on " << ARRAY_SIZE << " input and "
+          << n_vals << " values took " << elapsed << "s\n";
+      }
+    }
+  };
+
+  struct BenchReduce { // times a single Reduce over ARRAY_SIZE values
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      std::vector input(ARRAY_SIZE, Value());
+      for (size_t i = 0; i < input.size(); ++i){
+        input[i] = TestValue(vtkm::Id(i), Value());
+      }
+      ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input);
+      timer.Reset();
+      Algorithm::Reduce(input_handle, Value());
+      vtkm::Float64 elapsed = timer.GetElapsedTime();
+      std::cout << "Reduce on " << ARRAY_SIZE
+        << " values took " << elapsed << "s\n";
+    }
+  };
+
+  struct BenchReduceByKey { // times ReduceByKey (with Add) for 5%..30% distinct keys
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      // We benchmark 5% to 30% of ARRAY_SIZE keys in 5% increments
+      for (size_t p = 5; p <= 30; p += 5){
+        size_t n_keys = (ARRAY_SIZE * p) / 100;
+        std::vector values(ARRAY_SIZE, Value());
+        std::vector keys(ARRAY_SIZE, 0);
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(i), Value());
+          keys[i] = vtkm::Id(i % n_keys);
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        ValueArrayHandle values_out;
+        IdArrayHandle key_handle = vtkm::cont::make_ArrayHandle(keys);
+        IdArrayHandle keys_out;
+        Algorithm::SortByKey(key_handle, value_handle); // untimed: groups equal keys together first
+        timer.Reset();
+        Algorithm::ReduceByKey(key_handle, value_handle, keys_out, values_out,
+            vtkm::internal::Add());
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "ReduceByKey on " << ARRAY_SIZE
+          << " values with " << n_keys << " distinct vtkm::Id"
+          << " keys took " << elapsed << "s\n";
+      }
+    }
+  };
+
+  struct BenchScanInclusive { // times a single ScanInclusive over ARRAY_SIZE values
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      std::vector values(ARRAY_SIZE, Value());
+      for (size_t i = 0; i < values.size(); ++i){
+        values[i] = TestValue(vtkm::Id(i), Value());
+      }
+      ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+      ValueArrayHandle out_handle;
+      timer.Reset();
+      Algorithm::ScanInclusive(value_handle, out_handle);
+      vtkm::Float64 elapsed = timer.GetElapsedTime();
+      std::cout << "ScanInclusive on " << ARRAY_SIZE
+        << " values took " << elapsed << "s\n";
+    }
+  };
+
+  struct BenchScanExclusive { // times a single ScanExclusive over ARRAY_SIZE values
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      std::vector values(ARRAY_SIZE, Value());
+      for (size_t i = 0; i < values.size(); ++i){
+        values[i] = TestValue(vtkm::Id(i), Value());
+      }
+      ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+      ValueArrayHandle out_handle;
+      timer.Reset();
+      Algorithm::ScanExclusive(value_handle, out_handle);
+      vtkm::Float64 elapsed = timer.GetElapsedTime();
+      std::cout << "ScanExclusive on " << ARRAY_SIZE
+        << " values took " << elapsed << "s\n";
+    }
+  };
+
+  /// This benchmark tests sort on a few configurations of data:
+  /// sorted, reverse-ordered, almost sorted and random
+  /// TODO: Is it really worth testing all these possible configurations
+  /// of data? How often will we really care about anything besides unsorted data?
+  struct BenchSort {
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      std::vector values(ARRAY_SIZE, Value());
+      // Test sort on already sorted data
+      {
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(i), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        timer.Reset();
+        Algorithm::Sort(value_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "Sort on " << ARRAY_SIZE << " already sorted "
+          << " values took " << elapsed << "s\n";
+      }
+      // Test sort on reverse-sorted data
+      {
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(values.size() - i), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        timer.Reset();
+        Algorithm::Sort(value_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "Sort on " << ARRAY_SIZE << " reverse-ordered "
+          << " values took " << elapsed << "s\n";
+      }
+      // Test on almost sorted data (four ascending runs of ARRAY_SIZE / 4 values)
+      {
+        size_t modulus = values.size() / 4;
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(i % modulus), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        timer.Reset();
+        Algorithm::Sort(value_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "Sort on " << ARRAY_SIZE << " almost-sorted "
+          << " values took " << elapsed << "s\n";
+      }
+      // Test on random data (default-seeded generator, so the sequence is the same every run)
+      {
+        boost::mt19937 rng;
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(rng()), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        timer.Reset();
+        Algorithm::Sort(value_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "Sort on " << ARRAY_SIZE << " random "
+          << " values took " << elapsed << "s\n";
+      }
+    }
+  };
+
+  struct BenchSortByKey { // times SortByKey of random values under 5%..30% distinct vtkm::Id keys
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      boost::mt19937 rng;
+      // We benchmark 5% to 30% of ARRAY_SIZE keys in 5% increments
+      for (size_t p = 5; p <= 30; p += 5){
+        size_t n_keys = (ARRAY_SIZE * p) / 100;
+        std::vector values(ARRAY_SIZE, Value());
+        std::vector keys(ARRAY_SIZE, 0);
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(rng()), Value());
+          keys[i] = vtkm::Id(i % n_keys);
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        IdArrayHandle key_handle = vtkm::cont::make_ArrayHandle(keys);
+        timer.Reset();
+        Algorithm::SortByKey(key_handle, value_handle); // keys first, as in BenchReduceByKey; was (values, keys)
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "SortByKey on " << ARRAY_SIZE
+          << " random values with " << n_keys << " different vtkm::Id keys took "
+          << elapsed << "s\n";
+      }
+    }
+  };
+
+  struct BenchStreamCompact { // times StreamCompact with and without a separate stencil array
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      // We benchmark 5% to 30% valid values in 5% increments
+      for (size_t p = 5; p <= 30; p += 5){
+        size_t n_valid = (ARRAY_SIZE * p) / 100;
+        size_t modulo = ARRAY_SIZE / n_valid; // integer division: every modulo-th element is marked valid
+        std::vector values(ARRAY_SIZE, Value());
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = i % modulo == 0 ? TestValue(1, Value()) : Value();
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        IdArrayHandle out_handle;
+        timer.Reset();
+        Algorithm::StreamCompact(value_handle, out_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "StreamCompact on " << ARRAY_SIZE << " "
+          << " values with " << out_handle.GetNumberOfValues()
+          << " valid values took " << elapsed << "s\n";
+
+        std::vector stencil(ARRAY_SIZE, 0);
+        for (size_t i = 0; i < stencil.size(); ++i){
+          stencil[i] = i % modulo == 0 ? 1 : vtkm::Id();
+        }
+        IdArrayHandle stencil_handle = vtkm::cont::make_ArrayHandle(stencil);
+        ValueArrayHandle out_val_handle;
+        timer.Reset();
+        Algorithm::StreamCompact(value_handle, stencil_handle, out_val_handle);
+        elapsed = timer.GetElapsedTime();
+        std::cout << "StreamCompact with stencil on " << ARRAY_SIZE
+          << " values with " << out_val_handle.GetNumberOfValues()
+          << " valid values took " << elapsed << "s\n";
+      }
+    }
+  };
+
+  struct BenchUnique { // times Unique on sorted input built from 5%..30% distinct indices
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      // We benchmark 5% to 30% valid values in 5% increments
+      for (size_t p = 5; p <= 30; p += 5){
+        size_t n_valid = (ARRAY_SIZE * p) / 100;
+        std::vector values(ARRAY_SIZE, Value());
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(i % n_valid), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        Algorithm::Sort(value_handle); // untimed: makes duplicates adjacent before Unique
+        timer.Reset();
+        Algorithm::Unique(value_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "Unique on " << ARRAY_SIZE << " values with "
+          << value_handle.GetNumberOfValues() << " valid values took "
+          << elapsed << "s\n";
+      }
+    }
+  };
+
+  struct BenchUpperBounds { // times UpperBounds of 5%..30% as many query values against an ARRAY_SIZE input
+    template
+    VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
+      typedef vtkm::cont::ArrayHandle ValueArrayHandle;
+
+      Timer timer;
+      std::vector input(ARRAY_SIZE, Value());
+      for (size_t i = 0; i < input.size(); ++i){
+        input[i] = TestValue(vtkm::Id(i), Value());
+      }
+      ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input);
+
+      // We benchmark finding indices for the elements using various
+      // ratios of values to input from 5-30% of # of elements in input
+      for (size_t p = 5; p <= 30; p += 5){
+        size_t n_vals = (ARRAY_SIZE * p) / 100;
+        std::vector values(n_vals, Value());
+        for (size_t i = 0; i < values.size(); ++i){
+          values[i] = TestValue(vtkm::Id(2 * i), Value());
+        }
+        ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
+        IdArrayHandle out_handle;
+        timer.Reset();
+        Algorithm::UpperBounds(input_handle, value_handle, out_handle);
+        vtkm::Float64 elapsed = timer.GetElapsedTime();
+        std::cout << "UpperBounds on " << ARRAY_SIZE << " input and "
+          << n_vals << " values took " << elapsed << "s\n";
+      }
+    }
+  };
+
+public:
+
+  struct ValueTypes : vtkm::ListTagBase,
+                      vtkm::Vec, vtkm::Float32,
+                      vtkm::Float64, vtkm::Vec,
+                      vtkm::Vec >{};
+
+
+  static VTKM_CONT_EXPORT int Run(){ // runs every benchmark for every type in ValueTypes; always returns 0
+    std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n";
+
+    std::cout << DIVIDER << "\nBenchmarking LowerBounds\n";
+    vtkm::testing::Testing::TryTypes(BenchLowerBounds(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking Reduce\n";
+    vtkm::testing::Testing::TryTypes(BenchReduce(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking ReduceByKey\n";
+    vtkm::testing::Testing::TryTypes(BenchReduceByKey(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking ScanInclusive\n";
+    vtkm::testing::Testing::TryTypes(BenchScanInclusive(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking ScanExclusive\n";
+    vtkm::testing::Testing::TryTypes(BenchScanExclusive(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking Sort\n";
+    vtkm::testing::Testing::TryTypes(BenchSort(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking SortByKey\n";
+    vtkm::testing::Testing::TryTypes(BenchSortByKey(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking StreamCompact\n";
+    vtkm::testing::Testing::TryTypes(BenchStreamCompact(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking Unique\n";
+    vtkm::testing::Testing::TryTypes(BenchUnique(), ValueTypes());
+
+    std::cout << "\n" << DIVIDER << "\nBenchmarking UpperBounds\n";
+    vtkm::testing::Testing::TryTypes(BenchUpperBounds(), ValueTypes());
+    return 0;
+  }
+};
+
+#undef ARRAY_SIZE
+
+}
+} // namespace vtkm::benchmarking
+
+#endif
+
diff --git a/vtkm/benchmarking/CMakeLists.txt b/vtkm/benchmarking/CMakeLists.txt
new file mode 100644
index 000000000..8934552d3
--- /dev/null
+++ b/vtkm/benchmarking/CMakeLists.txt
@@ -0,0 +1,35 @@
+##============================================================================
+## Copyright (c) Kitware, Inc.
+## All rights reserved.
+## See LICENSE.txt for details.
+## This software is distributed WITHOUT ANY WARRANTY; without even
+## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+## PURPOSE. See the above copyright notice for more information.
+##
+## Copyright 2014 Sandia Corporation.
+## Copyright 2014 UT-Battelle, LLC.
+## Copyright 2014 Los Alamos National Security.
+##
+## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+## the U.S. Government retains certain rights in this software.
+##
+## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
+## Laboratory (LANL), the U.S. Government retains certain rights in
+## this software.
+##============================================================================
+
+set(benchmarks
+  BenchmarkDeviceAdapter.cxx
+  )
+
+vtkm_save_benchmarks(${benchmarks})
+
+vtkm_benchmarks(VTKM_DEVICE_ADAPTER_SERIAL)
+
+if (VTKm_ENABLE_CUDA)
+  vtkm_benchmarks(VTKM_DEVICE_ADAPTER_CUDA)
+endif()
+if (VTKm_ENABLE_TBB)
+  vtkm_benchmarks(VTKM_DEVICE_ADAPTER_TBB)
+endif()
+