Adding micro benchmark suite
This commit is contained in:
parent
2f0205f421
commit
238d4fa759
@ -388,6 +388,150 @@ function(vtkm_worklet_unit_tests device_adapter)
|
||||
set(CUDA_NVCC_FLAGS ${old_nvcc_flags})
|
||||
endfunction(vtkm_worklet_unit_tests)
|
||||
|
||||
# Record the benchmark sources that every device adapter will later compile.
# Modelled on vtkm_save_worklet_unit_tests.
# Usage:
#
#   vtkm_save_benchmarks( sources )
#
# notes: appends to three global properties:
#   vtkm_benchmarks_sources    - absolute paths of the given sources
#   vtkm_benchmarks_cu_sources - .cu copies of the sources in the build dir
#   vtkm_benchmarks_drivers    - the generated BenchmarkDriver.cxx
function(vtkm_save_benchmarks)

  # The driver is generated here, at the point of the call, because it
  # expects the listed files to live in the same directory as itself.
  #TODO: This is probably ok to use for benchmarks as well
  create_test_sourcelist(bench_sources BenchmarkDriver.cxx ${ARGN})
  set(driver_path "${CMAKE_CURRENT_BINARY_DIR}/BenchmarkDriver.cxx")

  # Two parallel lists are collected: the original files by absolute path
  # (for regular compilers) and a verbatim .cu copy of each one in the
  # build directory (so the same code can be handed to cuda if needed).
  set(abs_sources)
  set(cuda_copies)
  foreach(source_file ${ARGN})
    get_filename_component(source_abs "${source_file}" ABSOLUTE)
    get_filename_component(source_stem "${source_file}" NAME_WE)

    set(cuda_copy "${CMAKE_CURRENT_BINARY_DIR}/${source_stem}.cu")
    configure_file("${source_abs}" "${cuda_copy}" COPYONLY)

    list(APPEND abs_sources "${source_abs}")
    list(APPEND cuda_copies "${cuda_copy}")
  endforeach()

  # Only the bookkeeping happens here; no executable is created yet.
  # Each device adapter does that for itself later on.
  set_property(GLOBAL APPEND PROPERTY vtkm_benchmarks_sources ${abs_sources})
  set_property(GLOBAL APPEND PROPERTY vtkm_benchmarks_cu_sources ${cuda_copies})
  set_property(GLOBAL APPEND PROPERTY vtkm_benchmarks_drivers ${driver_path})

endfunction(vtkm_save_benchmarks)
|
||||
|
||||
# Build the benchmark executable for the given device adapter
# Usage:
#
#   vtkm_benchmarks( device_adapter )
#
# notes: reads the vtkm_benchmarks_sources, vtkm_benchmarks_cu_sources and
# vtkm_benchmarks_drivers global properties to find the benchmarks that need
# to be compiled for the given device adapter. The executable is named
# Benchmarks_<DEVICE> and is only created when both VTKm_ENABLE_BENCHMARKS
# and VTKm_ENABLE_TESTING are on.
function(vtkm_benchmarks device_adapter)

  set(benchmark_srcs)
  get_property(benchmark_srcs GLOBAL
               PROPERTY vtkm_benchmarks_sources )

  set(benchmark_drivers)
  get_property(benchmark_drivers GLOBAL
               PROPERTY vtkm_benchmarks_drivers )

  #detect if we are generating .cu files
  set(is_cuda FALSE)
  set(old_nvcc_flags ${CUDA_NVCC_FLAGS})
  if("${device_adapter}" STREQUAL "VTKM_DEVICE_ADAPTER_CUDA")
    set(is_cuda TRUE)
    #if we are generating cu files we need to set up four things.
    #1. use the configured .cu files
    #2. Explicitly set the cuda device adapter as a define. This is currently
    #   done as a work around since the cuda executable ignores compile
    #   definitions
    #3. Set BOOST_SP_DISABLE_THREADS to disable threading warnings
    #4. Disable warnings (-w), which hides the unused function warnings
    # the FindCUDA module and helper methods don't read target level
    # properties so we have to modify CUDA_NVCC_FLAGS instead of using
    # target and source level COMPILE_FLAGS and COMPILE_DEFINITIONS
    get_property(benchmark_srcs GLOBAL PROPERTY vtkm_benchmarks_cu_sources )

    list(APPEND CUDA_NVCC_FLAGS "-DVTKM_DEVICE_ADAPTER=${device_adapter}")
    list(APPEND CUDA_NVCC_FLAGS "-DBOOST_SP_DISABLE_THREADS")
    list(APPEND CUDA_NVCC_FLAGS "-w")
  endif()

  if(VTKm_ENABLE_BENCHMARKS AND VTKm_ENABLE_TESTING)
    #quoted so that an empty device_adapter does not drop the input argument
    string(REPLACE "VTKM_DEVICE_ADAPTER_" "" device_type "${device_adapter}")

    #NOTE(review): 'kit' is not used below; the call is kept in case the
    #helper has side effects - confirm and remove if it does not
    vtkm_get_kit_name(kit)

    #inject the device adapter into the benchmark program name so each one is unique
    set(benchmark_prog Benchmarks_${device_type})

    if(is_cuda)
      cuda_add_executable(${benchmark_prog} ${benchmark_drivers} ${benchmark_srcs})
    else()
      add_executable(${benchmark_prog} ${benchmark_drivers} ${benchmark_srcs})
      if("${device_adapter}" STREQUAL "VTKM_DEVICE_ADAPTER_TBB")
        target_link_libraries(${benchmark_prog} ${TBB_LIBRARIES})
      endif()
    endif()

    if(MSVC)
      #disable MSVC CRT and SCL warnings as they recommend using non standard
      #c++ extensions
      set_property(TARGET ${benchmark_prog}
                   APPEND PROPERTY COMPILE_DEFINITIONS
                   "_SCL_SECURE_NO_WARNINGS"
                   "_CRT_SECURE_NO_WARNINGS"
                   )

      #enable large object support 2^32 addressable sections
      #COMPILE_FLAGS is a single string, not a list, so it has to be extended
      #with APPEND_STRING (plus a separating space); APPEND would insert a ';'
      set_property(TARGET ${benchmark_prog}
                   APPEND_STRING PROPERTY COMPILE_FLAGS
                   " /bigobj"
                   )
    endif()

    #increase warning level if needed. The cuda sources are not affected by
    #this because FindCUDA does not read target level properties, which keeps
    #out the false positive unused function warnings that cuda generates
    if(VTKm_EXTRA_COMPILER_WARNINGS)
      set_property(TARGET ${benchmark_prog}
                   APPEND_STRING PROPERTY COMPILE_FLAGS
                   " ${CMAKE_CXX_FLAGS_WARN_EXTRA}" )
    endif()

    #set the device adapter on the executable
    set_property(TARGET ${benchmark_prog}
                 APPEND
                 PROPERTY COMPILE_DEFINITIONS "VTKM_DEVICE_ADAPTER=${device_adapter}" )
  endif()

  #put the nvcc flags back the way they were before this call
  set(CUDA_NVCC_FLAGS ${old_nvcc_flags})
endfunction(vtkm_benchmarks)
|
||||
|
||||
# The Thrust project is not as careful as the VTKm project in avoiding warnings
|
||||
# on shadow variables and unused arguments. With a real GCC compiler, you
|
||||
# can disable these warnings inline, but with something like nvcc, those
|
||||
|
@ -56,6 +56,7 @@ include(CMake/VTKmCompilerExtras.cmake)
|
||||
option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
|
||||
option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
|
||||
option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
|
||||
option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
|
||||
|
||||
option(VTKm_USE_DOUBLE_PRECISION
|
||||
"Use double precision for floating point calculations"
|
||||
|
@ -53,3 +53,8 @@ add_subdirectory(exec)
|
||||
#-----------------------------------------------------------------------------
|
||||
#add the worklet folder
|
||||
add_subdirectory(worklet)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#add the benchmarking folder
|
||||
add_subdirectory(benchmarking)
|
||||
|
||||
|
30
vtkm/benchmarking/BenchmarkDeviceAdapter.cxx
Normal file
30
vtkm/benchmarking/BenchmarkDeviceAdapter.cxx
Normal file
@ -0,0 +1,30 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2014 Sandia Corporation.
|
||||
// Copyright 2014 UT-Battelle, LLC.
|
||||
// Copyright 2014 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/DeviceAdapter.h>
|
||||
|
||||
#include <vtkm/benchmarking/BenchmarkDeviceAdapter.h>
|
||||
|
||||
int BenchmarkDeviceAdapter(int, char *[])
|
||||
{
|
||||
return vtkm::benchmarking::BenchmarkDeviceAdapter
|
||||
<VTKM_DEFAULT_DEVICE_ADAPTER_TAG>::Run();
|
||||
}
|
||||
|
441
vtkm/benchmarking/BenchmarkDeviceAdapter.h
Normal file
441
vtkm/benchmarking/BenchmarkDeviceAdapter.h
Normal file
@ -0,0 +1,441 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2014 Sandia Corporation.
|
||||
// Copyright 2014 UT-Battelle, LLC.
|
||||
// Copyright 2014 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_benchmarking_BenchmarkDeviceAdapter_h
|
||||
#define vtk_m_benchmarking_BenchmarkDeviceAdapter_h
|
||||
|
||||
#include <vtkm/TypeTraits.h>
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/ArrayHandleCounting.h>
|
||||
#include <vtkm/cont/ArrayHandleConstant.h>
|
||||
#include <vtkm/cont/ArrayHandlePermutation.h>
|
||||
#include <vtkm/cont/ArrayHandleZip.h>
|
||||
#include <vtkm/cont/ArrayPortalToIterators.h>
|
||||
#include <vtkm/cont/ErrorControlOutOfMemory.h>
|
||||
#include <vtkm/cont/ErrorExecution.h>
|
||||
#include <vtkm/cont/StorageBasic.h>
|
||||
#include <vtkm/cont/Timer.h>
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
|
||||
#include <vtkm/cont/internal/DeviceAdapterError.h>
|
||||
|
||||
#include <vtkm/cont/testing/Testing.h>
|
||||
|
||||
#include <boost/random.hpp>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <ctime>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#undef NOMINMAX
|
||||
#undef WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
|
||||
namespace vtkm {
|
||||
namespace benchmarking {
|
||||
|
||||
#define ARRAY_SIZE (1 << 20)
|
||||
const static std::string DIVIDER(40, '-');
|
||||
|
||||
/// This class runs a series of micro-benchmarks to measure
|
||||
/// performance of the parallel primitives provided by each
|
||||
/// device adapter
|
||||
///
|
||||
template<class DeviceAdapterTag>
|
||||
struct BenchmarkDeviceAdapter {
|
||||
private:
|
||||
typedef vtkm::cont::StorageTagBasic StorageTagBasic;
|
||||
typedef vtkm::cont::StorageTagBasic StorageTag;
|
||||
|
||||
typedef vtkm::cont::ArrayHandle<vtkm::Id, StorageTag> IdArrayHandle;
|
||||
|
||||
typedef vtkm::cont::DeviceAdapterAlgorithm<DeviceAdapterTag>
|
||||
Algorithm;
|
||||
|
||||
typedef vtkm::cont::Timer<DeviceAdapterTag> Timer;
|
||||
|
||||
struct BenchLowerBounds {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
|
||||
std::vector<Value> input(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < input.size(); ++i){
|
||||
input[i] = TestValue(vtkm::Id(i), Value());
|
||||
}
|
||||
ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input);
|
||||
|
||||
// We benchmark finding indices for the elements using various
|
||||
// ratios of values to input from 5-30% of # of elements in input
|
||||
for (size_t p = 5; p <= 30; p += 5){
|
||||
size_t n_vals = (ARRAY_SIZE * p) / 100;
|
||||
std::vector<Value> values(n_vals, Value());
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(2 * i), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
IdArrayHandle out_handle;
|
||||
timer.Reset();
|
||||
Algorithm::LowerBounds(input_handle, value_handle, out_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "LowerBounds on " << ARRAY_SIZE << " input and "
|
||||
<< n_vals << " values took " << elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchReduce {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
std::vector<Value> input(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < input.size(); ++i){
|
||||
input[i] = TestValue(vtkm::Id(i), Value());
|
||||
}
|
||||
ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input);
|
||||
timer.Reset();
|
||||
Algorithm::Reduce(input_handle, Value());
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "Reduce on " << ARRAY_SIZE
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchReduceByKey {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
// We benchmark 5% to 30% of ARRAY_SIZE keys in 5% increments
|
||||
for (size_t p = 5; p <= 30; p += 5){
|
||||
size_t n_keys = (ARRAY_SIZE * p) / 100;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
std::vector<vtkm::Id> keys(ARRAY_SIZE, 0);
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(i), Value());
|
||||
keys[i] = vtkm::Id(i % n_keys);
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
ValueArrayHandle values_out;
|
||||
IdArrayHandle key_handle = vtkm::cont::make_ArrayHandle(keys);
|
||||
IdArrayHandle keys_out;
|
||||
Algorithm::SortByKey(key_handle, value_handle);
|
||||
timer.Reset();
|
||||
Algorithm::ReduceByKey(key_handle, value_handle, keys_out, values_out,
|
||||
vtkm::internal::Add());
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "ReduceByKey on " << ARRAY_SIZE
|
||||
<< " values with " << n_keys << " distinct vtkm::Id"
|
||||
<< " keys took " << elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchScanInclusive {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(i), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
ValueArrayHandle out_handle;
|
||||
timer.Reset();
|
||||
Algorithm::ScanInclusive(value_handle, out_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "ScanInclusive on " << ARRAY_SIZE
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchScanExclusive {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(i), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
ValueArrayHandle out_handle;
|
||||
timer.Reset();
|
||||
Algorithm::ScanExclusive(value_handle, out_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "ScanExclusive on " << ARRAY_SIZE
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
};
|
||||
|
||||
/// This benchmark tests sort on a few configurations of data
|
||||
/// sorted, reverse-ordered, almost sorted and random
|
||||
/// TODO: Is it really worth testing all these possible configurations
|
||||
/// of data? How often will we really care about anything besides unsorted data?
|
||||
struct BenchSort {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
// Test sort on already sorted data
|
||||
{
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(i), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
timer.Reset();
|
||||
Algorithm::Sort(value_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "Sort on " << ARRAY_SIZE << " already sorted "
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
// Test sort on reverse-sorted data
|
||||
{
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(values.size() - i), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
timer.Reset();
|
||||
Algorithm::Sort(value_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "Sort on " << ARRAY_SIZE << " reverse-ordered "
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
// Test on almost sorted data
|
||||
{
|
||||
size_t modulus = values.size() / 4;
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(i % modulus), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
timer.Reset();
|
||||
Algorithm::Sort(value_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "Sort on " << ARRAY_SIZE << " almost-sorted "
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
// Test on random data
|
||||
{
|
||||
boost::mt19937 rng;
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(rng()), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
timer.Reset();
|
||||
Algorithm::Sort(value_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "Sort on " << ARRAY_SIZE << " random "
|
||||
<< " values took " << elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchSortByKey {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
boost::mt19937 rng;
|
||||
// We benchmark 5% to 30% of ARRAY_SIZE keys in 5% increments
|
||||
for (size_t p = 5; p <= 30; p += 5){
|
||||
size_t n_keys = (ARRAY_SIZE * p) / 100;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
std::vector<vtkm::Id> keys(ARRAY_SIZE, 0);
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(rng()), Value());
|
||||
keys[i] = vtkm::Id(i % n_keys);
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
IdArrayHandle key_handle = vtkm::cont::make_ArrayHandle(keys);
|
||||
timer.Reset();
|
||||
Algorithm::SortByKey(value_handle, key_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "SortByKey on " << ARRAY_SIZE
|
||||
<< " random values with " << n_keys << " different vtkm::Id keys took "
|
||||
<< elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchStreamCompact {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
// We benchmark 5% to 30% valid values in 5% increments
|
||||
for (size_t p = 5; p <= 30; p += 5){
|
||||
size_t n_valid = (ARRAY_SIZE * p) / 100;
|
||||
size_t modulo = ARRAY_SIZE / n_valid;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = i % modulo == 0 ? TestValue(1, Value()) : Value();
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
IdArrayHandle out_handle;
|
||||
timer.Reset();
|
||||
Algorithm::StreamCompact(value_handle, out_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "StreamCompact on " << ARRAY_SIZE << " "
|
||||
<< " values with " << out_handle.GetNumberOfValues()
|
||||
<< " valid values took " << elapsed << "s\n";
|
||||
|
||||
std::vector<vtkm::Id> stencil(ARRAY_SIZE, 0);
|
||||
for (size_t i = 0; i < stencil.size(); ++i){
|
||||
stencil[i] = i % modulo == 0 ? 1 : vtkm::Id();
|
||||
}
|
||||
IdArrayHandle stencil_handle = vtkm::cont::make_ArrayHandle(stencil);
|
||||
ValueArrayHandle out_val_handle;
|
||||
timer.Reset();
|
||||
Algorithm::StreamCompact(value_handle, stencil_handle, out_val_handle);
|
||||
elapsed = timer.GetElapsedTime();
|
||||
std::cout << "StreamCompact with stencil on " << ARRAY_SIZE
|
||||
<< " values with " << out_val_handle.GetNumberOfValues()
|
||||
<< " valid values took " << elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchUnique {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
// We benchmark 5% to 30% valid values in 5% increments
|
||||
for (size_t p = 5; p <= 30; p += 5){
|
||||
size_t n_valid = (ARRAY_SIZE * p) / 100;
|
||||
std::vector<Value> values(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(i % n_valid), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
Algorithm::Sort(value_handle);
|
||||
timer.Reset();
|
||||
Algorithm::Unique(value_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "Unique on " << ARRAY_SIZE << " values with "
|
||||
<< value_handle.GetNumberOfValues() << " valid values took "
|
||||
<< elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct BenchUpperBounds {
|
||||
template<typename Value>
|
||||
VTKM_CONT_EXPORT void operator()(const Value vtkmNotUsed(v)) const {
|
||||
typedef vtkm::cont::ArrayHandle<Value, StorageTag> ValueArrayHandle;
|
||||
|
||||
Timer timer;
|
||||
std::vector<Value> input(ARRAY_SIZE, Value());
|
||||
for (size_t i = 0; i < input.size(); ++i){
|
||||
input[i] = TestValue(vtkm::Id(i), Value());
|
||||
}
|
||||
ValueArrayHandle input_handle = vtkm::cont::make_ArrayHandle(input);
|
||||
|
||||
// We benchmark finding indices for the elements using various
|
||||
// ratios of values to input from 5-30% of # of elements in input
|
||||
for (size_t p = 5; p <= 30; p += 5){
|
||||
size_t n_vals = (ARRAY_SIZE * p) / 100;
|
||||
std::vector<Value> values(n_vals, Value());
|
||||
for (size_t i = 0; i < values.size(); ++i){
|
||||
values[i] = TestValue(vtkm::Id(2 * i), Value());
|
||||
}
|
||||
ValueArrayHandle value_handle = vtkm::cont::make_ArrayHandle(values);
|
||||
IdArrayHandle out_handle;
|
||||
timer.Reset();
|
||||
Algorithm::UpperBounds(input_handle, value_handle, out_handle);
|
||||
vtkm::Float64 elapsed = timer.GetElapsedTime();
|
||||
std::cout << "UpperBounds on " << ARRAY_SIZE << " input and "
|
||||
<< n_vals << " values took " << elapsed << "s\n";
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
|
||||
/// The list of value types that Run() passes to every benchmark functor.
struct ValueTypes
  : vtkm::ListTagBase<
      vtkm::UInt8,
      vtkm::UInt32,
      vtkm::Int32,
      vtkm::Int64,
      vtkm::Vec<vtkm::Int32, 2>,
      vtkm::Vec<vtkm::UInt8, 4>,
      vtkm::Float32,
      vtkm::Float64,
      vtkm::Vec<vtkm::Float64, 3>,
      vtkm::Vec<vtkm::Float32, 4> >
{
};
|
||||
|
||||
|
||||
/// Runs every benchmark functor once per type in ValueTypes, printing a
/// divider and a heading before each group. Always returns 0.
static VTKM_CONT_EXPORT int Run(){
  // Separator written before every heading except the first benchmark's.
  const std::string nextSection = "\n" + DIVIDER + "\n";

  std::cout << DIVIDER << "\nRunning DeviceAdapter benchmarks\n";

  std::cout << DIVIDER << "\nBenchmarking LowerBounds\n";
  vtkm::testing::Testing::TryTypes(BenchLowerBounds(), ValueTypes());

  std::cout << nextSection << "Benchmarking Reduce\n";
  vtkm::testing::Testing::TryTypes(BenchReduce(), ValueTypes());

  std::cout << nextSection << "Benchmarking ReduceByKey\n";
  vtkm::testing::Testing::TryTypes(BenchReduceByKey(), ValueTypes());

  std::cout << nextSection << "Benchmarking ScanInclusive\n";
  vtkm::testing::Testing::TryTypes(BenchScanInclusive(), ValueTypes());

  std::cout << nextSection << "Benchmarking ScanExclusive\n";
  vtkm::testing::Testing::TryTypes(BenchScanExclusive(), ValueTypes());

  std::cout << nextSection << "Benchmarking Sort\n";
  vtkm::testing::Testing::TryTypes(BenchSort(), ValueTypes());

  std::cout << nextSection << "Benchmarking SortByKey\n";
  vtkm::testing::Testing::TryTypes(BenchSortByKey(), ValueTypes());

  std::cout << nextSection << "Benchmarking StreamCompact\n";
  vtkm::testing::Testing::TryTypes(BenchStreamCompact(), ValueTypes());

  std::cout << nextSection << "Benchmarking Unique\n";
  vtkm::testing::Testing::TryTypes(BenchUnique(), ValueTypes());

  std::cout << nextSection << "Benchmarking UpperBounds\n";
  vtkm::testing::Testing::TryTypes(BenchUpperBounds(), ValueTypes());

  return 0;
}
|
||||
};
|
||||
|
||||
#undef ARRAY_SIZE
|
||||
|
||||
}
|
||||
} // namespace vtkm::benchmarking
|
||||
|
||||
#endif
|
||||
|
35
vtkm/benchmarking/CMakeLists.txt
Normal file
35
vtkm/benchmarking/CMakeLists.txt
Normal file
@ -0,0 +1,35 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2014 Sandia Corporation.
|
||||
## Copyright 2014 UT-Battelle, LLC.
|
||||
## Copyright 2014 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
##
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##============================================================================
|
||||
|
||||
# Sources that each provide one benchmark entry point for the driver.
set(benchmarks BenchmarkDeviceAdapter.cxx)

# Register the sources once; every vtkm_benchmarks() call below reuses them.
vtkm_save_benchmarks(${benchmarks})

# The serial device adapter is always requested.
vtkm_benchmarks(VTKM_DEVICE_ADAPTER_SERIAL)

# The remaining device adapters are only requested when enabled.
if(VTKm_ENABLE_CUDA)
  vtkm_benchmarks(VTKM_DEVICE_ADAPTER_CUDA)
endif()

if(VTKm_ENABLE_TBB)
  vtkm_benchmarks(VTKM_DEVICE_ADAPTER_TBB)
endif()
|
||||
|
Loading…
Reference in New Issue
Block a user