perftest: add perf regression test

This commit is contained in:
Vicente Adolfo Bolea Sanchez 2022-10-07 17:42:12 -04:00
parent ebdd6b55a5
commit 7e99e256bb
9 changed files with 435 additions and 0 deletions

@ -0,0 +1,129 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
#-----------------------------------------------------------------------------
# Adds a performance benchmark test
#
# add_benchmark_test(benchmark [ <filter_regex...> ])
#
# Usage:
# add_benchmark_test(FiltersBenchmark BenchThreshold BenchTetrahedralize)
#
# benchmark: Target of an executable that uses Google Benchmark.
#
# filter_regex: CMake regexes that selects the specific benchmarks within the binary
# to be used. It populates the Google Benchmark
# --benchmark_filter parameter. When multiple regexes are passed
# as independent positional arguments, they are joined using the "|"
# regex operator before populating the `--benchmark_filter` parameter
#
function(add_benchmark_test benchmark)
  # The performance-test helper scripts need features first shipped in
  # CMake 3.19 (JSON support among other things).
  if (CMAKE_VERSION VERSION_LESS 3.19)
    message(FATAL_ERROR "Performance regression testing needs CMAKE >= 3.19")
  endif()

  ###TEST VARIABLES############################################################

  # Optional positional parameters for filter_regex: each extra positional
  # argument is a regex; joining the ARGN list with "|" makes Google
  # Benchmark treat them as alternatives. Default: run every benchmark.
  set(VTKm_PERF_FILTER_NAME ".*")
  if (${ARGC} GREATER_EQUAL 2)
    string(REPLACE ";" "|" VTKm_PERF_FILTER_NAME "${ARGN}")
  endif()

  set(VTKm_PERF_REMOTE_URL "https://gitlab.kitware.com/vbolea/vtk-m-benchmark-records.git")

  # Parameters for the null hypothesis test
  set(VTKm_PERF_ALPHA 0.05)
  set(VTKm_PERF_REPETITIONS 10)
  set(VTKm_PERF_MIN_TIME 1)
  set(VTKm_PERF_DIST "normal")

  set(VTKm_PERF_REPO "${CMAKE_BINARY_DIR}/vtk-m-benchmark-records")
  set(VTKm_PERF_COMPARE_JSON "${CMAKE_BINARY_DIR}/nocommit_${benchmark}.json")
  set(VTKm_PERF_STDOUT "${CMAKE_BINARY_DIR}/benchmark_${benchmark}.stdout")
  set(VTKm_PERF_COMPARE_STDOUT "${CMAKE_BINARY_DIR}/compare_${benchmark}.stdout")

  # Inside CI, name the JSON record after the commit SHA so it can be
  # archived in (and later looked up from) the records repository.
  if (DEFINED ENV{CI_COMMIT_SHA})
    set(VTKm_PERF_COMPARE_JSON "${CMAKE_BINARY_DIR}/$ENV{CI_COMMIT_SHA}_${benchmark}.json")
  endif()

  set(test_name "PerformanceTest${benchmark}")

  ###TEST INVOCATIONS##########################################################

  # Run the benchmark binary and write its JSON report + stdout.
  add_test(NAME "${test_name}Run"
    COMMAND ${CMAKE_COMMAND}
      "-DVTKm_PERF_BENCH_DEVICE=Any"
      "-DVTKm_PERF_BENCH_PATH=${CMAKE_BINARY_DIR}/bin/${benchmark}"
      "-DVTKm_PERF_FILTER_NAME=${VTKm_PERF_FILTER_NAME}"
      "-DVTKm_PERF_REPETITIONS=${VTKm_PERF_REPETITIONS}"
      "-DVTKm_PERF_MIN_TIME=${VTKm_PERF_MIN_TIME}"
      "-DVTKm_PERF_COMPARE_JSON=${VTKm_PERF_COMPARE_JSON}"
      "-DVTKm_PERF_STDOUT=${VTKm_PERF_STDOUT}"
      "-DVTKm_SOURCE_DIR=${VTKm_SOURCE_DIR}"
      -P "${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestRun.cmake"
    )

  # Clone the historical-records repository next to the build tree.
  add_test(NAME "${test_name}Fetch"
    COMMAND ${CMAKE_COMMAND}
      "-DVTKm_PERF_REPO=${VTKm_PERF_REPO}"
      "-DVTKm_SOURCE_DIR=${VTKm_SOURCE_DIR}"
      "-DVTKm_PERF_REMOTE_URL=${VTKm_PERF_REMOTE_URL}"
      -P "${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestFetch.cmake"
    )

  # Push this run's JSON record to the records repository (CI only, see below).
  add_test(NAME "${test_name}Upload"
    COMMAND ${CMAKE_COMMAND}
      "-DVTKm_PERF_REPO=${VTKm_PERF_REPO}"
      "-DVTKm_PERF_COMPARE_JSON=${VTKm_PERF_COMPARE_JSON}"
      "-DVTKm_SOURCE_DIR=${VTKm_SOURCE_DIR}"
      -P "${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestUpload.cmake"
    )

  # Compare this run against the most recent ancestor baseline and fail on a
  # statistically significant regression.
  add_test(NAME "${test_name}Report"
    COMMAND ${CMAKE_COMMAND}
      "-DBENCHMARK_NAME=${benchmark}"
      "-DVTKm_PERF_ALPHA=${VTKm_PERF_ALPHA}"
      "-DVTKm_PERF_DIST=${VTKm_PERF_DIST}"
      "-DVTKm_PERF_REPO=${VTKm_PERF_REPO}"
      "-DVTKm_PERF_COMPARE_JSON=${VTKm_PERF_COMPARE_JSON}"
      "-DVTKm_SOURCE_DIR=${VTKm_SOURCE_DIR}"
      "-DVTKm_BINARY_DIR=${VTKm_BINARY_DIR}"
      "-DVTKm_PERF_COMPARE_STDOUT=${VTKm_PERF_COMPARE_STDOUT}"
      -P "${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestReport.cmake"
    )

  # Remove the local clone of the records repository.
  add_test(NAME "${test_name}CleanUp"
    COMMAND ${CMAKE_COMMAND} -E rm -rf "${VTKm_PERF_REPO}"
    )

  ###TEST PROPERTIES###########################################################

  # CTest fixture wiring. NOTE(review): the original set the misspelled
  # singular properties FIXTURE_REQUIRED/FIXTURE_CLEANUP, which CTest does
  # not recognize (they become inert custom properties), and never declared
  # any FIXTURES_SETUP producer. The correct wiring is:
  #   - Run and Fetch each provide a fixture named after themselves,
  #   - Report and Upload require both fixtures (so they run afterwards and
  #     are skipped if setup failed),
  #   - CleanUp runs once all consumers of those fixtures have finished.
  set_tests_properties("${test_name}Run"
    PROPERTIES FIXTURES_SETUP "${test_name}Run")
  set_tests_properties("${test_name}Fetch"
    PROPERTIES FIXTURES_SETUP "${test_name}Fetch")
  set_tests_properties("${test_name}Report" "${test_name}Upload"
    PROPERTIES
      FIXTURES_REQUIRED "${test_name}Run;${test_name}Fetch"
      REQUIRED_FILES "${VTKm_PERF_COMPARE_JSON}")
  set_tests_properties("${test_name}CleanUp"
    PROPERTIES FIXTURES_CLEANUP "${test_name}Run;${test_name}Fetch")

  # Benchmarks are timing-sensitive: never run them concurrently with other
  # tests.
  set_tests_properties("${test_name}Run"
                       "${test_name}Report"
                       "${test_name}Upload"
                       "${test_name}Fetch"
                       "${test_name}CleanUp"
    PROPERTIES RUN_SERIAL ON)

  set_tests_properties("${test_name}Run" PROPERTIES TIMEOUT 1800)

  # Only upload when we are inside a CI build that has push credentials.
  if (NOT DEFINED ENV{CI_COMMIT_SHA} OR NOT DEFINED ENV{VTKM_BENCH_RECORDS_TOKEN})
    set_tests_properties("${test_name}Upload" PROPERTIES DISABLED TRUE)
  endif()
endfunction()

@ -0,0 +1,17 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Fetch the historical benchmark records: clone the `records` branch of the
# remote repository into ${VTKm_PERF_REPO}, replacing any stale clone.
include(${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestLib.cmake)

REQUIRE_FLAG_MUTABLE("VTKm_PERF_REPO")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_REMOTE_URL")

# Locate git instead of hardcoding /usr/bin/git (not valid on every system).
find_program(GIT_EXECUTABLE NAMES git REQUIRED)

# Remove any stale clone at its actual destination. The original removed the
# relative path "vtk-m-benchmark-records", which resolves against the current
# working directory rather than the clone destination ${VTKm_PERF_REPO}.
file(REMOVE_RECURSE "${VTKm_PERF_REPO}")

execute(COMMAND "${GIT_EXECUTABLE}" clone -b records "${VTKm_PERF_REMOTE_URL}" "${VTKm_PERF_REPO}")

@ -0,0 +1,36 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# REQUIRE_FLAG(flag)
#
# Aborts the script with a fatal error unless the variable named by `flag`
# is defined (normally passed on the command line as -D<flag>=<value>).
macro(REQUIRE_FLAG flag)
  if (NOT DEFINED ${flag})
    message(FATAL_ERROR "Need to pass the ${flag}")
  endif()
endmacro()
# REQUIRE_FLAG_MUTABLE(flag)
#
# Like REQUIRE_FLAG, but an environment variable of the same name, when set,
# overrides the value given on the command line.
macro(REQUIRE_FLAG_MUTABLE flag)
  REQUIRE_FLAG(${flag})
  # Env var overrides default value
  if (DEFINED ENV{${flag}})
    set(${flag} "$ENV{${flag}}")
  endif()
endmacro()
# execute(<execute_process arguments...>)
#
# Thin wrapper over execute_process() that echoes the command line plus its
# stdout/stderr (so they show up in the CTest log) and aborts the whole
# script on any command failure.
#
# NOTE(review): this must stay a macro, not a function — callers forward
# OUTPUT_VARIABLE through ${ARGV}, and a macro has no scope of its own, so
# the captured output lands directly in the caller's scope.
macro(execute)
execute_process(
${ARGV}
COMMAND_ECHO STDOUT
ECHO_OUTPUT_VARIABLE
ECHO_ERROR_VARIABLE
COMMAND_ERROR_IS_FATAL ANY
)
endmacro()
# Magic marker that tells CTest/CDash not to truncate this test's output.
message("CTEST_FULL_OUTPUT")

@ -0,0 +1,77 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Compare the current benchmark JSON record against the newest ancestor
# baseline found in the records repository, and fail (FATAL_ERROR) when any
# wall-time p-value falls below the significance level VTKm_PERF_ALPHA.
include("${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestLib.cmake")
# Mandatory -D flags; the *_MUTABLE ones may be overridden by identically
# named environment variables.
REQUIRE_FLAG("BENCHMARK_NAME")
REQUIRE_FLAG("VTKm_PERF_COMPARE_JSON")
REQUIRE_FLAG("VTKm_PERF_COMPARE_STDOUT")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_REPO")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_ALPHA")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_DIST")
###FIND MOST RECENT BASELINE####################################################
# "@" is git shorthand for HEAD: find the merge base of the current checkout
# with origin/master.
# NOTE(review): hardcoded /usr/bin/git is not portable — confirm runners.
execute(COMMAND /usr/bin/git -C "${VTKm_SOURCE_DIR}" merge-base origin/master @
OUTPUT_VARIABLE GIT_BASE_COMMIT)
string(STRIP "${GIT_BASE_COMMIT}" GIT_BASE_COMMIT)
# List the first-parent ancestry of the merge base, newest first.
# NOTE(review): execute_process is used directly here, presumably to skip
# ECHO_OUTPUT_VARIABLE so the full commit list is not echoed — confirm.
execute_process(COMMAND /usr/bin/git -C "${VTKm_SOURCE_DIR}" log --format=%H --first-parent "${GIT_BASE_COMMIT}"
OUTPUT_VARIABLE GIT_ANCESTOR_COMMITS
COMMAND_ECHO STDOUT
ECHO_ERROR_VARIABLE
COMMAND_ERROR_IS_FATAL ANY
)
# Turn the newline-separated hashes into a CMake list.
# NOTE(review): ${GIT_ANCESTOR_COMMITS} is unquoted; if git produced no
# output this call errors out with a wrong-argument-count message.
string(REPLACE "\n" ";" GIT_ANCESTOR_COMMITS ${GIT_ANCESTOR_COMMITS})
# The first (most recent) ancestor with a stored record is the baseline.
foreach(commit IN LISTS GIT_ANCESTOR_COMMITS)
if (EXISTS "${VTKm_PERF_REPO}/${commit}_${BENCHMARK_NAME}.json")
set(BASELINE_REPORT "${VTKm_PERF_REPO}/${commit}_${BENCHMARK_NAME}.json")
break()
endif()
endforeach()
if (NOT DEFINED BASELINE_REPORT)
message(FATAL_ERROR "PerformanceTestReport: no ancestor benchmarks record found")
endif()
###RUN COMPARE_PY SCRIPT########################################################
# Google Benchmark's compare.py performs the statistical comparison between
# the baseline record and this run's record.
# NOTE(review): hardcoded /usr/bin/python3 is not portable — confirm runners.
execute(COMMAND /usr/bin/python3
"${VTKm_SOURCE_DIR}/Utilities/Scripts/compare.py"
--alpha "${VTKm_PERF_ALPHA}"
--dist "${VTKm_PERF_DIST}"
benchmarks "${BASELINE_REPORT}" "${VTKm_PERF_COMPARE_JSON}"
OUTPUT_VARIABLE compare_py_output
)
# Write compare.py output to disk
file(WRITE "${VTKm_PERF_COMPARE_STDOUT}" "${compare_py_output}")
###PERFORM NULL HYPOTHESIS######################################################
# Collect every output line carrying a p-value.
# NOTE(review): ${compare_py_output} is unquoted; an empty output would make
# this call fail on argument count rather than produce an empty list.
string(REGEX MATCHALL "[^\n]*time_pvalue[^\n]*" pvalues_list ${compare_py_output})
foreach(pvalue_line IN LISTS pvalues_list)
# We only take into consideration the wall time of the test
# (rows ending in "/manual_time_pvalue"); capture 1 is the benchmark name,
# capture 2 the p-value. The trailing [ ]+ requires padding after the value.
string(REGEX MATCH "(.*)/manual_time_pvalue[ \t]+([0-9.]+)[ ]+" ignoreme ${pvalue_line})
if (CMAKE_MATCH_0)
set(benchmark_name "${CMAKE_MATCH_1}")
set(benchmark_pvalue "${CMAKE_MATCH_2}")
# p-value below alpha: reject the null hypothesis (performance regressed).
if("${benchmark_pvalue}" LESS "${VTKm_PERF_ALPHA}")
list(APPEND time_failed_benchs ${benchmark_name})
endif()
endif()
endforeach()
# Report all regressed benchmarks at once, one per line.
if(time_failed_benchs)
string(REPLACE ";" "\n" time_failed_benchs "${time_failed_benchs}")
message(FATAL_ERROR "Time-failed Benchmarks:\n${time_failed_benchs}")
endif()

@ -0,0 +1,35 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Run a Google Benchmark binary with the configured device/filter/repetition
# settings, producing a JSON record (for later comparison/upload) and a
# plain-text capture of its stdout.
include("${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestLib.cmake")
# Mandatory -D flags; the *_MUTABLE ones may be overridden by identically
# named environment variables.
REQUIRE_FLAG("VTKm_PERF_BENCH_PATH")
REQUIRE_FLAG("VTKm_PERF_FILTER_NAME")
REQUIRE_FLAG("VTKm_PERF_COMPARE_JSON")
REQUIRE_FLAG("VTKm_PERF_STDOUT")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_BENCH_DEVICE")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_REPETITIONS")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_MIN_TIME")
# --benchmark_filter selects which benchmarks run (regex, "|"-joined);
# --benchmark_min_time is Google Benchmark's per-benchmark minimum runtime.
# The execute() wrapper echoes everything and aborts on failure; with
# ECHO_OUTPUT_VARIABLE the stdout is both echoed and captured.
execute(
COMMAND "${VTKm_PERF_BENCH_PATH}"
--vtkm-device "${VTKm_PERF_BENCH_DEVICE}"
"--benchmark_filter=${VTKm_PERF_FILTER_NAME}"
"--benchmark_out=${VTKm_PERF_COMPARE_JSON}"
"--benchmark_repetitions=${VTKm_PERF_REPETITIONS}"
"--benchmark_min_time=${VTKm_PERF_MIN_TIME}"
--benchmark_out_format=json
--benchmark_counters_tabular=true
OUTPUT_VARIABLE report_output
)
# Write the benchmark's captured stdout to disk
file(WRITE "${VTKm_PERF_STDOUT}" "${report_output}")

@ -0,0 +1,23 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Upload this run's benchmark JSON record: copy it into the local clone of
# the records repository, commit it with a bot identity, and push it to the
# `records` branch.
include("${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTestLib.cmake")

REQUIRE_FLAG("VTKm_PERF_COMPARE_JSON")
REQUIRE_FLAG_MUTABLE("VTKm_PERF_REPO")

# Locate git instead of hardcoding /usr/bin/git (not valid on every system).
find_program(GIT_EXECUTABLE NAMES git REQUIRED)

# Stage the new record inside the records clone.
file(COPY "${VTKm_PERF_COMPARE_JSON}" DESTINATION "${VTKm_PERF_REPO}/")
get_filename_component(perf_report_name "${VTKm_PERF_COMPARE_JSON}" NAME)

# Bot identity is configured --local so it only affects this clone. Quoting
# replaces the original fragile backslash-escaped spaces; the argument value
# ("vtk-m benchmark job") is identical.
execute(COMMAND "${GIT_EXECUTABLE}" -C "${VTKm_PERF_REPO}" config --local user.name "vtk-m benchmark job")
execute(COMMAND "${GIT_EXECUTABLE}" -C "${VTKm_PERF_REPO}" config --local user.email do_not_email_the_robot@kitware.com)
execute(COMMAND "${GIT_EXECUTABLE}" -C "${VTKm_PERF_REPO}" add "${perf_report_name}")
execute(COMMAND "${GIT_EXECUTABLE}" -C "${VTKm_PERF_REPO}" commit -m "Added ${perf_report_name} record")
execute(COMMAND "${GIT_EXECUTABLE}" -C "${VTKm_PERF_REPO}" push origin records)

@ -63,3 +63,18 @@ if(TARGET vtkm_rendering)
add_benchmark(NAME BenchmarkRayTracing FILE BenchmarkRayTracing.cxx LIBS vtkm_rendering vtkm_source)
add_benchmark(NAME BenchmarkInSitu FILE BenchmarkInSitu.cxx LIBS vtkm_rendering vtkm_source vtkm_filter vtkm_io)
endif()
if(VTKm_ENABLE_PERFORMANCE_TESTING)
  include("${VTKm_SOURCE_DIR}/CMake/testing/VTKmPerformanceTest.cmake")
  # Each extra argument is a regex; add_benchmark_test joins them with "|"
  # into Google Benchmark's --benchmark_filter.
  add_benchmark_test(BenchmarkFilters
    BenchThreshold
    BenchContour/IsStructuredDataSet:1/NIsoVals:12/MergePts:1/GenNormals:0
    BenchContour/IsStructuredDataSet:1/NIsoVals:12/MergePts:0/GenNormals:1/FastNormals:1
    BenchContour/IsStructuredDataSet:0/NIsoVals:12/MergePts:1/GenNormals:0
    # Fixed: "GenNormals:" was missing its value ("GenNormals:/FastNormals:1"
    # matches no benchmark); mirror the structured-data variant above.
    BenchContour/IsStructuredDataSet:0/NIsoVals:12/MergePts:0/GenNormals:1/FastNormals:1
    BenchTetrahedralize
    BenchVertexClustering/NumDivs:256)
  if(TARGET vtkm_rendering)
    add_benchmark_test(BenchmarkInSitu "BenchContour")
  endif()
endif()

@ -19,6 +19,10 @@ Gitlab CI
4. ECP Continuous Integration
- OLCF Ascent testing machine
5. Automated Performance Regression tests
- Overview
- Details
# Kitware Gitlab CI
GitLab CI/CD allows for software development through continuous integration, delivery, and deployment.
@ -308,4 +312,103 @@ GCC8.
For a view of only ascent jobs refer to the following [link][cdash-ascent].
# Automated Performance Regression tests
## Overview
The design of the performance regression test is composed of the following
components:
1. The Kitware Gitlab instance, which triggers the benchmark jobs when a git
   commit is pushed.
2. Gitlab CI jobs for performing the benchmarks and for generating the
comparison with the historical results.
3. A Git repository that is used for storing the historical results.
4. The Kitware CDASH instance, which files and displays the performance report and
   informs the developer if a performance regression has occurred.
The performance regression test is performed whenever a git commit is pushed.
The job _performancetest_ invokes the benchmark suite in a Gitlab runner
job and later compares its results against the historical results, stored in
the records Git repository, of its most immediate master ancestor. The results
of this comparison are then displayed in a brief report in the form of a
comment in its corresponding Gitlab merge-request.
![perftest_arch](perftest_arch.png)
## Details
### Selection of Benchmarks
While we can possibly run all of the provided benchmarks in the continuous
build track to avoid potential performance and latency issues in the CI, I
have initially limited the benchmark suites to:
- BenchmarkFilters
- BenchmarkInSitu
### Benchmark ctest
We provide a CMake function named `add_benchmark_test` which sets the
performance regression test for the given Google Benchmark suite. It accepts
optional regex arguments that select which benchmarks are executed. If run
locally, it will not upload the results to the online record repository.
### Requirements
- Python3 with the SciPy package
- Benchmark tests will be enabled in a CMAKE build that sets both
`VTKm_ENABLE_BENCHMARKS` and `VTKm_ENABLE_PERFORMANCE_TESTING`
### New Gitlab Runner requirements
- It must have disabled every type of CPU scaling option both at the BIOS and
Kernel level (`cpugovern`).
- It must provide a gitlab runner with a concurrency level 1 to avoid other
jobs being scheduled while the benchmark is being executed.
### How to make sense of the results
Results of both of the benchmark and the comparison against its most recent
commit ancestor can be accessed in the CDASH Notes for the performance
regression build. The CDASH Notes can be accessed by clicking the note-like
miniature image in the build name column.
Performance regression tests that detect a performance failure are reported in
the form of a failure of the test `PerformanceTest($TestName)Report`. The
results of the comparison can be seen by clicking this failed test.
Performance regression test success is determined by performing a null
hypothesis test with the hypothesis that the given test performs similarly
or better than the baseline test with a confidence level 1-alpha. If a pvalue is
small enough (less than alpha), we reject the null hypothesis and we report that
the current commit introduces a performance regression. By default we use a
t-distribution with an alpha value of 5%. The pvalues can be seen in the
uploaded reports.
The following parameters can be modified by editing the corresponding
environmental variables:
- Alpha value: `VTKm_PERF_ALPHA`
- Minimum number of repetitions for each benchmark: `VTKm_PERF_REPETITIONS`
- Minimum time to spend for each benchmark: `VTKm_PERF_MIN_TIME`
- Statistical distribution to use: `VTKm_PERF_DIST`
Below is an example of this raw output of the comparison of the current commit
against the baseline results:
```
Benchmark Time CPU Time Old Time New CPU Old CPU New
------------------------------------------------------------------------------------------------------------
BenchThreshold/manual_time +0.0043 +0.0036 73 73 92 92
BenchThreshold/manual_time +0.0074 +0.0060 73 73 91 92
BenchThreshold/manual_time -0.0003 -0.0007 73 73 92 92
BenchThreshold/manual_time -0.0019 -0.0018 73 73 92 92
BenchThreshold/manual_time -0.0021 -0.0017 73 73 92 92
BenchThreshold/manual_time +0.0001 +0.0006 73 73 92 92
BenchThreshold/manual_time +0.0033 +0.0031 73 73 92 92
BenchThreshold/manual_time -0.0071 -0.0057 73 73 92 92
BenchThreshold/manual_time -0.0050 -0.0041 73 73 92 92
```
[cdash-ascent]: https://open.cdash.org/index.php?project=VTKM&filtercount=1&showfilters=1&field1=site&compare1=63&value1=ascent

BIN
docs/perftest_arch.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 70 KiB