diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index dd24cba71..e34c31d0f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -132,6 +132,12 @@ when: on_success - when: never +.run_ecp_ci: &run_ecp_ci + rules: + - if: '$CI_PROJECT_PATH == "ecpcitest/vtk-m"' + when: on_success + - when: never + # General Longer Term Tasks: # - Setup clang tidy as sub-pipeline # - Setup a machine to replicate the issue in https://gitlab.kitware.com/vtk/vtk-m/-/issues/447 @@ -170,6 +176,41 @@ stages: script: - "ctest -VV -S .gitlab/ci/ctest_build.cmake" - sccache --show-stats + extends: + - .cmake_build_artifacts + +.cmake_test_linux: &cmake_test_linux + stage: test + timeout: 50 minutes + interruptible: true + before_script: + - *install_cmake + script: + - "ctest $CTEST_TIMEOUT -VV -S .gitlab/ci/ctest_test.cmake" + extends: + - .cmake_test_artifacts + +.cmake_memcheck_linux: &cmake_memcheck_linux + stage: test + timeout: 2 hours + interruptible: true + before_script: + - *install_cmake + script: + - "ctest -VV -S .gitlab/ci/ctest_memcheck.cmake" + artifacts: + expire_in: 24 hours + when: always + paths: + # Images generated by the regression tests + - build/*.png + - build/*.pnm + - build/*.pmm + reports: + junit: + - build/junit.xml + +.cmake_build_artifacts: &cmake_build_artifacts artifacts: expire_in: 24 hours when: always @@ -198,14 +239,7 @@ stages: # CDash files. 
- build/DartConfiguration.tcl -.cmake_test_linux: &cmake_test_linux - stage: test - timeout: 50 minutes - interruptible: true - before_script: - - *install_cmake - script: - - "ctest $CTEST_TIMEOUT -VV -S .gitlab/ci/ctest_test.cmake" +.cmake_test_artifacts: &cmake_test_artifacts artifacts: expire_in: 24 hours when: always @@ -227,25 +261,6 @@ stages: junit: - build/junit.xml -.cmake_memcheck_linux: &cmake_memcheck_linux - stage: test - timeout: 2 hours - interruptible: true - before_script: - - *install_cmake - script: - - "ctest -VV -S .gitlab/ci/ctest_memcheck.cmake" - artifacts: - expire_in: 24 hours - when: always - paths: - # The generated regression testing images - - build/*.png - - build/*.pnm - - build/*.pmm - reports: - junit: - - build/junit.xml include: - local: '/.gitlab/ci/centos7.yml' @@ -256,3 +271,4 @@ include: - local: '/.gitlab/ci/ubuntu1804.yml' - local: '/.gitlab/ci/ubuntu2004.yml' - local: '/.gitlab/ci/windows10.yml' + - local: '/.gitlab/ci/ascent.yml' diff --git a/.gitlab/ci/ascent.yml b/.gitlab/ci/ascent.yml new file mode 100644 index 000000000..66bf0a675 --- /dev/null +++ b/.gitlab/ci/ascent.yml @@ -0,0 +1,92 @@ +# Ad-hoc build that runs on the ECP hardware, specifically on OLCF Ascent. 
+ +build:ascent_gcc_cuda: + tags: [olcf, ascent, nobatch] + extends: + - .ascent_gcc_cuda + - .ascent_build + - .run_ecp_ci + - .cmake_build_artifacts + +test:ascent_gcc_cuda: + tags: [olcf, ascent, batch] + extends: + - .ascent_gcc_cuda + - .ascent_test + - .run_ecp_ci + - .cmake_test_artifacts + +.ascent_gcc_cuda: + variables: + CCACHE_BASEDIR: /gpfs/wolf/ + CCACHE_DIR: "/gpfs/wolf/proj-shared/csc331/vtk-m/ci/ccache/" + + # -isystem= is not affected by CCACHE_BASEDIR, thus we must ignore it + CCACHE_IGNOREOPTIONS: "-isystem=*" + CCACHE_NOHASHDIR: "true" + + CMAKE_BUILD_TYPE: RelWithDebInfo + CMAKE_GENERATOR: Unix Makefiles + CUSTOM_CI_BUILDS_DIR: "/gpfs/wolf/proj-shared/csc331/vtk-m/ci/runtime" + FF_ENABLE_JOB_CLEANUP: "true" + + CC: gcc + CXX: g++ + CUDAHOSTCXX: g++ + JOB_MODULES: gcc/8.1.1 spectrum-mpi lsf-tools cuda/11.2.0 + VTKM_SETTINGS: cuda+ascent+ccache + +.ascent_build: + stage: build + variables: + CTEST_MAX_PARALLELISM: 4 + before_script: + # Prepare the environment, starting from a clean module set + - module purge + - echo ${JOB_MODULES} + - module load git git-lfs cmake zstd ${JOB_MODULES} + - export PATH="/gpfs/wolf/proj-shared/csc331/vtk-m/ci/utils:$PATH" + - ccache -p + - ccache -z + + - git remote add lfs https://gitlab.kitware.com/vtk/vtk-m.git + - git fetch lfs + - git-lfs install + - git-lfs pull lfs + + # Start running the build scripts + - cmake --version + - "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake" + - "ctest -VV -S .gitlab/ci/ctest_configure.cmake" + + script: + - "ctest -VV -S .gitlab/ci/ctest_build.cmake" + - ccache -s + +.ascent_test: + stage: test + variables: + GITLAB_CI_EMULATION: "true" + SCHEDULER_PARAMETERS: -P CSC331 -W 1:00 -nnodes 1 -alloc_flags gpudefault + CTEST_MAX_PARALLELISM: 8 + # Test failures still to be addressed, caused by the different env/arch in Ascent + # Refer to issue: https://gitlab.kitware.com/vtk/vtk-m/-/issues/652 + CTEST_EXCLUSIONS: >- + UnitTestMathSERIAL + UnitTestMathCUDA + UnitTestSerialDeviceAdapter + UnitTestAverageByKeySERIAL + 
UnitTestKeysSERIAL + UnitTestWorkletReduceByKeySERIAL + RegressionTestAmrArraysSERIAL + RegressionTestAmrArraysCUDA + + before_script: + # Prep the environment + - module purge + - module load git cmake ${JOB_MODULES} + + script: + - "jsrun -n1 -r1 -a1 -g1 -c7 ctest -VV -S .gitlab/ci/ctest_test.cmake || test_output=$?" + - ctest -VV -S .gitlab/ci/ctest_test_submit.cmake + - $(exit $test_output) diff --git a/.gitlab/ci/config/initial_config.cmake b/.gitlab/ci/config/initial_config.cmake index f20eac906..8b6dc70a4 100644 --- a/.gitlab/ci/config/initial_config.cmake +++ b/.gitlab/ci/config/initial_config.cmake @@ -57,7 +57,7 @@ foreach(option IN LISTS options) set(VTKm_NO_DEPRECATED_VIRTUAL "OFF" CACHE STRING "") elseif(no_testing STREQUAL option) - set(VTKm_ENABLE_TESTING OFF CACHE BOOL "") + set(VTKm_ENABLE_TESTING "OFF" CACHE STRING "") elseif(examples STREQUAL option) set(VTKm_ENABLE_EXAMPLES "ON" CACHE STRING "") @@ -108,6 +108,38 @@ foreach(option IN LISTS options) set(CMAKE_CXX_COMPILER "/opt/rocm/llvm/bin/clang++" CACHE FILEPATH "") set(VTKm_ENABLE_KOKKOS_HIP ON CACHE STRING "") set(CMAKE_HIP_ARCHITECTURES "gfx900" CACHE STRING "") + + elseif(ascent STREQUAL option) + set(CMAKE_C_FLAGS "-mcpu=power9" CACHE STRING "") + set(CMAKE_CXX_FLAGS "-mcpu=power9" CACHE STRING "") + + elseif(ccache STREQUAL option) + find_program(CCACHE_COMMAND NAMES ccache REQUIRED) + + set(CCACHE_VERSION "NotFound") + execute_process( + COMMAND ${CCACHE_COMMAND} "--version" + OUTPUT_VARIABLE CCACHE_VERSION + ECHO_ERROR_VARIABLE + ) + + # Quote the input so that empty output is still passed as a single argument. + string(REGEX REPLACE "\n" " " CCACHE_VERSION "${CCACHE_VERSION}") + string(REGEX REPLACE "^.*ccache version ([.0-9]*).*$" "\\1" + CCACHE_VERSION "${CCACHE_VERSION}") + + # We need a recent version of ccache in order to ignore -isystem while + # hashing keys for the build cache. 
+ if(CCACHE_VERSION VERSION_GREATER_EQUAL 4) + set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "") + set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "") + + if(VTKm_ENABLE_CUDA) + set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "") + endif() + else() + message(FATAL_ERROR "CCACHE version [${CCACHE_VERSION}] is < 4") + endif() endif() endforeach() diff --git a/.gitlab/ci/ctest_test.cmake b/.gitlab/ci/ctest_test.cmake index c539e778f..647e278c1 100644 --- a/.gitlab/ci/ctest_test.cmake +++ b/.gitlab/ci/ctest_test.cmake @@ -11,7 +11,7 @@ ##============================================================================= # We need this CMake versions for tests -cmake_minimum_required(VERSION 3.21) +cmake_minimum_required(VERSION 3.18) # Read the files from the build directory that contain # host information ( name, parallel level, etc ) @@ -26,17 +26,28 @@ set(test_exclusions $ENV{CTEST_EXCLUSIONS} ) +string(REPLACE " " ";" test_exclusions "${test_exclusions}") string(REPLACE ";" "|" test_exclusions "${test_exclusions}") if (test_exclusions) set(test_exclusions "(${test_exclusions})") endif () +# OUTPUT_JUNIT is only understood by ctest_test() since CMake 3.21; omit it on older versions. +if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.21) + set(junit_args OUTPUT_JUNIT "${CTEST_BINARY_DIRECTORY}/junit.xml") +endif() + +set(PARALLEL_LEVEL "10") +if (DEFINED ENV{CTEST_MAX_PARALLELISM}) + set(PARALLEL_LEVEL $ENV{CTEST_MAX_PARALLELISM}) +endif() + ctest_test(APPEND - PARALLEL_LEVEL "10" + PARALLEL_LEVEL ${PARALLEL_LEVEL} RETURN_VALUE test_result EXCLUDE "${test_exclusions}" REPEAT "UNTIL_PASS:3" - OUTPUT_JUNIT "${CTEST_BINARY_DIRECTORY}/junit.xml" + ${junit_args} ) message(STATUS "ctest_test RETURN_VALUE: ${test_result}") diff --git a/.gitlab/ci/ctest_test_submit.cmake b/.gitlab/ci/ctest_test_submit.cmake new file mode 100644 index 000000000..ec4fd328a --- /dev/null +++ b/.gitlab/ci/ctest_test_submit.cmake @@ -0,0 +1,23 @@ +##============================================================================= +## +## Copyright (c) 
Kitware, Inc. +## All rights reserved. +## See LICENSE.txt for details. +## +## This software is distributed WITHOUT ANY WARRANTY; without even +## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +## PURPOSE. See the above copyright notice for more information. +## +##============================================================================= + +# We need this CMake version for tests +cmake_minimum_required(VERSION 3.18) + +# Read the files from the build directory that contain +# host information ( name, parallel level, etc ) +include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake") +ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}") + +ctest_start(APPEND) +ctest_submit(PARTS Test BUILD_ID build_id) +message(STATUS "Test submission build_id: ${build_id}") diff --git a/CTestCustom.cmake.in b/CTestCustom.cmake.in index b63189bcc..1c4a65a76 100644 --- a/CTestCustom.cmake.in +++ b/CTestCustom.cmake.in @@ -41,6 +41,12 @@ list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION # I am seeing these for the Kokkos builds, and I don't want to fight the # compiler flags there, so I'm just going to suppress those. ".*nvlink warning.*SM Arch.*not found in.*" + + # Suppress warnings about third party libraries. + # Normally compilers do not generate warnings for includes using -isystem; + # however, that is not always the case, especially on exotic systems such as + # OLCF Ascent/Summit. + ".*vtkm/thirdparty.*" ) list(APPEND CTEST_CUSTOM_WARNING_MATCH diff --git a/docs/CI-README.md b/docs/CI-README.md index 6b1baadb5..1028ed82e 100644 --- a/docs/CI-README.md +++ b/docs/CI-README.md @@ -1,4 +1,3 @@ - Gitlab CI =============== @@ -17,8 +16,8 @@ Gitlab CI - How to add a new tester - How to update an existing docker image -4. ECP OSTI CI - - Issues +4. 
ECP Continuous Integration + - OLCF Ascent testing machine # Kitware Gitlab CI @@ -258,3 +257,55 @@ sudo docker login --username= cd .gitlab/ci/docker sudo ./update_all.sh 20201230 ``` + +# ECP Continuous Integration + +## OLCF Ascent testing machine + +VTK-m provides CI builds that run at the OLCF Ascent testing cluster. OLCF +Ascent is a scaled down version of OLCF Summit which replicates the same +provisions of software and architecture found at OLCF Summit. This is very +useful for us since we are able to periodically and automatically test branches +of VTK-m. This is a significant leap compared to our previous workflow in which we +would have someone manually test at OLCF Summit every few months. + +The ECP Gitlab continuous integration infrastructure differs from the Kitware +Gitlab CI infrastructure in the following points: + +- Kitware Gitlab CI uses the `docker` executor as the _backend_ for its + `Gitlab-Runner` daemon whereas ECP Gitlab CI uses the Jacamar CI executor as + the _backend_ for the `Gitlab-Runner` daemon. + +- The ECP Gitlab VTK-m project is a mirror of the main Kitware Gitlab + VTK-m repository. + +- The runners provided by the ECP Gitlab CI reside inside the OLCF Ascent + cluster. + +Jacamar CI allows us to implicitly launch jobs using the HPC job scheduler LSF. +Jacamar CI also connects the LSF job with the GitLab project, which allows us to +control its state, monitor its output, and access its artifacts. Below is a brief +diagram describing the relations between the GitLab CI instance and the job. + +![Jacamar CI with LSF](./batch_lsf.png) + +Our Ascent Pipeline is composed of two stages: + +1. The build stage, which builds VTK-m and runs on the batch nodes. +2. The test stage, which runs VTK-m unit tests and runs on the compute nodes. 
+ +Due to the isolated environment in which LSF jobs run at Ascent, we are not able +to access our `sccache` file server as we do in our other CI builds; thus, +for this site we provide a local installation of `ccache`. This turns +out to provide hit ratios similar to `sccache`, since we do not have any other +CI site that runs a _Power9_ architecture. + +Lastly, build and test statuses are reported to our VTK-m CDash dashboard and are +displayed in the same groups as Kitware Gitlab's builds. + +The flavor currently being tested at ECP Ascent is VTK-m with CUDA and +GCC8. + +For a view of only Ascent jobs, refer to the following [link][cdash-ascent]. + +[cdash-ascent]: https://open.cdash.org/index.php?project=VTKM&filtercount=1&showfilters=1&field1=site&compare1=63&value1=ascent diff --git a/docs/batch_lsf.png b/docs/batch_lsf.png new file mode 100644 index 000000000..282b54800 Binary files /dev/null and b/docs/batch_lsf.png differ