ci: Add OLCF GitLab-CI

Co-authored-by: Vicente Bolea <vicente.bolea@kitware.com>
Co-authored-by: Chuck Atkins <chuck.atkins@kitware.com>
This commit is contained in:
Vicente Adolfo Bolea Sanchez 2021-12-14 16:42:25 -05:00
parent 557055b9cc
commit 67b92d3365
8 changed files with 263 additions and 34 deletions

@ -132,6 +132,12 @@
when: on_success
- when: never
# Rule template restricting a job to one project: the job runs (on success of
# the earlier stages) only when CI_PROJECT_PATH is "ecpcitest/vtk-m"; for any
# other project path the final rule keeps it out of the pipeline.
.run_ecp_ci: &run_ecp_ci
rules:
- if: '$CI_PROJECT_PATH == "ecpcitest/vtk-m"'
when: on_success
- when: never
# General Longer Term Tasks:
# - Setup clang tidy as sub-pipeline
# - Setup a machine to replicate the issue in https://gitlab.kitware.com/vtk/vtk-m/-/issues/447
@ -170,6 +176,41 @@ stages:
script:
- "ctest -VV -S .gitlab/ci/ctest_build.cmake"
- sccache --show-stats
extends:
- .cmake_build_artifacts
# Template for Linux test jobs: before_script reuses the `install_cmake`
# anchor, the script runs the CTest test driver, and the artifact settings are
# inherited from .cmake_test_artifacts.
.cmake_test_linux: &cmake_test_linux
stage: test
timeout: 50 minutes
interruptible: true
before_script:
- *install_cmake
script:
# CTEST_TIMEOUT is expanded by the shell; this template does not set it.
- "ctest $CTEST_TIMEOUT -VV -S .gitlab/ci/ctest_test.cmake"
extends:
- .cmake_test_artifacts
# Template for Linux memcheck jobs: runs the memcheck CTest driver with a
# 2 hour timeout and always uploads the regression images and the JUnit
# report, which are kept for 24 hours.
.cmake_memcheck_linux: &cmake_memcheck_linux
stage: test
timeout: 2 hours
interruptible: true
before_script:
- *install_cmake
script:
- "ctest -VV -S .gitlab/ci/ctest_memcheck.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
# NOTE(review): "*.pmm" looks like a typo for "*.ppm" - confirm
- build/*.pmm
reports:
junit:
- build/junit.xml
.cmake_build_artifacts: &cmake_build_artifacts
artifacts:
expire_in: 24 hours
when: always
@ -198,14 +239,7 @@ stages:
# CDash files.
- build/DartConfiguration.tcl
.cmake_test_linux: &cmake_test_linux
stage: test
timeout: 50 minutes
interruptible: true
before_script:
- *install_cmake
script:
- "ctest $CTEST_TIMEOUT -VV -S .gitlab/ci/ctest_test.cmake"
.cmake_test_artifacts: &cmake_test_artifacts
artifacts:
expire_in: 24 hours
when: always
@ -227,25 +261,6 @@ stages:
junit:
- build/junit.xml
.cmake_memcheck_linux: &cmake_memcheck_linux
stage: test
timeout: 2 hours
interruptible: true
before_script:
- *install_cmake
script:
- "ctest -VV -S .gitlab/ci/ctest_memcheck.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
- build/*.pmm
reports:
junit:
- build/junit.xml
include:
- local: '/.gitlab/ci/centos7.yml'
@ -256,3 +271,4 @@ include:
- local: '/.gitlab/ci/ubuntu1804.yml'
- local: '/.gitlab/ci/ubuntu2004.yml'
- local: '/.gitlab/ci/windows10.yml'
- local: '/.gitlab/ci/ascent.yml'

92
.gitlab/ci/ascent.yml Normal file

@ -0,0 +1,92 @@
# Ad-hoc build that runs in the ECP Hardware, concretely in OLCF Ascent.
# Build job for the GCC + CUDA flavor. It is assembled from the shared
# flavor variables, the Ascent build template, the project-path rule and the
# build artifact settings, and is routed to runners tagged `nobatch`.
build:ascent_gcc_cuda:
tags: [olcf, ascent, nobatch]
extends:
- .ascent_gcc_cuda
- .ascent_build
- .run_ecp_ci
- .cmake_build_artifacts
# Test job for the GCC + CUDA flavor. It is assembled from the shared
# flavor variables, the Ascent test template, the project-path rule and the
# test artifact settings, and is routed to runners tagged `batch`.
test:ascent_gcc_cuda:
tags: [olcf, ascent, batch]
extends:
- .ascent_gcc_cuda
- .ascent_test
- .run_ecp_ci
- .cmake_test_artifacts
# Shared variables for the GCC + CUDA flavor; both Ascent jobs pull this
# template in through `extends`.
.ascent_gcc_cuda:
variables:
# ccache is configured through its CCACHE_* environment variables.
CCACHE_BASEDIR: /gpfs/wolf/
CCACHE_DIR: "/gpfs/wolf/proj-shared/csc331/vtk-m/ci/ccache/"
# -isystem= is not affected by CCACHE_BASEDIR, thus we must ignore it
CCACHE_IGNOREOPTIONS: "-isystem=*"
CCACHE_NOHASHDIR: "true"
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: Unix Makefiles
# NOTE(review): presumably tells the runner where to place job checkouts - confirm
CUSTOM_CI_BUILDS_DIR: "/gpfs/wolf/proj-shared/csc331/vtk-m/ci/runtime"
FF_ENABLE_JOB_CLEANUP: "true"
CC: gcc
CXX: g++
CUDAHOSTCXX: g++
# Extra environment modules; the Ascent build and test templates append
# this value to their `module load` line.
JOB_MODULES: gcc/8.1.1 spectrum-mpi lsf-tools cuda/11.2.0
# NOTE(review): presumably a "+"-separated option list handled by the CI
# configuration scripts - confirm
VTKM_SETTINGS: cuda+ascent+ccache
# Build-stage template for Ascent: before_script prepares the environment,
# fetches the Git LFS objects and configures the project; script runs the
# build and then reports the ccache statistics.
.ascent_build:
stage: build
variables:
CTEST_MAX_PARALLELISM: 4
before_script:
# Prep the environment
- module purge
- echo ${JOB_MODULES}
- module load git git-lfs cmake zstd ${JOB_MODULES}
# Put the project's CI utilities directory first in PATH
- export PATH="/gpfs/wolf/proj-shared/csc331/vtk-m/ci/utils:$PATH"
# Print the ccache configuration and zero its statistics so that the
# `ccache -s` at the end reflects this build only
- ccache -p
- ccache -z
# Pull the LFS objects from the Kitware repository, registered as remote `lfs`
# NOTE(review): presumably needed because the project being built does not
# host the LFS objects itself - confirm
- git remote add lfs https://gitlab.kitware.com/vtk/vtk-m.git
- git fetch lfs
- git-lfs install
- git-lfs pull lfs
# Start running the build scripts
- cmake --version
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
- "ctest -VV -S .gitlab/ci/ctest_build.cmake"
- ccache -s
# Test-stage template for Ascent: loads the modules, runs the CTest test
# driver through jsrun, submits the results, and finally reports the exit
# status of the test run as the job result.
.ascent_test:
stage: test
variables:
GITLAB_CI_EMULATION: "true"
# NOTE(review): presumably handed to the LSF scheduler by the runner - confirm
SCHEDULER_PARAMETERS: -P CSC331 -W 1:00 -nnodes 1 -alloc_flags gpudefault
# .gitlab/ci/ctest_test.cmake passes this to ctest_test() as PARALLEL_LEVEL
CTEST_MAX_PARALLELISM: 8
# Test failures still to be addressed, caused by the different env/arch on Ascent
# Refer to issue: https://gitlab.kitware.com/vtk/vtk-m/-/issues/652
# The names below are folded (`>-`) into one space-separated string, which
# .gitlab/ci/ctest_test.cmake turns into the ctest_test() EXCLUDE expression.
CTEST_EXCLUSIONS: >-
UnitTestMathSERIAL
UnitTestMathCUDA
UnitTestSerialDeviceAdapter
UnitTestAverageByKeySERIAL
UnitTestKeysSERIAL
UnitTestWorkletReduceByKeySERIAL
RegressionTestAmrArraysSERIAL
RegressionTestAmrArraysCUDA
before_script:
# Prep the environment
- module purge
- module load git cmake ${JOB_MODULES}
script:
# If the test run fails, keep its exit status in test_output instead of
# aborting here, so that the results are still submitted below.
- "jsrun -n1 -r1 -a1 -g1 -c7 ctest -VV -S .gitlab/ci/ctest_test.cmake || test_output=$?"
- ctest -VV -S .gitlab/ci/ctest_test_submit.cmake
# Finish with the recorded status (test_output is unset when the tests passed)
- $(exit $test_output)

@ -57,7 +57,7 @@ foreach(option IN LISTS options)
set(VTKm_NO_DEPRECATED_VIRTUAL "OFF" CACHE STRING "")
elseif(no_testing STREQUAL option)
set(VTKm_ENABLE_TESTING OFF CACHE BOOL "")
set(VTKm_ENABLE_TESTING "OFF" CACHE STRING "")
elseif(examples STREQUAL option)
set(VTKm_ENABLE_EXAMPLES "ON" CACHE STRING "")
@ -108,6 +108,37 @@ foreach(option IN LISTS options)
set(CMAKE_CXX_COMPILER "/opt/rocm/llvm/bin/clang++" CACHE FILEPATH "")
set(VTKm_ENABLE_KOKKOS_HIP ON CACHE STRING "")
set(CMAKE_HIP_ARCHITECTURES "gfx900" CACHE STRING "")
elseif(ascent STREQUAL option)
set(CMAKE_C_FLAGS "-mcpu=power9" CACHE STRING "")
set(CMAKE_CXX_FLAGS "-mcpu=power9" CACHE STRING "")
elseif(ccache STREQUAL option)
  # Route the C, C++ (and, when enabled, CUDA) compilers through ccache.
  # ccache must be installed and must be version 4 or newer; configuration
  # stops with a fatal error otherwise.
  find_program(CCACHE_COMMAND NAMES ccache REQUIRED)

  set(CCACHE_VERSION "NotFound")
  execute_process(
    COMMAND ${CCACHE_COMMAND} "--version"
    OUTPUT_VARIABLE CCACHE_VERSION
    ECHO_ERROR_VARIABLE
    )

  # Collapse the multi-line banner into one line and keep only the number that
  # follows "ccache version". The expansions are quoted so that empty output
  # or output containing ';' still reaches string() as a single argument.
  string(REGEX REPLACE "\n" " " CCACHE_VERSION "${CCACHE_VERSION}")
  string(REGEX REPLACE "^.*ccache version ([.0-9]*).*$" "\\1"
    CCACHE_VERSION "${CCACHE_VERSION}")

  # We need a recent version of ccache in order to ignore -isystem while
  # hashing keys for the building cache.
  # The variable name is given unexpanded so that an empty value yields a
  # false comparison instead of a malformed if() expression.
  if(CCACHE_VERSION VERSION_GREATER_EQUAL 4)
    set(CMAKE_C_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "")
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "")

    # Only meaningful when an earlier option has already enabled CUDA.
    if(VTKm_ENABLE_CUDA)
      set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_COMMAND}" CACHE STRING "")
    endif()
  else()
    message(FATAL_ERROR "CCACHE version [${CCACHE_VERSION}] is < 4")
  endif()
endif()
endforeach()

@ -11,7 +11,7 @@
##=============================================================================
# We need this CMake versions for tests
cmake_minimum_required(VERSION 3.21)
cmake_minimum_required(VERSION 3.18)
# Read the files from the build directory that contain
# host information ( name, parallel level, etc )
@ -26,17 +26,27 @@ set(test_exclusions
$ENV{CTEST_EXCLUSIONS}
)
string(REPLACE " " ";" test_exclusions "${test_exclusions}")
string(REPLACE ";" "|" test_exclusions "${test_exclusions}")
if (test_exclusions)
set(test_exclusions "(${test_exclusions})")
endif ()
# ctest_test() only accepts OUTPUT_JUNIT starting with CMake 3.21, so the
# argument is prepared only when the running CMake is at least that version
# (3.21.0 itself included). On older releases junit_args stays undefined and
# expands to nothing in the ctest_test() call.
if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.21)
  set(junit_args OUTPUT_JUNIT "${CTEST_BINARY_DIRECTORY}/junit.xml")
endif()
set(PARALLEL_LEVEL "10")
if (DEFINED ENV{CTEST_MAX_PARALLELISM})
set(PARALLEL_LEVEL $ENV{CTEST_MAX_PARALLELISM})
endif()
ctest_test(APPEND
PARALLEL_LEVEL "10"
PARALLEL_LEVEL ${PARALLEL_LEVEL}
RETURN_VALUE test_result
EXCLUDE "${test_exclusions}"
REPEAT "UNTIL_PASS:3"
OUTPUT_JUNIT "${CTEST_BINARY_DIRECTORY}/junit.xml"
${junit_args}
)
message(STATUS "ctest_test RETURN_VALUE: ${test_result}")

@ -0,0 +1,23 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# CTest script that submits the Test part of an already started dashboard run.
# Minimum CMake version required by this script
cmake_minimum_required(VERSION 3.18)
# Read the files from the build directory that contain
# host information ( name, parallel level, etc )
include("$ENV{CI_PROJECT_DIR}/build/CIState.cmake")
ctest_read_custom_files("${CTEST_BINARY_DIRECTORY}")
# APPEND continues the existing dashboard entry instead of creating a new one
ctest_start(APPEND)
# Submit only the Test results and store the returned build id in build_id
ctest_submit(PARTS Test BUILD_ID build_id)
message(STATUS "Test submission build_id: ${build_id}")

@ -41,6 +41,12 @@ list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
# I am seeing these for the Kokkos builds, and I don't want to fight the
# compiler flags there, so I'm just going to suppress those.
".*nvlink warning.*SM Arch.*not found in.*"
# Disable warnings about third party libraries.
# Normally compilers do not generate warnings for includes using -isystem;
# however, that is not always the case, especially on exotic systems such as
# OLCF Ascent/Summit.
".*vtkm/thirdparty.*"
)
list(APPEND CTEST_CUSTOM_WARNING_MATCH

@ -1,4 +1,3 @@
Gitlab CI
===============
@ -17,8 +16,8 @@ Gitlab CI
- How to add a new tester
- How to update an existing docker image
4. ECP OSTI CI
- Issues
4. ECP Continuous Integration
- OLCF Ascent testing machine
# Kitware Gitlab CI
@ -258,3 +257,55 @@ sudo docker login --username=<docker_hub_name>
cd .gitlab/ci/docker
sudo ./update_all.sh 20201230
```
# ECP Continuous Integration
## OLCF Ascent testing machine
VTK-m provides CI builds that run at the OLCF Ascent testing cluster. OLCF
Ascent is a scaled down version of OLCF Summit which replicates the same
provisions of software and architecture found at OLCF Summit. This is very
useful for us since we are allowed to periodically and automatically test
branches of VTK-m. This is a significant leap compared to our previous workflow
in which we would have someone manually test at OLCF Summit every few months.
The ECP Gitlab continuous integration infrastructure differs from the Kitware
Gitlab CI infrastructure at the following points:
- Kitware Gitlab CI uses the `docker` executor as the _backend_ for its
`Gitlab-Runner` daemon whereas ECP Gitlab CI uses the Jacamar CI executor as
the _backend_ for the `Gitlab-Runner` daemon.
- ECP Gitlab VTK-m project is a mirror Gitlab project of the main Kitware Gitlab
VTK-m repository.
- The runners provided by the ECP Gitlab CI reside inside the OLCF Ascent
cluster.
Jacamar CI allows us to implicitly launch jobs using the HPC job scheduler LSF.
Jacamar-CI also connects the LSF job with the GitLab project which allows us to
control its state, monitor its output, and access its artifacts. Below is a brief
diagram describing the relations between the GitLab CI instance and the job.
![Jacamar CI with LSF](./batch_lsf.png)
Our Ascent Pipeline is composed of two stages:
1. The build stage, which builds VTK-m and runs in the batch nodes
2. The test stage, which runs VTK-m unit tests and runs at the compute nodes.
Due to the isolated environment in which LSF jobs run at Ascent, we are not able
to access our `sccache` file server as we do in our other CI builds; thus,
for this particular site we provide a local installation of `ccache`. This turns
out to provide hit ratios similar to `sccache`, since we do not have any other
CI site that runs a _Power9_ architecture.
Lastly, builds and tests status are reported to our VTK-m CDashboard and are
displayed in the same groups as Kitware Gitlab's builds.
The flavor currently being tested at ECP Ascent is VTK-m with CUDA and
GCC8.
For a view of only ascent jobs refer to the following [link][cdash-ascent].
[cdash-ascent]: https://open.cdash.org/index.php?project=VTKM&filtercount=1&showfilters=1&field1=site&compare1=63&value1=ascent

BIN
docs/batch_lsf.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB