CI: Enable Ascent builds in MR

This commit is contained in:
Vicente Adolfo Bolea Sanchez 2022-10-25 15:41:12 -04:00
parent e4d23a4ae6
commit d60c370d1d
7 changed files with 179 additions and 66 deletions

@ -118,6 +118,12 @@
when: on_success
- when: never
# Rule set: the job runs (on success of earlier stages) only when the pipeline
# has a merge-request id and the project is vtk/vtk-m; otherwise never.
.run_only_merge_requests: &run_only_merge_requests
  rules:
    - if: '$CI_MERGE_REQUEST_ID && $CI_PROJECT_PATH == "vtk/vtk-m"'
      when: on_success
    - when: never
.run_scheduled: &run_scheduled
rules:
- if: '$CI_PROJECT_PATH == "vtk/vtk-m" && $CI_COMMIT_TAG'
@ -132,7 +138,7 @@
when: on_success
- when: never
.run_ecp_ci: &run_ecp_ci
.run_ascent_ci: &run_ascent_ci
rules:
- if: '$CI_PROJECT_PATH == "ecpcitest/vtk-m"'
when: on_success

@ -1,86 +1,83 @@
# Ad-hoc build that runs in the ECP Hardware, concretely in OLCF Ascent.
.ascent_gcc_cuda:
  # Settings shared by the Ascent GCC + CUDA jobs that extend this template.
  variables:
    # Compilers and CMake configuration.
    CC: 'gcc'
    CXX: 'g++'
    CUDAHOSTCXX: 'g++'
    CMAKE_BUILD_TYPE: 'RelWithDebInfo'
    CMAKE_GENERATOR: 'Ninja'
    VTKM_SETTINGS: cuda+ascent+ccache

    # ccache configuration.
    CCACHE_BASEDIR: '/gpfs/wolf/'
    CCACHE_DIR: '/gpfs/wolf/proj-shared/csc331/vtk-m/ci/ccache/'
    # -isystem= is not affected by CCACHE_BASEDIR, thus we must ignore it
    CCACHE_IGNOREOPTIONS: '-isystem=*'
    CCACHE_NOHASHDIR: 'true'

    CUSTOM_CI_BUILDS_DIR: '/gpfs/wolf/proj-shared/csc331/vtk-m/ci/runtime'

    # Module names, folded into a single space-separated string.
    JOB_MODULES: >-
      git-lfs
      zstd
      cuda/11.4.2
      gcc/10.2.0
      ninja
      spectrum-mpi
      lsf-tools
      cmake
  interruptible: true
# Shell snippet exposed through the setup_env_ecpci anchor: purges the loaded
# modules, loads and lists ${JOB_MODULES}, and prepends the CI utils directory
# to PATH.
.setup_env_ecpci: &setup_env_ecpci |
  module purge
  module load ${JOB_MODULES}
  module list
  export PATH="/gpfs/wolf/proj-shared/csc331/vtk-m/ci/utils:$PATH"
# Build-stage job for the Ascent GCC + CUDA configuration.
# NOTE(review): both .run_ecp_ci and .run_ascent_ci are listed under extends,
# which suggests this listing interleaves removed and added diff lines;
# confirm against the checked-out file before editing.
build:ascent_gcc_cuda:
stage: build
tags: [olcf, ascent, nobatch]
extends:
- .ascent_gcc_cuda
- .ascent_build
- .run_ecp_ci
- .cmake_build_artifacts
- .run_ascent_ci
before_script:
# Run the shared environment setup snippet, then zero the ccache statistics.
- *setup_env_ecpci
- ccache -z
# Add the upstream vtk-m repository as remote "lfs" and pull Git LFS objects
# from it.
- git remote add lfs https://gitlab.kitware.com/vtk/vtk-m.git
- git fetch lfs
- git-lfs install
- git-lfs pull lfs
script:
# Runs the gitlab_ci_setup and ctest_configure CMake scripts.
- cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake
- ctest -VV -S .gitlab/ci/ctest_configure.cmake
artifacts:
# build/ is uploaded whether the job passes or fails and expires after a day.
expire_in: 24 hours
when: always
paths:
- build/
timeout: 90 minutes
# Test-stage job for the Ascent GCC + CUDA configuration; it needs, and takes
# its artifacts from, build:ascent_gcc_cuda.
# NOTE(review): both .run_ecp_ci and .run_ascent_ci are listed under extends,
# which suggests this listing interleaves removed and added diff lines;
# confirm against the checked-out file before editing.
test:ascent_gcc_cuda:
stage: test
tags: [olcf, ascent, batch]
extends:
- .ascent_gcc_cuda
- .ascent_test
- .run_ecp_ci
- .run_ascent_ci
- .cmake_test_artifacts
needs:
- build:ascent_gcc_cuda
dependencies:
- build:ascent_gcc_cuda
# NOTE(review): this is a second definition of .ascent_gcc_cuda in this listing
# (an earlier one sets the same variables with a multi-line JOB_MODULES).
# Presumably one of the two is the pre-change version shown by the diff;
# confirm which one the checked-out file actually contains.
.ascent_gcc_cuda:
variables:
CCACHE_BASEDIR: /gpfs/wolf/
CCACHE_DIR: "/gpfs/wolf/proj-shared/csc331/vtk-m/ci/ccache/"
# -isystem= is not affected by CCACHE_BASEDIR, thus we must ignore it
CCACHE_IGNOREOPTIONS: "-isystem=*"
CCACHE_NOHASHDIR: "true"
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: Ninja
CUSTOM_CI_BUILDS_DIR: "/gpfs/wolf/proj-shared/csc331/vtk-m/ci/runtime"
CC: gcc
CXX: g++
CUDAHOSTCXX: g++
# This variant does not list git-lfs or zstd; the before_script of
# .ascent_build loads those two explicitly next to ${JOB_MODULES}.
JOB_MODULES: git cmake ninja gcc/10.2.0 spectrum-mpi lsf-tools cuda/11.4.2
VTKM_SETTINGS: cuda+ascent+ccache
# Template for build-stage jobs on Ascent.
# NOTE(review): the script section invokes ctest_build.cmake twice (once
# directly, once through jsrun) and *setup_env_ecpci is run after a manual
# module setup, which suggests this listing interleaves removed and added diff
# lines; confirm against the checked-out file before editing.
.ascent_build:
stage: build
variables:
CTEST_MAX_PARALLELISM: 4
# NOTE(review): presumably options for the batch scheduler that launches the
# job; confirm which runner setting consumes this variable.
SCHEDULER_PARAMETERS: -P CSC331 -W 2:00 -nnodes 1 -alloc_flags gpudefault
# We need this to skip ctest_submit from being run inside a jsrun job
GITLAB_CI_EMULATION: 1
before_script:
# Prep the environment
- module purge
- echo ${JOB_MODULES}
- module load git-lfs zstd ${JOB_MODULES}
- export PATH="/gpfs/wolf/proj-shared/csc331/vtk-m/ci/utils:$PATH"
# Print the ccache configuration, then zero its statistics.
- ccache -p
- ccache -z
# Add the upstream vtk-m repository as remote "lfs" and pull Git LFS objects
# from it.
- git remote add lfs https://gitlab.kitware.com/vtk/vtk-m.git
- git fetch lfs
- git-lfs install
- git-lfs pull lfs
# Start running the builds scripts
- cmake --version
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
- *setup_env_ecpci
script:
- "ctest -VV -S .gitlab/ci/ctest_build.cmake"
# The jsrun lines set CTEST_MAX_PARALLELISM per command (32 for the build
# script, 4 for the test script).
- CTEST_MAX_PARALLELISM=32 jsrun -n1 -a1 -g1 -c42 ctest -VV -S .gitlab/ci/ctest_build.cmake
- CTEST_MAX_PARALLELISM=4 jsrun -n1 -a1 -g1 -c8 ctest -VV -S .gitlab/ci/ctest_test.cmake
after_script:
# Re-run the environment setup snippet before printing the ccache statistics.
- *setup_env_ecpci
- ccache -s
# Template for test-stage jobs on Ascent.
# NOTE(review): this listing appears to interleave removed and added diff
# lines; confirm against the checked-out file before editing.
.ascent_test:
stage: test
variables:
# NOTE(review): presumably options for the batch scheduler that launches the
# job; confirm which runner setting consumes this variable.
SCHEDULER_PARAMETERS: -P CSC331 -W 1:00 -nnodes 1 -alloc_flags gpudefault
CTEST_MAX_PARALLELISM: 8
# Tests errors to address due to different env/arch in Ascent
# Refer to issue: https://gitlab.kitware.com/vtk/vtk-m/-/issues/652
CTEST_EXCLUSIONS: >-
before_script:
# Prep the environment
- module purge
- module load ${JOB_MODULES}
script:
# A failing test run stores its exit status in test_output instead of
# stopping the script, so the two submit steps below still run; the last line
# then exits with that stored status.
- "jsrun -n1 -r1 -a1 -g1 -c7 ctest -VV -S .gitlab/ci/ctest_test.cmake || test_output=$?"
- ctest -VV -S .gitlab/ci/ctest_submit_build.cmake
- ctest -VV -S .gitlab/ci/ctest_submit_test.cmake
- $(exit $test_output)
timeout: 30 minutes

@ -0,0 +1,44 @@
#!/usr/bin/env python3
import json
import ssl
import sys
import urllib.request
class ecpci_url_reader:
    """Small reader for REST endpoints that sends a PRIVATE-TOKEN header.

    Args:
        base_url: prefix prepended to every path passed to the read methods.
        token: value sent in the ``PRIVATE-TOKEN`` request header.
    """

    def __init__(self, base_url, token):
        self.base_url = base_url
        self.token = token

    def to_string(self, url):
        """Fetch ``base_url + url`` and return the body decoded as UTF-8.

        Raises whatever ``urllib`` raises on connection or HTTP errors.
        """
        # NOTE(review): TLS certificate verification is disabled on purpose
        # here (private ssl helper); confirm this is still required.
        opener = urllib.request.build_opener(
            urllib.request.HTTPSHandler(
                context=ssl._create_unverified_context()))
        # Use the values stored on this instance rather than module-level
        # globals, so the class works wherever it is instantiated.
        opener.addheaders = [('PRIVATE-TOKEN', self.token)]
        # Close the response as soon as the body has been read.
        with opener.open(self.base_url + url) as response:
            return response.read().decode('utf-8')

    def to_json(self, url):
        """Fetch ``base_url + url`` and return the body parsed as JSON."""
        return json.loads(self.to_string(url))
def _job_id_for_stage(jobs, stage, fallback_index):
    """Return the id (as a string) of the job whose trace should be shown.

    Prefers the first entry of `jobs` whose 'stage' field equals `stage`.
    When no entry reports that stage, falls back to `jobs[fallback_index]`.
    Returns None when neither lookup yields a job.
    """
    for job in jobs:
        if job.get('stage') == stage:
            return str(job['id'])
    if fallback_index < len(jobs):
        return str(jobs[fallback_index]['id'])
    return None


def _print_trace(reader, job_id):
    """Print the trace of `job_id`, or a placeholder line when it is None."""
    if job_id is None:
        print("(no job found)")
    else:
        print(reader.to_string("/jobs/" + job_id + "/trace"))


# Usage: <script> BASE_URL COMMIT TOKEN
# Prints the traces of the build and test jobs of the commit's last pipeline.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.exit("usage: " + sys.argv[0] + " BASE_URL COMMIT TOKEN")

    # These stay module-level names (the guard does not open a new scope).
    base_url = sys.argv[1]
    commit = sys.argv[2]
    token = sys.argv[3]
    handler = ecpci_url_reader(base_url, token)

    commit_info = handler.to_json("/repository/commits/" + commit)
    # 'last_pipeline' can be missing or null when no pipeline exists for the
    # commit; report that instead of failing with a TypeError.
    last_pipeline = commit_info.get('last_pipeline')
    if not last_pipeline:
        sys.exit("ERROR: no pipeline found for commit " + commit)
    last_pipeline_id = str(last_pipeline['id'])

    jobs = handler.to_json("/pipelines/" + last_pipeline_id + "/jobs")
    # Positional fallbacks: index 1 for the build job, index 0 for the test job.
    build_job_id = _job_id_for_stage(jobs, 'build', 1)
    test_job_id = _job_id_for_stage(jobs, 'test', 0)

    print("ECPCITEST BUILD OUTPUT================================================")
    _print_trace(handler, build_job_id)
    print("ECPCITEST BUILD END===================================================")
    print("ECPCITEST TEST OUTPUT=================================================")
    _print_trace(handler, test_job_id)
    print("ECPCITEST TEST OUTPUT END=============================================")

@ -0,0 +1,4 @@
#!/bin/bash -ex
# Force-push the current HEAD to a branch of a remote repository.
#   $1: URL of the remote repository
#   $2: name of the branch to create or overwrite

remote_url="$1"
branch_name="$2"

# NOTE(review): Git LFS is uninstalled first, presumably so that the push does
# not try to upload LFS objects to the remote; confirm.
git lfs uninstall
git -c http.sslVerify=false push -f "$remote_url" "HEAD:refs/heads/${branch_name}"

@ -0,0 +1,3 @@
#!/bin/bash -ex
# Delete a branch from a remote repository.
#   $1: URL of the remote repository
#   $2: name of the branch to delete

remote_url="$1"
branch_name="$2"

git -c http.sslVerify=false push --delete "$remote_url" "$branch_name"

@ -0,0 +1,37 @@
#!/bin/bash -ex
# Seconds to sleep between two consecutive polls of the commit status.
declare -r POLL_INTERVAL_SECONDS="10"
# Print the first "status" value found in the JSON document served at URL $1.
#
# The reply is split into one token per line (on `,{}[]`) and the first line
# of the form "status":"<word>" is extracted. Prints `empty` when no status
# can be read: no pipeline yet, or the request failed or timed out.
function fetch_commit_status()
{
  local -r url="$1"
  local output

  # Bound every request so that a stalled connection cannot block the caller
  # indefinitely. `|| true` keeps a no-match grep or a failed curl from
  # aborting the script under errexit; both cases fall through to `empty`.
  output=$(curl --insecure --silent --connect-timeout 10 --max-time 60 "$url" \
    | tr ',{}[]' '\n' \
    | grep -Po -m1 '(?<=^"status":")\w+(?=")') || true

  # No status means that the pipeline has not been created yet
  echo "${output:-empty}"
}
# Poll the status of a commit until it reaches a final state.
#   $1: REST base URL; the commit is queried at $1/repository/commits/$2
#   $2: commit identifier
#
# Returns 0 when the status is `success`, and 1 (with a message on stderr)
# when it is `failed`, `canceled` or `skipped`. Any other value keeps the loop
# polling every POLL_INTERVAL_SECONDS seconds; the function has no timeout of
# its own.
function wait_commit_pipeline_status()
{
  local -r base_url="$1"
  local -r commit="$2"
  local -r url="${base_url}/repository/commits/${commit}"
  # Declared separately from the assignment so that `local` does not mask the
  # exit status of the command substitution.
  local ret

  while true
  do
    ret="$(fetch_commit_status "$url")"
    case "$ret" in
      success)
        return 0 ;;
      failed|canceled|skipped)
        echo "ERROR: The pipeline exited with \`${ret}\` status" >&2
        return 1 ;;
    esac
    sleep "$POLL_INTERVAL_SECONDS"
  done
}
# Entry point: $1 is the REST base URL, $2 the commit to wait for. The script's
# exit status is the function's return value.
wait_commit_pipeline_status "$1" "$2"

@ -103,3 +103,25 @@ test:ubuntu2004_hip_kokkos:
needs:
- build:ubuntu2004_hip_kokkos
timeout: 3 hours
# This is only for merge-requests
build:ascent:
  # Pushes the merge-request HEAD as a temporary branch to ASCENT_GIT_URL,
  # waits for the status of that commit reported at ASCENT_REST_URL, and
  # afterwards removes the branch and prints the remote job traces.
  stage: build
  extends:
    - .ubuntu2004
    - .run_only_merge_requests
  tags: [vtkm, docker]
  variables:
    ASCENT_GIT_URL: 'https://vbolea:${ECPTEST_TOKEN}@code.ornl.gov/ecpcitest/vtk-m.git'
    ASCENT_REST_URL: 'https://code.ornl.gov/api/v4/projects/7035'
    BRANCH_NAME: 'mr${CI_MERGE_REQUEST_IID}-${CI_COMMIT_REF_NAME}'
  script:
    - .gitlab/ci/config/ecpci-push-branch.sh "$ASCENT_GIT_URL" "$BRANCH_NAME"
    # The wait is capped at 130 minutes, below the 150-minute job timeout.
    - timeout 130m .gitlab/ci/config/ecpci-wait-commit-status.sh "$ASCENT_REST_URL" "$CI_COMMIT_SHA"
  after_script:
    # Branch removal is best-effort: a failure does not stop the trace fetch.
    - .gitlab/ci/config/ecpci-remove-branch.sh "$ASCENT_GIT_URL" "$BRANCH_NAME" || true
    - .gitlab/ci/config/ecpci-fetch-commit-trace.py "$ASCENT_REST_URL" "$CI_COMMIT_SHA" "$ECPTEST_TOKEN"
  timeout: 150 minutes
  interruptible: true