Merge branch 'master' into alpine_sampling_2d

This commit is contained in:
Subhashis Hazarika 2021-03-24 14:08:10 -06:00
commit ed8d2fb356
1238 changed files with 74155 additions and 29609 deletions

@ -1,17 +1,19 @@
---
# This configuration requires clang-format 3.8 or higher.
# This configuration requires clang-format 9 or higher.
BasedOnStyle: Mozilla
AlignAfterOpenBracket: Align
AlignEscapedNewlines: true
AlignOperands: false
AlwaysBreakAfterReturnType: None
AllowAllParametersOfDeclarationOnNextLine: false
AlwaysBreakAfterDefinitionReturnType: None
BreakBeforeBraces: Allman
AlwaysBreakAfterReturnType: None
BinPackArguments: false
BinPackParameters: false
BreakBeforeBraces: Allman
ColumnLimit: 100
# FixNamespaceComments: true
MaxEmptyLinesToKeep: 4
Standard: Cpp11
# This requires clang-format 4.0 (at least).
#FixNamespaceComments: true
ReflowComments: false
SpaceAfterTemplateKeyword: true
Standard: Cpp11
...

2
.gitattributes vendored

@ -1,5 +1,5 @@
# Attributes used for formatting.
[attr]our-c-style whitespace=tab-in-indent format.clang-format
[attr]our-c-style whitespace=tab-in-indent format.clang-format=9
*.cxx our-c-style
*.h our-c-style

@ -1,125 +0,0 @@
.slurm_p9_cuda: &slurm_p9_cuda
tags:
- nmc
- slurm
- nmc-xxfe1-sched-001
- xx-fe1
variables:
NMC_FE1_SLURM_PARAMETERS: " -N1 -p ecp-p9-4v100 --extra-node-info=*:*:* -t 1:30:00 "
CC: "gcc"
CXX: "g++"
CUDAHOSTCXX: "g++"
before_script:
# We need gcc-4.8.5, which is the system default compiler but not a compiler
# listed under the module system.
#
# That means to get this to work properly we explicitly do not request
# any compiler.
- module load cuda cmake/3.14.5
.slurm_p9_opemp: &slurm_p9_opemp
tags:
- nmc
- slurm
- nmc-xxfe1-sched-001
- xx-fe1
variables:
NMC_FE1_SLURM_PARAMETERS: " -N1 -p ecp-p9-4v100 --extra-node-info=*:*:* -t 1:30:00 "
before_script:
- module load gcc/8.3.0 openmpi/3.1.4 cmake/3.14.5
.cmake_build_artifacts: &cmake_build_artifacts
artifacts:
expire_in: 24 hours
when: always
paths:
# The artifacts of the build.
- vtkm-build/bin/
- vtkm-build/include/
# CTest files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
- vtkm-build/CTestCustom*.cmake
- vtkm-build/CTestTestfile.cmake
- vtkm-build/*/CTestTestfile.cmake
- vtkm-build/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/*/*/CTestTestfile.cmake
- vtkm-build/Testing/
# CDash files.
- vtkm-build/DartConfiguration.tcl
.cmake_build_p9_cuda: &cmake_build_p9_cuda
stage: build
script:
- srun env | grep SLURM_JOB_NAME
- mkdir vtkm-build
- pushd vtkm-build
- cmake -DCMAKE_BUILD_TYPE=Release -DVTKm_ENABLE_CUDA=ON -S ../
- cmake --build . -j20
- popd
.cmake_build_p9_openmp: &cmake_build_p9_openmp
stage: build
script:
- srun env | grep SLURM_JOB_NAME
- mkdir vtkm-build
- pushd vtkm-build
- cmake -DCMAKE_BUILD_TYPE=Release -DVTKm_ENABLE_OPENMP=ON -S ../
- cmake --build . -j20
- popd
.cmake_test_p9: &cmake_test_p9
stage: test
script:
- echo "running the test using artifacts of the build"
- pushd vtkm-build
# We need to exclude the following tests
# - CopyrightStatement
# - TestInstallSetup
# - SourceInInstall
# Which we can do by using an exclude regex
- ctest -E "Install|CopyrightStatement"
- popd
stages:
- build
- test
build:p9_openmp:
extends:
- .slurm_p9_opemp
- .cmake_build_artifacts
- .cmake_build_p9_openmp
test:p9_openmp:
extends:
- .slurm_p9_opemp
- .cmake_test_p9
dependencies:
- build:p9_openmp
needs:
- build:p9_openmp
build:p9_cuda:
extends:
- .slurm_p9_cuda
- .cmake_build_artifacts
- .cmake_build_p9_cuda
test:p9_cuda:
extends:
- .slurm_p9_cuda
- .cmake_test_p9
dependencies:
- build:p9_cuda
needs:
- build:p9_cuda

@ -3,7 +3,7 @@
#
# * .gitlab/ci/docker/centos7/cuda10.2/
# - cuda
# - gcc 4.8.5
# - gcc 7.3.1
# * .gitlab/ci/docker/centos8/base/
# - gcc 8.3.1
# - clang 8.0.1
@ -13,7 +13,7 @@
# - cuda
# - gcc 8.2.1
# * .gitlab/ci/docker/ubuntu1604/base/
# - gcc 4.8
# - gcc 5.4.0
# - clang 3.8
# - clang 5.0
# - tbb
@ -32,9 +32,11 @@
# - tbb
# - openmp
# - mpich2
# * .gitlab/ci/docker/ubuntu1804/cuda10.1/
# - hdf5
# * .gitlab/ci/docker/ubuntu1804/cuda11.1/
# - cuda
# - gcc 7.4
# - gcc 7
# - gcc 8
# - tbb
# - openmp
# - mpich2
@ -49,55 +51,64 @@
GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci
.centos7: &centos7
image: "kitware/vtkm:ci-centos7_cuda10.2-20200601"
image: "kitware/vtkm:ci-centos7_cuda10.2-20210128"
extends:
- .docker_image
.centos8: &centos8
image: "kitware/vtkm:ci-centos8-20200601"
image: "kitware/vtkm:ci-centos8-20201016"
extends:
- .docker_image
.rhel8: &rhel8
image: "kitware/vtkm:ci-rhel8_cuda10.2-20200601"
image: "kitware/vtkm:ci-rhel8_cuda10.2-20201016"
extends:
- .docker_image
.ubuntu1604: &ubuntu1604
image: "kitware/vtkm:ci-ubuntu1604-20200601"
image: "kitware/vtkm:ci-ubuntu1604-20201016"
extends:
- .docker_image
.ubuntu1604_cuda: &ubuntu1604_cuda
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200601"
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20201016"
extends:
- .docker_image
.ubuntu1804: &ubuntu1804
image: "kitware/vtkm:ci-ubuntu1804-20200601"
image: "kitware/vtkm:ci-ubuntu1804-20210107"
extends:
- .docker_image
.ubuntu1804_cuda: &ubuntu1804_cuda
image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200601"
image: "kitware/vtkm:ci-ubuntu1804_cuda11.1-20201016"
extends:
- .docker_image
.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20201016"
extends:
- .docker_image
.ubuntu2004_doxygen: &ubuntu2004_doxygen
image: "kitware/vtkm:ci-doxygen-20200601"
image: "kitware/vtkm:ci-doxygen-20201016"
extends:
- .docker_image
.ubuntu2004_kokkos: &ubuntu2004_kokkos
image: "kitware/vtkm:ci-ubuntu2004_kokkos-20201016"
extends:
- .docker_image
.only-default: &only-default
only:
- master
- master@vtk/vtk-m
- tags@vtk/vtk-m
- merge_requests
- tags
.only-master: &only-master
only:
- master
- master@vtk/vtk-m
# General Longer Term Tasks:
@ -120,6 +131,7 @@ stages:
- export PATH=$PWD/.gitlab:$PATH
- SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
- sccache --show-stats
- .gitlab/ci/config/google_benchmarks.sh
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
@ -163,6 +175,14 @@ stages:
#for running failed tests multiple times so failures
#due to system load are not reported
- "ctest-latest -VV -S .gitlab/ci/ctest_test.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
- build/*.pmm
.cmake_memcheck_linux: &cmake_memcheck_linux
stage: test
@ -170,6 +190,14 @@ stages:
interruptible: true
script:
- "ctest-latest -VV -S .gitlab/ci/ctest_memcheck.cmake"
artifacts:
expire_in: 24 hours
when: always
paths:
# The generated regression testing images
- build/*.png
- build/*.pnm
- build/*.pmm
include:
- local: '/.gitlab/ci/centos7.yml'
@ -178,4 +206,5 @@ include:
- local: '/.gitlab/ci/rhel8.yml'
- local: '/.gitlab/ci/ubuntu1604.yml'
- local: '/.gitlab/ci/ubuntu1804.yml'
- local: '/.gitlab/ci/ubuntu2004.yml'
- local: '/.gitlab/ci/windows10.yml'

@ -1,12 +1,13 @@
# Build on centos7 with CUDA and test on rhel8 and centos7
# gcc 4.8
build:centos7_gcc48:
# gcc 7.3.1
build:centos7_gcc73:
tags:
- build
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .centos7
@ -15,33 +16,33 @@ build:centos7_gcc48:
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
CMAKE_GENERATOR: "Unix Makefiles"
VTKM_SETTINGS: "cuda+turing+32bit_ids"
VTKM_SETTINGS: "cuda+turing+32bit_ids+no_rendering"
test:centos7_gcc48:
test:centos7_gcc73:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .centos7
- .cmake_test_linux
- .only-default
dependencies:
- build:centos7_gcc48
- build:centos7_gcc73
needs:
- build:centos7_gcc48
- build:centos7_gcc73
test:rhel8_test_centos7:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .rhel8
- .cmake_test_linux
@ -49,6 +50,6 @@ test:rhel8_test_centos7:
variables:
CTEST_EXCLUSIONS: "built_against_test_install"
dependencies:
- build:centos7_gcc48
- build:centos7_gcc73
needs:
- build:centos7_gcc48
- build:centos7_gcc73

@ -30,6 +30,7 @@ test:centos8_sanitizer:
variables:
OMP_NUM_THREADS: 4
CTEST_MEMORYCHECK_TYPE: LeakSanitizer
CTEST_EXCLUSIONS: "RegressionTest.*"
dependencies:
- build:centos8_sanitizer
needs:

@ -0,0 +1,27 @@
#!/bin/bash
set -xe
readonly version="v1.5.2"
readonly tarball="$version.tar.gz"
readonly url="https://github.com/google/benchmark/archive/$tarball"
readonly sha256sum="dccbdab796baa1043f04982147e67bb6e118fe610da2c65f88912d73987e700c"
readonly install_dir="$HOME/gbench"
if ! [[ "$VTKM_SETTINGS" =~ "benchmarks" ]]; then
exit 0
fi
cd "$HOME"
echo "$sha256sum $tarball" > gbenchs.sha256sum
curl --insecure -OL "$url"
sha256sum --check gbenchs.sha256sum
tar xf "$tarball"
mkdir build
mkdir "$install_dir"
cmake -GNinja -S benchmark* -B build -DBENCHMARK_DOWNLOAD_DEPENDENCIES=ON
cmake --build build
cmake --install build --prefix "$install_dir"

@ -10,10 +10,16 @@
##
##=============================================================================
# Default to Release builds.
if ("$ENV{CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
else ()
set(CMAKE_BUILD_TYPE "$ENV{CMAKE_BUILD_TYPE}" CACHE STRING "")
endif ()
string(REPLACE "+" ";" options "$ENV{VTKM_SETTINGS}")
foreach(option IN LISTS options)
if(static STREQUAL option)
set(BUILD_SHARED_LIBS "OFF" CACHE STRING "")
@ -23,6 +29,10 @@ foreach(option IN LISTS options)
elseif(vtk_types STREQUAL option)
set(VTKm_USE_DEFAULT_TYPES_FOR_VTK "ON" CACHE STRING "")
elseif(ascent_types STREQUAL option)
# Note: ascent_types also requires 32bit_ids and 64bit_floats
set(VTKm_USE_DEFAULT_TYPES_FOR_ASCENT "ON" CACHE STRING "")
elseif(32bit_ids STREQUAL option)
set(VTKm_USE_64BIT_IDS "OFF" CACHE STRING "")
@ -37,6 +47,15 @@ foreach(option IN LISTS options)
set(VTKm_ENABLE_SANITIZER "ON" CACHE STRING "")
list(APPEND sanitizers "leak")
elseif(rendering STREQUAL option)
set(VTKm_ENABLE_RENDERING "ON" CACHE STRING "")
elseif(no_rendering STREQUAL option)
set(VTKm_ENABLE_RENDERING "OFF" CACHE STRING "")
elseif(no_virtual STREQUAL option)
set(VTKm_NO_DEPRECATED_VIRTUAL "ON" CACHE STRING "")
elseif(examples STREQUAL option)
set(VTKm_ENABLE_EXAMPLES "ON" CACHE STRING "")
@ -45,6 +64,7 @@ foreach(option IN LISTS options)
elseif(benchmarks STREQUAL option)
set(VTKm_ENABLE_BENCHMARKS "ON" CACHE STRING "")
set(ENV{CMAKE_PREFIX_PATH} "$ENV{HOME}/gbench")
elseif(mpi STREQUAL option)
set(VTKm_ENABLE_MPI "ON" CACHE STRING "")
@ -58,6 +78,12 @@ foreach(option IN LISTS options)
elseif(cuda STREQUAL option)
set(VTKm_ENABLE_CUDA "ON" CACHE STRING "")
elseif(kokkos STREQUAL option)
set(VTKm_ENABLE_KOKKOS "ON" CACHE STRING "")
elseif(hdf5 STREQUAL option)
set(VTKm_ENABLE_HDF5_IO "ON" CACHE STRING "")
elseif(maxwell STREQUAL option)
set(VTKm_CUDA_Architecture "maxwell" CACHE STRING "")
@ -82,7 +108,10 @@ find_program(SCCACHE_COMMAND NAMES sccache)
if(SCCACHE_COMMAND)
set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
if(VTKm_ENABLE_CUDA)
# Use VTKm_CUDA_Architecture to determine if we need CUDA sccache setup
# since this will also capture when kokkos is being used with CUDA backing
if(DEFINED VTKm_CUDA_Architecture)
set(CMAKE_CUDA_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
endif()
endif()

@ -10,7 +10,7 @@ readonly tarball="$filename.tar.gz"
cd .gitlab
echo "$sha256sum $tarball" > sccache.sha256sum
curl -OL "https://github.com/robertmaynard/sccache/releases/download/$version/$tarball"
curl --insecure -OL "https://github.com/robertmaynard/sccache/releases/download/$version/$tarball"
sha256sum --check sccache.sha256sum
tar xf "$tarball"
#mv "$filename/sccache" .

@ -1,14 +0,0 @@
$tempFile = "$env:temp\vcvars.txt"
if ($env:CI_JOB_NAME -eq "build:windows_vs2019") {
cmd.exe /c "call `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat`" && set > $tempFile"
} else {
cmd.exe /c "call `"C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat`" && set > $tempFile"
}
Get-Content "$tempFile" | Foreach-Object {
if ($_ -match "^(.*?)=(.*)$") {
Set-Content "env:\$($matches[1])" $matches[2]
}
}

@ -0,0 +1,9 @@
$erroractionpreference = "stop"
cmd /c "`"$env:VCVARSALL`" $VCVARSPLATFORM -vcvars_ver=$VCVARSVERSION & set" |
foreach {
if ($_ -match "=") {
$v = $_.split("=")
[Environment]::SetEnvironmentVariable($v[0], $v[1])
}
}

@ -33,7 +33,8 @@ if(NOT CTEST_MEMORYCHECK_SUPPRESSIONS_FILE)
endif()
set(test_exclusions
# placeholder for tests to exclude
# placeholder for tests to exclude provided by the env
$ENV{CTEST_EXCLUSIONS}
)
string(REPLACE ";" "|" test_exclusions "${test_exclusions}")

@ -1,9 +1,9 @@
FROM nvidia/cuda:10.2-devel-centos7
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install cmake make gcc gcc-c++ -y
RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 centos-release-scl -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y
RUN yum install git git-lfs devtoolset-7-gcc-c++ -y
# Provide a consistent CMake path across all images
# Install CMake 3.13 as it is the minium for cuda builds
@ -20,4 +20,4 @@ RUN mkdir /opt/cmake-latest/ && \
rm cmake-3.17.3-Linux-x86_64.sh && \
ln -s /opt/cmake-latest/bin/ctest /opt/cmake-latest/bin/ctest-latest
ENV PATH "/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"
ENV PATH "/opt/rh/devtoolset-7/root/bin:/opt/cmake/bin:/opt/cmake-latest/bin:${PATH}"

@ -1,7 +1,7 @@
FROM nvidia/cuda:10.2-devel-ubi8
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install make gcc gcc-c++ curl -y
RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y

@ -12,12 +12,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
libmpich-dev \
libomp-dev \
libtbb-dev \
libhdf5-dev \
mpich \
ninja-build \
software-properties-common
# extra dependencies for charm machine
RUN add-apt-repository ppa:jonathonf/gcc-9.2
RUN add-apt-repository ppa:jonathonf/gcc
RUN apt-get update && apt-get install -y --no-install-recommends \
clang-8 \
g++-9 \

@ -1,10 +1,11 @@
FROM nvidia/cuda:10.1-devel-ubuntu18.04
FROM nvidia/cuda:11.1-devel-ubuntu18.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
g++ \
g++-8 \
clang-8 \
git \
git-lfs \
libmpich-dev \

@ -0,0 +1,47 @@
FROM nvidia/cuda:11.0-devel-ubuntu18.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
g++ \
git \
git-lfs \
ninja-build \
&& \
rm -rf /var/lib/apt/lists/*
# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install
# kokkos backend requires cmake 3.18
RUN mkdir /opt/cmake/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.18.1/cmake-3.18.1-Linux-x86_64.sh > cmake-3.18.1-Linux-x86_64.sh && \
sh cmake-3.18.1-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
rm cmake-3.18.1-Linux-x86_64.sh && \
ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
ENV PATH "/opt/cmake/bin:${PATH}"
# Build and install Kokkos
RUN mkdir -p /opt/kokkos/build && \
cd /opt/kokkos/build && \
curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
tar -xf kokkos-3.1.01.tar.gz && \
mkdir bld && cd bld && \
CXX=/opt/kokkos/build/kokkos-3.1.01/bin/nvcc_wrapper \
cmake -B . -S ../kokkos-3.1.01 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/opt/kokkos \
-DCMAKE_CXX_FLAGS=-fPIC \
-DCMAKE_CXX_STANDARD=14 \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
-DKokkos_ENABLE_CUDA_UVM=ON \
-DKokkos_ARCH_TURING75=ON && \
cmake --build . -j 8 && \
cmake --install .

@ -0,0 +1,41 @@
FROM ubuntu:20.04
LABEL maintainer "Sujin Philip<sujin.philip@kitware.com>"
# Base dependencies for building VTK-m projects
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
cmake \
curl \
g++ \
git \
git-lfs \
libmpich-dev \
libomp-dev \
mpich \
ninja-build \
rsync \
ssh \
software-properties-common
# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install
# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
rm cmake-3.17.3-Linux-x86_64.sh && \
ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
ENV PATH "${PATH}:/opt/cmake/bin"
# Build and install Kokkos
RUN mkdir -p /opt/kokkos/build && \
cd /opt/kokkos/build && \
curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
tar -xf kokkos-3.1.01.tar.gz && \
mkdir bld && cd bld && \
cmake -GNinja -DCMAKE_INSTALL_PREFIX=/opt/kokkos -DCMAKE_CXX_FLAGS=-fPIC -DKokkos_ENABLE_SERIAL=ON ../kokkos-3.1.01 &&\
ninja all && \
ninja install

@ -30,14 +30,22 @@ cd ubuntu1804/base
sudo docker build -t kitware/vtkm:ci-ubuntu1804-$date .
cd ../..
cd ubuntu1804/cuda10.1
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date .
cd ubuntu1804/cuda11.1
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11.1-$date .
cd ../..
cd ubuntu1804/kokkos-cuda
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-$date .
cd ../..
cd ubuntu2004/doxygen/
sudo docker build -t kitware/vtkm:ci-doxygen-$date .
cd ../..
cd ubuntu2004/kokkos
sudo docker build -t kitware/vtkm:ci-ubuntu2004_kokkos-$date .
cd ../..
# sudo docker login --username=<docker_hub_name>
sudo docker push kitware/vtkm
sudo docker system prune

@ -25,10 +25,9 @@ doxygen:
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
- eval `ssh-agent -s`
- ssh-add <(echo "$DOC_API_KEY_BASE64" | base64 --decode)
- doxygen build/docs/doxyfile
- rsync -tv --recursive --delete -e "ssh -o StrictHostKeyChecking=no" build/docs/doxygen/html/ vtkm.documentation
- chmod 400 $DOC_KEY_FILE
- rsync -tv --recursive --delete -e "ssh -i $DOC_KEY_FILE -o StrictHostKeyChecking=no" build/docs/doxygen/html/ kitware@public.kitware.com:vtkm_documentation/
variables:
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "tbb+openmp+mpi+shared+docs"

@ -7,6 +7,7 @@ build:ubuntu1604_gcc5:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1604_cuda
@ -16,41 +17,24 @@ build:ubuntu1604_gcc5:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "cuda+pascal"
VTKM_SETTINGS: "cuda+pascal+no_virtual+ascent_types+32bit_ids+64bit_floats"
# Temporarily disabled as we don't have a pascal hw gitlab-runner
# test:ubuntu1604_gcc5:
# tags:
# - test
# - cuda-rt
# - pascal
# - vtkm
# - docker
# - linux
# extends:
# - .ubuntu1604_cuda
# - .cmake_test_linux
# - .only-default
# dependencies:
# - build:ubuntu1604_gcc5
# needs:
# - build:ubuntu1604_gcc5
# test:ubuntu1804_test_ubuntu1604_gcc5:
# tags:
# - test
# - cuda-rt
# - pascal
# - vtkm
# - docker
# - linux
# extends:
# - .ubuntu1804_cuda
# - .cmake_test_linux
# - .only-default
# dependencies:
# - build:ubuntu1604_gcc5
# needs:
# - build:ubuntu1604_gcc5
test:ubuntu1604_gcc5:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- pascal
extends:
- .ubuntu1604_cuda
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1604_gcc5
needs:
- build:ubuntu1604_gcc5
# Build on ubuntu1704 with OpenMP + CUDA
# Runs only on nightlies
@ -60,6 +44,7 @@ build:ubuntu1604_gcc5_2:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1604_cuda
@ -71,49 +56,24 @@ build:ubuntu1604_gcc5_2:
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "openmp+cuda+pascal+examples"
# Build on ubuntu1604 with mpi + tbb and test on ubuntu1604
# Uses gcc 4.8
# Uses OpenMPI
build:ubuntu1604_gcc48:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu1604
- .cmake_build_linux
- .only-default
variables:
CC: "gcc-4.8"
CXX: "g++-4.8"
CMAKE_BUILD_TYPE: Release
#custom openmpi install location
CMAKE_PREFIX_PATH: "/opt/openmpi/"
VTKM_SETTINGS: "tbb+mpi+shared"
test:ubuntu1604_gcc48:
test:ubuntu1804_test_ubuntu1604_gcc5_2:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- pascal
extends:
- .ubuntu1604
- .ubuntu1804_cuda
- .cmake_test_linux
- .only-default
- .only-master
variables:
#env flags to allow openmpi to run as root user
OMPI_ALLOW_RUN_AS_ROOT: 1
OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
#mpi location so that `built_against_test_install` tests
#pass
CMAKE_PREFIX_PATH: "/opt/openmpi/"
CTEST_EXCLUSIONS: "built_against_test_install"
dependencies:
- build:ubuntu1604_gcc48
- build:ubuntu1604_gcc5_2
needs:
- build:ubuntu1604_gcc48
- build:ubuntu1604_gcc5_2
# Build on ubuntu1604 with tbb and test on ubuntu1604
# Uses clang 5

@ -16,7 +16,7 @@ build:ubuntu1804_gcc9:
CC: "gcc-9"
CXX: "g++-9"
CMAKE_BUILD_TYPE: Debug
VTKM_SETTINGS: "tbb+openmp+mpi+shared"
VTKM_SETTINGS: "benchmarks+tbb+openmp+mpi+shared+hdf5"
test:ubuntu1804_gcc9:
tags:
@ -38,7 +38,7 @@ test:ubuntu1804_gcc9:
- build:ubuntu1804_gcc9
# Build on ubuntu1804 with CUDA + MPI and test on ubuntu1804
# Uses gcc 7.4
# Uses gcc 7
# Uses MPICH2
build:ubuntu1804_gcc7:
tags:
@ -46,6 +46,7 @@ build:ubuntu1804_gcc7:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1804_cuda
@ -54,16 +55,17 @@ build:ubuntu1804_gcc7:
variables:
CC: "gcc-7"
CXX: "g++-7"
VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats"
CUDAHOSTCXX: "g++-7"
VTKM_SETTINGS: "benchmarks+cuda+turing+mpi+64bit_floats+no_virtual"
test:ubuntu1804_gcc7:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .ubuntu1804_cuda
- .cmake_test_linux
@ -74,42 +76,45 @@ test:ubuntu1804_gcc7:
- build:ubuntu1804_gcc7
# Build on ubuntu1804 with OpenMP and test on ubuntu1804
# Uses gcc 7.4
# Build on ubuntu1804 with CUDA+TBB and test on ubuntu1804
# Uses clang as CUDA host compiler
# Runs only on nightlies
build:ubuntu1804_gcc7_2:
build:ubuntu1804_clang_cuda:
tags:
- build
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1804
- .ubuntu1804_cuda
- .cmake_build_linux
- .only-master
- .only-default
# - .only-master
variables:
CC: "gcc-7"
CXX: "g++-7"
VTKM_SETTINGS: "openmp+shared+examples"
CC: "clang-8"
CXX: "clang++-8"
CUDAHOSTCXX: "clang++-8"
VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"
test:ubuntu1804_gcc7_2:
test:ubuntu1804_clang_cuda:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- pascal
extends:
- .ubuntu1804
- .ubuntu1804_cuda
- .cmake_test_linux
- .only-master
variables:
#Restrict OpenMP number of threads since multiple test stages
#execute on the same hardware concurrently
OMP_NUM_THREADS: 4
- .only-default
# - .only-master
dependencies:
- build:ubuntu1804_gcc7_2
- build:ubuntu1804_clang_cuda
needs:
- build:ubuntu1804_gcc7_2
- build:ubuntu1804_clang_cuda
# Build on ubuntu1804 with OpenMP and test on ubuntu1804
# Uses gcc 6.5
@ -179,3 +184,39 @@ test:ubuntu1804_clang8:
- build:ubuntu1804_clang8
needs:
- build:ubuntu1804_clang8
# Build on ubuntu1804 with kokkos and test on ubuntu1804
# Uses CUDA 11
build:ubuntu1804_kokkos:
tags:
- build
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1804_cuda_kokkos
- .cmake_build_linux
- .only-default
variables:
CMAKE_GENERATOR: "Ninja"
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "benchmarks+kokkos+turing+static+64bit_floats"
test:ubuntu1804_kokkos:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .ubuntu1804_cuda_kokkos
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1804_kokkos
needs:
- build:ubuntu1804_kokkos

28
.gitlab/ci/ubuntu2004.yml Normal file

@ -0,0 +1,28 @@
build:ubuntu2004_kokkos:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu2004_kokkos
- .cmake_build_linux
- .only-default
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "kokkos+shared+64bit_floats"
test:ubuntu2004_kokkos:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu2004_kokkos
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu2004_kokkos
needs:
- build:ubuntu2004_kokkos

@ -1,10 +1,27 @@
.windows_build:
variables:
# Note that shell runners only support runners with a single
# concurrency level. We can't use `$CI_CONCURRENCY_ID` because this may
# change between the build and test stages which CMake doesn't support.
# Even if we could, it could change if other runners on the machine
# could run at the same time, so we drop it.
GIT_CLONE_PATH: "$CI_BUILDS_DIR\\vtkm ci"
.windows_vs2019:
variables:
VCVARSALL: "${VS160COMNTOOLS}\\..\\..\\VC\\Auxiliary\\Build\\vcvarsall.bat"
VCVARSPLATFORM: "x64"
VCVARSVERSION: "14.25"
.cmake_build_windows: &cmake_build_windows
extends:
- .windows_build
- .windows_vs2019
stage: build
timeout: 2 hours
interruptible: true
before_script:
- .gitlab/ci/config/setup_vs_powershell.ps1
- Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
- "cmake --version"
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
@ -39,11 +56,14 @@
.cmake_test_windows: &cmake_test_windows
extends:
- .windows_build
- .windows_vs2019
stage: test
timeout: 50 minutes
interruptible: true
before_script:
- .gitlab/ci/config/setup_vs_powershell.ps1
- Invoke-Expression -Command .gitlab/ci/config/vcvarsall.ps1
script:
#Need to use our custom ctest-latest symlink
#This will allow us to use 3.17+ which has support
@ -55,11 +75,13 @@
# Will have CUDA 10.2 once build issues are resolved
build:windows_vs2019:
tags:
- vtkm # Since this is a bare runner, pin to a project.
- nonconcurrent
- build
- vtkm
- windows
- vs2019
- shell
- vs2019
- msvc-19.25
- large-memory
extends:
- .cmake_build_windows
@ -73,10 +95,13 @@ build:windows_vs2019:
test:windows_vs2019:
tags:
- vtkm # Since this is a bare runner, pin to a project.
- nonconcurrent
- test
- vtkm
- windows
- shell
- vs2019
- msvc-19.25
- cuda-rt
- turing
extends:

@ -0,0 +1,23 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
file(GLOB cmake_version_backports
LIST_DIRECTORIES true
RELATIVE "${CMAKE_CURRENT_LIST_DIR}/patches"
"${CMAKE_CURRENT_LIST_DIR}/patches/*")
foreach (cmake_version_backport IN LISTS cmake_version_backports)
if (NOT IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/patches/${cmake_version_backport}")
continue ()
endif ()
if (CMAKE_VERSION VERSION_LESS "${cmake_version_backport}")
list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_LIST_DIR}/patches/${cmake_version_backport}")
endif ()
endforeach ()

@ -77,7 +77,7 @@ endif()
set(vec_levels none native)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
#for now we presume gcc >= 4.8
#for now we presume gcc >= 5.4
list(APPEND vec_levels avx avx2)
#common flags for the avx and avx2 instructions for the gcc compiler

@ -22,6 +22,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(VTKM_COMPILER_IS_CLANG 1)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(VTKM_COMPILER_IS_GNU 1)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XLClang")
set(VTKM_COMPILER_IS_XL 1)
endif()
#-----------------------------------------------------------------------------
@ -51,7 +53,7 @@ if(VTKM_COMPILER_IS_MSVC)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler="/Gy">)
endif()
elseif(NOT VTKM_COMPILER_IS_PGI) #can't find an equivalant PGI flag
elseif(NOT (VTKM_COMPILER_IS_PGI OR VTKM_COMPILER_IS_XL)) #can't find an equivalant PGI/XL flag
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-ffunction-sections>)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-ffunction-sections>)
@ -122,8 +124,15 @@ elseif(VTKM_COMPILER_IS_ICC)
target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-wd1478 -wd13379>)
elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common)
set(cuda_flags -Xcompiler=-Wall,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-local-typedefs,-Wno-unused-function,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-Wunused,-fno-common)
set(cxx_flags -Wall -Wcast-align -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common -Wno-unused-function)
set(cuda_flags -Xcompiler=-Wall,-Wcast-align,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-fno-common,-Wunused,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-function)
#Clang does not support the -Wchar-subscripts flag for warning if an array
#subscript has a char type.
if (VTKM_COMPILER_IS_GNU)
list(APPEND cxx_flags -Wchar-subscripts)
set(cuda_flags "${cuda_flags},-Wchar-subscripts")
endif()
#Only add float-conversion warnings for gcc as the integer warnigns in GCC
#include the implicit casting of all types smaller than int to ints.
@ -161,17 +170,21 @@ elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
endif()
endif()
#common warnings for all platforms when building cuda
if(TARGET vtkm::cuda)
function(setup_cuda_flags)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
#nvcc 9 introduced specific controls to disable the stack size warning
#otherwise we let the warning occur. We have to set this in CMAKE_CUDA_FLAGS
#as it is passed to the device link step, unlike compile_options
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning" PARENT_SCOPE)
endif()
set(display_error_nums -Xcudafe=--display_error_number)
target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${display_error_nums}>)
endfunction()
#common warnings for all platforms when building cuda
if ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
setup_cuda_flags()
endif()
if(NOT VTKm_INSTALL_ONLY_LIBRARIES)

@ -39,6 +39,7 @@
# VTKm_ENABLE_CUDA Will be enabled if VTK-m was built with CUDA support
# VTKm_ENABLE_TBB Will be enabled if VTK-m was built with TBB support
# VTKm_ENABLE_OPENMP Will be enabled if VTK-m was built with OpenMP support
# VTKm_ENABLE_KOKKOS Will be enabled if VTK-m was built with Kokkos support
# VTKm_ENABLE_LOGGING Will be enabled if VTK-m was built with logging support
# VTKm_ENABLE_MPI Will be enabled if VTK-m was built with MPI support
# VTKm_ENABLE_RENDERING Will be enabled if VTK-m was built with rendering support
@ -67,8 +68,9 @@ set(VTKm_VERSION "@VTKm_VERSION@")
set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
set(VTKm_ENABLE_KOKKOS "@VTKm_ENABLE_KOKKOS@")
set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
set(VTKm_ENABLE_LOGGING "@VTKm_ENABLE_LOGGING@")
set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
@ -101,6 +103,12 @@ endif()
if(VTKm_ENABLE_CUDA AND VTKM_FROM_INSTALL_DIR)
set_target_properties(vtkm::cuda PROPERTIES cuda_architecture_flags "@VTKm_CUDA_Architecture_Flags@")
set_target_properties(vtkm::cuda PROPERTIES requires_static_builds TRUE)
# If VTK-m is built with 3.18+ and the consumer is < 3.18 we need to drop
# these properties as they break the VTK-m cuda flag logic
if(CMAKE_VERSION VERSION_LESS 3.18)
set_target_properties(vtkm::cuda PROPERTIES INTERFACE_LINK_OPTIONS "")
endif()
endif()
# VTKm requires some CMake Find modules not included with CMake, so

@ -127,10 +127,13 @@ if(VTKm_ENABLE_CUDA)
requires_static_builds TRUE
)
target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
set_target_properties(vtkm_cuda PROPERTIES
INTERFACE_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
# CUDA 11+ deprecated C++11 support
target_compile_features(vtkm_cuda INTERFACE cxx_std_14)
endif()
# add the -gencode flags so that all cuda code
# is compiled properly
@ -164,7 +167,10 @@ if(VTKm_ENABLE_CUDA)
# 6 - volta
# - Uses: --generate-code=arch=compute_70,code=sm_70
# 7 - turing
# - Uses: --generate-code=arch=compute_75code=sm_75
# - Uses: --generate-code=arch=compute_75,code=sm_75
# 8 - ampere
# - Uses: --generate-code=arch=compute_80,code=sm_80
# - Uses: --generate-code=arch=compute_86,code=sm_86
# 8 - all
# - Uses: --generate-code=arch=compute_30,code=sm_30
# - Uses: --generate-code=arch=compute_35,code=sm_35
@ -172,12 +178,14 @@ if(VTKm_ENABLE_CUDA)
# - Uses: --generate-code=arch=compute_60,code=sm_60
# - Uses: --generate-code=arch=compute_70,code=sm_70
# - Uses: --generate-code=arch=compute_75,code=sm_75
# - Uses: --generate-code=arch=compute_80,code=sm_80
# - Uses: --generate-code=arch=compute_86,code=sm_86
# 8 - none
#
#specify the property
set(VTKm_CUDA_Architecture "native" CACHE STRING "Which GPU Architecture(s) to compile for")
set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell pascal volta turing all none)
set_property(CACHE VTKm_CUDA_Architecture PROPERTY STRINGS native fermi kepler maxwell pascal volta turing ampere all none)
#detect what the property is set too
if(VTKm_CUDA_Architecture STREQUAL "native")
@ -231,23 +239,124 @@ if(VTKm_ENABLE_CUDA)
set(arch_flags --generate-code=arch=compute_70,code=sm_70)
elseif(VTKm_CUDA_Architecture STREQUAL "turing")
set(arch_flags --generate-code=arch=compute_75,code=sm_75)
elseif(VTKm_CUDA_Architecture STREQUAL "ampere")
  # Generate code for both ampere variants. A single set() with both flags is
  # required; the previous second set() overwrote the sm_80 entry, so only
  # sm_86 code was ever generated.
  set(arch_flags --generate-code=arch=compute_80,code=sm_80
                 --generate-code=arch=compute_86,code=sm_86)
elseif(VTKm_CUDA_Architecture STREQUAL "all")
set(arch_flags --generate-code=arch=compute_30,code=sm_30
--generate-code=arch=compute_35,code=sm_35
--generate-code=arch=compute_50,code=sm_50
--generate-code=arch=compute_60,code=sm_60
--generate-code=arch=compute_70,code=sm_70
--generate-code=arch=compute_75,code=sm_75)
--generate-code=arch=compute_75,code=sm_75
--generate-code=arch=compute_80,code=sm_80
--generate-code=arch=compute_86,code=sm_86)
endif()
string(REPLACE ";" " " arch_flags "${arch_flags}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
#We propagate cuda flags via target* options so that they
#export cleanly
set(CMAKE_CUDA_ARCHITECTURES OFF)
target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${arch_flags}>)
target_link_options(vtkm_cuda INTERFACE $<DEVICE_LINK:${arch_flags}>)
else()
# Before 3.18 we had to use CMAKE_CUDA_FLAGS as we had no way
# to propagate flags to the device link step
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
endif()
# This needs to be lower-case for the property to be properly exported
# CMake 3.15 we can add `cuda_architecture_flags` to the EXPORT_PROPERTIES
# target property to have this automatically exported for us
set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
set(VTKm_CUDA_Architecture_Flags "${arch_flags}")
set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
unset(arch_flags)
endif()
endif()
#-----------------------------------------------------------------------------
# Kokkos with its Cuda backend enabled, expects everything to be compiled using its
# `nvcc-wrapper` as the CXX compiler. As the name suggests, nvcc-wrapper is a wrapper around
# Cuda's nvcc compiler. Kokkos targets have all of the flags meant for the nvcc compiler set as the
# CXX compiler flags. This function changes all such flags to be CUDA flags so that we can use
# CMake and vtk-m's existing infrastructure to compile for Cuda and Host separately. Without this
# all of the files will be compiled using nvcc which can be very time consuming. It can also have
# issues with calling host functions from device functions when compiling code for other backends.
# Walk the interface-link graph of Kokkos::kokkos and move the compile flags
# Kokkos attached to the CXX language onto the CUDA language instead (see the
# block comment above for why this is necessary).
function(kokkos_fix_compile_options)
# Work list for a breadth-first traversal starting at the umbrella target.
set(targets Kokkos::kokkos)
set(seen_targets)
set(cuda_arch)
while(targets)
# Pop the first pending target from the work list.
list(GET targets 0 target_name)
list(REMOVE_AT targets 0)
get_target_property(link_libraries ${target_name} INTERFACE_LINK_LIBRARIES)
foreach(lib_target IN LISTS link_libraries)
# Only process real CMake targets; plain library names are left alone.
if (TARGET ${lib_target})
if (lib_target IN_LIST seen_targets)
continue()
endif()
list(APPEND seen_targets ${lib_target})
list(APPEND targets ${lib_target})
get_target_property(compile_options ${lib_target} INTERFACE_COMPILE_OPTIONS)
if (compile_options)
# Capture the CXX-guarded -Xcompiler options and the -arch=sm_NN flag
# Kokkos recorded, then re-target the remaining options at CUDA.
# NOTE(review): cuda_arch is overwritten on every target that matches;
# the last match in traversal order wins — confirm that is intended.
string(REGEX MATCH "[$]<[$]<COMPILE_LANGUAGE:CXX>:-Xcompiler;.*>" cxx_compile_options "${compile_options}")
string(REGEX MATCH "-arch=sm_[0-9][0-9]" cuda_arch "${compile_options}")
string(REPLACE "-Xcompiler;" "" cxx_compile_options "${cxx_compile_options}")
list(TRANSFORM compile_options REPLACE "--relocatable-device-code=true" "") #We use CMake for this flag
list(TRANSFORM compile_options REPLACE "COMPILE_LANGUAGE:CXX" "COMPILE_LANGUAGE:CUDA")
list(APPEND compile_options "${cxx_compile_options}")
set_property(TARGET ${lib_target} PROPERTY INTERFACE_COMPILE_OPTIONS ${compile_options})
endif()
# Clear link options on every visited target; the device-link arch flag
# is re-attached to vtkm::kokkos below.
set_property(TARGET ${lib_target} PROPERTY INTERFACE_LINK_OPTIONS "")
endif()
endforeach()
endwhile()
# Re-attach the detected -arch flag for the device-link step only.
set_property(TARGET vtkm::kokkos PROPERTY INTERFACE_LINK_OPTIONS "$<DEVICE_LINK:${cuda_arch}>")
# NOTE(review): this set_property does not use APPEND, so when both the CUDA
# and OpenMP backends are enabled it replaces the DEVICE_LINK options set just
# above — confirm that dropping the -arch device-link flag is intended here.
if (OPENMP IN_LIST Kokkos_DEVICES)
set_property(TARGET vtkm::kokkos PROPERTY INTERFACE_LINK_OPTIONS "$<HOST_LINK:-fopenmp>")
endif()
endfunction()
if(VTKm_ENABLE_KOKKOS AND NOT TARGET vtkm::kokkos)
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
find_package(Kokkos REQUIRED)
if (CUDA IN_LIST Kokkos_DEVICES)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
enable_language(CUDA)
# Work around a known Cuda 10.x miscompilation with -O3 by downgrading the
# Release optimization level to -O2 for both host and device flags.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND
   CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
   CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0" AND
   CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0" AND
   CMAKE_BUILD_TYPE STREQUAL "Release")
  message(WARNING "There is a known issue with Cuda 10 and -O3 optimization. Switching to -O2. Please refer to issue #555.")
  string(REPLACE "-O3" "-O2" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
  # Fix copy-paste bug: the CUDA release flags must be rewritten from
  # CMAKE_CUDA_FLAGS_RELEASE, not from the CXX flags (which previously
  # clobbered any existing CUDA-specific release flags). Inputs are quoted so
  # empty or multi-word flag strings survive as a single argument.
  string(REPLACE "-O3" "-O2" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}")
endif()
string(REGEX MATCH "[0-9][0-9]$" cuda_arch ${Kokkos_ARCH})
set(CMAKE_CUDA_ARCHITECTURES ${cuda_arch})
message(STATUS "Detected Cuda arch from Kokkos: ${cuda_arch}")
add_library(vtkm::kokkos_cuda INTERFACE IMPORTED GLOBAL)
elseif(HIP IN_LIST Kokkos_DEVICES)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
enable_language(HIP)
add_library(vtkm::kokkos_hip INTERFACE IMPORTED GLOBAL)
set_property(TARGET Kokkos::kokkoscore PROPERTY INTERFACE_COMPILE_OPTIONS "")
set_property(TARGET Kokkos::kokkoscore PROPERTY INTERFACE_LINK_OPTIONS "")
endif()
add_library(vtkm::kokkos INTERFACE IMPORTED GLOBAL)
set_target_properties(vtkm::kokkos PROPERTIES INTERFACE_LINK_LIBRARIES "Kokkos::kokkos")
if (TARGET vtkm::kokkos_cuda)
kokkos_fix_compile_options()
endif()
endif()

@ -1,24 +0,0 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
if(CMAKE_VERSION VERSION_LESS 3.15)
#While CMake 3.10 introduced the new MPI module.
#Fixes related to MPI+CUDA that VTK-m needs are
#only found in CMake 3.15+.
find_package(MPI REQUIRED MODULE)
else()
#clunky but we need to make sure we use the upstream module if it exists
set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "")
find_package(MPI REQUIRED MODULE)
set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
endif()
endif()

@ -61,6 +61,20 @@ function(vtkm_find_gl)
if(DO_GLUT_FIND AND NOT TARGET GLUT::GLUT)
find_package(GLUT ${GLUT_REQUIRED} ${QUIETLY})
if(APPLE AND CMAKE_VERSION VERSION_LESS 3.19.2)
get_target_property(lib_path GLUT::GLUT IMPORTED_LOCATION)
if(EXISTS "${lib_path}.tbd")
set_target_properties(GLUT::GLUT PROPERTIES
IMPORTED_LOCATION "${lib_path}.tbd")
endif()
get_target_property(lib_path GLUT::Cocoa IMPORTED_LOCATION)
if(EXISTS "${lib_path}.tbd")
set_target_properties(GLUT::Cocoa PROPERTIES
IMPORTED_LOCATION "${lib_path}.tbd")
endif()
endif()
endif()
endfunction()

@ -10,9 +10,13 @@
include(CMakeParseArguments)
include(VTKmCMakeBackports)
include(VTKmDeviceAdapters)
include(VTKmCPUVectorization)
include(VTKmMPI)
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
find_package(MPI REQUIRED MODULE)
endif()
#-----------------------------------------------------------------------------
# INTERNAL FUNCTIONS
@ -29,7 +33,7 @@ function(vtkm_get_kit_name kitvar)
# Optional second argument to get dir_prefix.
if (${ARGC} GREATER 1)
set(${ARGV1} "${dir_prefix}" PARENT_SCOPE)
endif (${ARGC} GREATER 1)
endif ()
endfunction(vtkm_get_kit_name)
#-----------------------------------------------------------------------------
@ -62,7 +66,7 @@ function(vtkm_generate_export_header lib_name)
# Now generate a header that holds the macros needed to easily export
# template classes. This
string(TOUPPER ${kit_name} BASE_NAME_UPPER)
string(TOUPPER ${lib_name} BASE_NAME_UPPER)
set(EXPORT_MACRO_NAME "${BASE_NAME_UPPER}")
set(EXPORT_IS_BUILT_STATIC 0)
@ -77,17 +81,17 @@ function(vtkm_generate_export_header lib_name)
if(NOT EXPORT_IMPORT_CONDITION)
#set EXPORT_IMPORT_CONDITION to what the DEFINE_SYMBOL would be when
#building shared
set(EXPORT_IMPORT_CONDITION ${kit_name}_EXPORTS)
set(EXPORT_IMPORT_CONDITION ${lib_name}_EXPORTS)
endif()
configure_file(
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
@ONLY)
if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${dir_prefix}
)
endif()
@ -146,9 +150,14 @@ endfunction()
# Pass to consumers extra compile flags they need to add to CMAKE_CUDA_FLAGS
# to have CUDA compatibility.
#
# This is required as currently the -sm/-gencode flags when specified inside
# COMPILE_OPTIONS / target_compile_options are not propagated to the device
# linker. Instead they must be specified in CMAKE_CUDA_FLAGS
# If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
# a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, this will
# return an empty string as the `vtkm::cuda` target will correctly propagate
# all the necessary flags.
#
# This is required for CMake < 3.18 as they don't support the `$<DEVICE_LINK>`
# generator expression for `target_link_options`. Instead they need to be
# specified in CMAKE_CUDA_FLAGS
#
#
# add_library(lib_that_uses_vtkm ...)
@ -156,7 +165,18 @@ endfunction()
# target_link_libraries(lib_that_uses_vtkm PRIVATE vtkm_filter)
#
function(vtkm_get_cuda_flags settings_var)
if(TARGET vtkm::cuda)
# When policy CMP0105 is NEW and vtkm::cuda already carries device-link
# options, the target propagates all necessary flags itself and nothing needs
# to be added to CMAKE_CUDA_FLAGS.
if(POLICY CMP0105)
  cmake_policy(GET CMP0105 does_device_link)
  get_property(arch_flags
    TARGET vtkm::cuda
    PROPERTY INTERFACE_LINK_OPTIONS)
  # Fix: compare the queried policy value. The previous test compared the
  # literal token CMP0105 against "NEW", which is never true, making the
  # early return dead code.
  if(arch_flags AND does_device_link STREQUAL "NEW")
    return()
  endif()
endif()
get_property(arch_flags
TARGET vtkm::cuda
PROPERTY cuda_architecture_flags)
@ -232,8 +252,14 @@ endfunction()
#
#
# MODIFY_CUDA_FLAGS: If enabled will add the required -arch=<ver> flags
# that VTK-m was compiled with. If you have multiple libraries that use
# VTK-m calling `vtkm_add_target_information` multiple times with
# that VTK-m was compiled with.
#
# If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
# a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, no
# flags need to be added, as the `vtkm::cuda` target will correctly propagate
# all the necessary flags.
#
# Note: calling `vtkm_add_target_information` multiple times with
# `MODIFY_CUDA_FLAGS` will cause duplicate compiler flags. To resolve this issue
# you can; pass all targets and sources to a single `vtkm_add_target_information`
# call, have the first one use `MODIFY_CUDA_FLAGS`, or use the provided
@ -275,10 +301,11 @@ function(vtkm_add_target_information uses_vtkm_target)
${ARGN}
)
if(VTKm_TI_MODIFY_CUDA_FLAGS)
vtkm_get_cuda_flags(CMAKE_CUDA_FLAGS)
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} PARENT_SCOPE)
vtkm_get_cuda_flags(cuda_flags)
if(cuda_flags)
set(CMAKE_CUDA_FLAGS ${cuda_flags} PARENT_SCOPE)
endif()
endif()
set(targets ${uses_vtkm_target})
@ -291,6 +318,8 @@ function(vtkm_add_target_information uses_vtkm_target)
# set the required target properties
set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# CUDA_ARCHITECTURES added in CMake 3.18
set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)
if(VTKm_TI_DROP_UNUSED_SYMBOLS)
foreach(target IN LISTS targets)
@ -298,6 +327,12 @@ function(vtkm_add_target_information uses_vtkm_target)
endforeach()
endif()
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
elseif(TARGET vtkm::kokkos_hip)
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "HIP")
endif()
# Validate that following:
# - We are building with CUDA enabled.
# - We are building a VTK-m library or a library that wants cross library
@ -305,11 +340,15 @@ function(vtkm_add_target_information uses_vtkm_target)
#
# This is required as CUDA currently doesn't support device side calls across
# dynamic library boundaries.
if(TARGET vtkm::cuda)
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
foreach(target IN LISTS targets)
get_target_property(lib_type ${target} TYPE)
get_target_property(requires_static vtkm::cuda requires_static_builds)
if (TARGET vtkm::cuda)
get_target_property(requires_static vtkm::cuda requires_static_builds)
endif()
if (TARGET vtkm::kokkos)
get_target_property(requires_static vtkm::kokkos requires_static_builds)
endif()
if(requires_static AND ${lib_type} STREQUAL "SHARED_LIBRARY" AND VTKm_TI_EXTENDS_VTKM)
#We provide different error messages based on if we are building VTK-m

@ -0,0 +1,18 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
##=============================================================================
# This module is already included in new versions of CMake
if(CMAKE_VERSION VERSION_LESS 3.15)
include(${CMAKE_CURRENT_LIST_DIR}/3.15/FindMPI.cmake)
else()
include(${CMAKE_ROOT}/Modules/FindMPI.cmake)
endif()

7
CMake/patches/README.md Normal file

@ -0,0 +1,7 @@
# CMake backports
This directory contains backports from newer CMake versions to help support
actually using older CMake versions for building VTK-m. The directory name is the
minimum version of CMake for which the contained files are no longer necessary.
For example, the files under the `3.15` directory are not needed for 3.15 or
3.16, but are for 3.14.

@ -37,11 +37,19 @@ if(NOT GENERATED_FILE)
return()
endif()
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
RESULT_VARIABLE pyexpander_result
OUTPUT_VARIABLE pyexpander_output
if(MSVC)
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
RESULT_VARIABLE pyexpander_result
OUTPUT_VARIABLE pyexpander_output
)
else()
execute_process(
COMMAND ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
RESULT_VARIABLE pyexpander_result
OUTPUT_VARIABLE pyexpander_output
)
endif()
if(pyexpander_result)
# If pyexpander returned non-zero, it failed.

@ -18,6 +18,7 @@
# -DVTKm_INSTALL_INCLUDE_DIR=<VTKm_INSTALL_INCLUDE_DIR>
# -DVTKm_ENABLE_RENDERING=<VTKm_ENABLE_RENDERING>
# -DVTKm_ENABLE_LOGGING=<VTKm_ENABLE_LOGGING>
# -DVTKm_ENABLE_HDF5_IO=<VTKm_ENABLE_HDF5_IO>
# -P <VTKm_SOURCE_DIR>/CMake/testing/VTKMCheckSourceInInstall.cmake
##
@ -39,7 +40,9 @@ endif ()
if (NOT DEFINED VTKm_ENABLE_LOGGING)
message(FATAL_ERROR "VTKm_ENABLE_LOGGING not defined.")
endif ()
if (NOT DEFINED VTKm_ENABLE_HDF5_IO)
message(FATAL_ERROR "VTKm_ENABLE_HDF5_IO not defined.")
endif()
include(CMakeParseArguments)
# -----------------------------------------------------------------------------
@ -110,8 +113,19 @@ function(do_verify root_dir prefix)
)
set(file_exceptions
cont/ColorTablePrivate.hxx
thirdparty/diy/vtkmdiy/cmake/mpi_types.h
# Ignore deprecated virtual classes (which are not installed if VTKm_NO_DEPRECATED_VIRTUAL
# is on). These exceptions can be removed when these files are completely removed.
cont/ArrayHandleVirtual.h
cont/ArrayHandleVirtual.hxx
cont/ArrayHandleVirtualCoordinates.h
cont/CellLocator.h
cont/PointLocator.h
cont/StorageVirtual.h
cont/StorageVirtual.hxx
exec/CellLocator.h
exec/PointLocator.h
)
#by default every header in a testing directory doesn't need to be installed
@ -124,7 +138,12 @@ function(do_verify root_dir prefix)
if(NOT VTKm_ENABLE_LOGGING)
list(APPEND directory_exceptions thirdparty/loguru)
endif()
if (NOT VTKm_ENABLE_HDF5_IO)
list(APPEND file_exceptions
io/ImageWriterHDF5.h
io/ImageReaderHDF5.h
)
endif()
#Step 2. Verify the installed files match what headers are listed in each
# source directory
verify_install_per_dir("${VTKm_SOURCE_DIR}/vtkm"

@ -17,6 +17,7 @@ function(vtkm_test_install )
"-DVTKm_INSTALL_INCLUDE_DIR=${VTKm_INSTALL_INCLUDE_DIR}"
"-DVTKm_ENABLE_RENDERING=${VTKm_ENABLE_RENDERING}"
"-DVTKm_ENABLE_LOGGING=${VTKm_ENABLE_LOGGING}"
"-DVTKm_ENABLE_HDF5_IO=${VTKm_ENABLE_HDF5_IO}"
)
#By having this as separate tests using fixtures, it will allow us in
@ -110,6 +111,10 @@ function(vtkm_test_against_install dir)
)
endif()
if(TARGET vtkm::kokkos)
list(APPEND args "-DKokkos_DIR=${Kokkos_DIR}")
endif()
#determine if the test is expected to compile or fail to build. We use
#this information to built the test name to make it clear to the user
#what a 'passing' test means

@ -27,7 +27,6 @@ function(vtkm_create_test_executable
# for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
if (is_mpi_test)
set(extraArgs EXTRA_INCLUDE "vtkm/thirdparty/diy/environment.h")
set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "vtkmdiy::mpi::environment env(ac, av);")
if (use_mpi)
vtkm_diy_use_mpi(ON)
@ -48,9 +47,15 @@ function(vtkm_create_test_executable
vtkm_add_drop_unused_function_flags(${prog})
target_compile_definitions(${prog} PRIVATE ${defines})
#if all backends are enabled, we can use cuda compiler to handle all possible backends.
#determine if we have a device that requires a separate compiler enabled
set(device_lang_enabled FALSE)
if( (TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda) OR (TARGET vtkm::kokkos_hip))
set(device_lang_enabled TRUE)
endif()
#if all backends are enabled, we can use the device compiler to handle all possible backends.
set(device_sources)
if(TARGET vtkm::cuda AND enable_all_backends)
if(device_lang_enabled AND enable_all_backends)
set(device_sources ${sources})
endif()
vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
@ -63,7 +68,7 @@ function(vtkm_create_test_executable
set_property(TARGET ${prog} PROPERTY LIBRARY_OUTPUT_DIRECTORY ${VTKm_LIBRARY_OUTPUT_PATH})
set_property(TARGET ${prog} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${VTKm_EXECUTABLE_OUTPUT_PATH})
target_link_libraries(${prog} PRIVATE vtkm_cont ${libraries})
target_link_libraries(${prog} PRIVATE vtkm_cont_testing ${libraries})
if(use_job_pool)
vtkm_setup_job_pool()
@ -153,6 +158,13 @@ function(vtkm_unit_tests)
#serially
list(APPEND per_device_serial TRUE)
endif()
if (VTKm_ENABLE_KOKKOS)
list(APPEND per_device_command_line_arguments --device=kokkos)
list(APPEND per_device_suffix "KOKKOS")
#may require more time because of kernel generation.
list(APPEND per_device_timeout 1500)
list(APPEND per_device_serial FALSE)
endif()
endif()
set(test_prog)
@ -172,6 +184,9 @@ function(vtkm_unit_tests)
# Add the path to the location where regression test images are to be stored
list(APPEND VTKm_UT_TEST_ARGS "--baseline-dir=${VTKm_SOURCE_DIR}/data/baseline")
# Add the path to the location where generated regression test images should be written
list(APPEND VTKm_UT_TEST_ARGS "--write-dir=${VTKm_BINARY_DIR}")
if(VTKm_UT_MPI)
if (VTKm_ENABLE_MPI)
vtkm_create_test_executable(

@ -8,15 +8,17 @@
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX.
# We require CMake 3.11 with the MSVC generator as the $<COMPILE_LANGUAGE:>
# generator expression is not supported on older versions.
# If you want CUDA support, you will need to have CMake 3.13 on Linux/OSX.
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project (VTKm)
if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
endif()
# We only allow c++14
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# When using C++14 support make sure you use the standard C++ extensions rather
# than compiler-specific versions of the extensions (to preserve portability).
set(CMAKE_CXX_EXTENSIONS OFF)
# Update module path
set(VTKm_CMAKE_MODULE_PATH ${VTKm_SOURCE_DIR}/CMake)
@ -79,8 +81,9 @@ endmacro ()
# Configurable Options
vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
vtkm_option(VTKm_ENABLE_KOKKOS "Enable Kokkos support" OFF)
vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
vtkm_option(VTKm_ENABLE_MPI "Enable MPI support" OFF)
@ -97,6 +100,11 @@ endif()
vtkm_option(VTKm_USE_DOUBLE_PRECISION "Use double precision for floating point calculations" OFF)
vtkm_option(VTKm_USE_64BIT_IDS "Use 64-bit indices." ON)
vtkm_option(VTKm_ENABLE_HDF5_IO "Enable HDF5 support" OFF)
if (VTKm_ENABLE_HDF5_IO)
find_package(HDF5 REQUIRED COMPONENTS HL)
endif()
# VTK-m will turn on logging by default, but will set the default
# logging level to WARN. This option should not be visible by default
# in the GUI, as ERROR and WARN level logging should not interfere
@ -108,6 +116,17 @@ vtkm_option(VTKm_ENABLE_LOGGING "Enable VTKm Logging" ON)
# performance.
vtkm_option(VTKm_NO_ASSERT "Disable assertions in debugging builds." OFF)
# The CUDA compiler (as of CUDA 11) takes a surprising long time to compile
# kernels with assert in them. By default we turn off asserts when compiling
# for CUDA devices.
vtkm_option(VTKm_NO_ASSERT_CUDA "Disable assertions for CUDA devices." ON)
# The HIP compiler (as of ROCm 3.7) takes a surprising long time to compile
# kernels with assert in them they generate `printf` calls which are very
# slow ( cause massive register spillage). By default we turn off asserts when
# compiling for HIP devices.
vtkm_option(VTKm_NO_ASSERT_HIP "Disable assertions for HIP devices." ON)
# When VTK-m is embedded into larger projects that wish to make end user
# applications they want to only install libraries and don't want CMake/headers
# installed.
@ -132,19 +151,26 @@ vtkm_option(VTKm_ENABLE_DEVELOPER_FLAGS "Enable compiler flags that are useful w
# Some application might need not to install those, hence this option.
vtkm_option(VTKm_NO_INSTALL_README_LICENSE "disable the installation of README and LICENSE files" OFF)
# We are in the process of deprecating the use of virtual methods because they
# are not well supported on many accelerators. Turn this option on to remove
# the code entirely. Note that the deprecation of virtual methods is work in
# progress, so not all use of virtual methods may be done. In VTK-m 2.0
# virtual methods should be removed entirely and this option will be removed.
vtkm_option(VTKm_NO_DEPRECATED_VIRTUAL "Do not compile support of deprecated virtual methods" OFF)
mark_as_advanced(
VTKm_ENABLE_LOGGING
VTKm_NO_ASSERT
VTKm_NO_ASSERT_CUDA
VTKm_NO_ASSERT_HIP
VTKm_INSTALL_ONLY_LIBRARIES
VTKm_HIDE_PRIVATE_SYMBOLS
VTKm_ENABLE_DEVELOPER_FLAGS
VTKm_NO_INSTALL_README_LICENSE
VTKm_NO_DEPRECATED_VIRTUAL
)
#-----------------------------------------------------------------------------
# When using C++11 support make sure you use the standard C++ extensions rather
# than compiler-specific versions of the extensions (to preserve portability).
set(CMAKE_CXX_EXTENSIONS Off)
# Setup default build types
include(VTKmBuildType)
@ -202,7 +228,7 @@ if (VTKm_ENABLE_TESTING)
# Setup compiler flags for dynamic analysis if needed
include(testing/VTKmCompilerDynamicAnalysisFlags)
endif (VTKm_ENABLE_TESTING)
endif()
#-----------------------------------------------------------------------------
# Check basic type sizes.
@ -266,10 +292,16 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
# Install helper configure files.
install(
FILES
${VTKm_SOURCE_DIR}/CMake/VTKmCMakeBackports.cmake
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
${VTKm_SOURCE_DIR}/CMake/patches/FindMPI.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
)
install(
FILES
${VTKm_SOURCE_DIR}/CMake/patches/3.15/FindMPI.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}/3.15
)
# Install support files.
install(
@ -279,7 +311,6 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmDIYUtils.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -313,7 +344,7 @@ endif ()
#-----------------------------------------------------------------------------
#add the benchmarking folder
if(VTKm_ENABLE_BENCHMARKS)
add_subdirectory(benchmarking)
add_subdirectory(benchmarking)
endif()
#-----------------------------------------------------------------------------

@ -55,7 +55,7 @@ list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
"nvlink warning : .*ArrayPortalVirtual.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorBoundingIntervalHierarchyExec.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorRectilinearGrid.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorUniformBins.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorTwoLevel.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorUniformGrid.* has address taken but no possible call to it"
)

@ -64,7 +64,7 @@ effort.
VTK-m Requires:
+ C++11 Compiler. VTK-m has been confirmed to work with the following
+ GCC 4.8+
+ GCC 5.4+
+ Clang 5.0+
+ XCode 5.0+
+ MSVC 2015+
@ -76,8 +76,8 @@ VTK-m Requires:
Optional dependencies are:
+ CUDA Device Adapter
+ [Cuda Toolkit 9.2+](https://developer.nvidia.com/cuda-toolkit)
+ Note CUDA >= 10.1 is required on Windows
+ [Cuda Toolkit 9.2, >= 10.2](https://developer.nvidia.com/cuda-toolkit)
+ Note CUDA >= 10.2 is required on Windows
+ TBB Device Adapter
+ [TBB](https://www.threadingbuildingblocks.org/)
+ OpenMP Device Adapter
@ -103,14 +103,14 @@ Optional dependencies are:
VTK-m has been tested on the following configurations:
+ On Linux
+ GCC 4.8.5, 5.4, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
+ GCC 5.4.0, 6.5, 7.4, 8.2, 9.2; Clang 5, 8; Intel 17.0.4; 19.0.0
+ CMake 3.12, 3.13, 3.16, 3.17
+ CUDA 9.2.148, 10.0.130, 10.1.105, 10.2.89
+ CUDA 9.2, 10.2, 11.0, 11.1
+ TBB 4.4 U2, 2017 U7
+ On Windows
+ Visual Studio 2015, 2017
+ CMake 3.12, 3.17
+ CUDA 10.1
+ CUDA 10.2
+ TBB 2017 U3, 2018 U2
+ On MacOS
+ AppleClang 9.1
@ -200,7 +200,6 @@ scene.AddActor(vtkm::rendering::Actor(outputData.GetCellSet(),
outputData.GetField(fieldName),
colorTable));
vtkm::rendering::View3D view(scene, mapper, canvas, camera, bg);
view.Initialize();
view.Paint();
view.SaveAs("demo_output.png");
```

@ -1,4 +1,4 @@
#!/bin/env python3
#!/usr/bin/env python3
#=============================================================================
#
@ -201,11 +201,8 @@ ENV GITLAB_CI=1 \
COPY . /src
ENV $gitlab_env
WORKDIR /src
#Let git fix issues from copying across OS (such as windows EOL)
#Note that this will remove any changes not committed.
RUN echo "$before_script || true" >> /setup-gitlab-env.sh && \
echo "$script || true" >> /run-gitlab-stage.sh && \
git reset --hard && \
bash /setup-gitlab-env.sh
''')

@ -77,6 +77,14 @@ struct ReadWriteValues : vtkm::worklet::WorkletMapField
}
};
// Takes a vector of data and creates a fresh ArrayHandle with memory just allocated
// in the control environment.
template <typename T>
vtkm::cont::ArrayHandle<T> CreateFreshArrayHandle(const std::vector<T>& vec)
{
return vtkm::cont::make_ArrayHandleMove(std::vector<T>(vec));
}
//------------- Benchmark functors -------------------------------------------
// Copies NumValues from control environment to execution environment and
@ -97,14 +105,18 @@ void BenchContToExecRead(benchmark::State& state)
state.SetLabel(desc.str());
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadValues{}, array);
timer.Stop();
@ -181,19 +193,26 @@ void BenchContToExecReadWrite(benchmark::State& state)
state.SetLabel(desc.str());
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadWriteValues{}, array);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
// Remove data from execution environment so it has to be transferred again.
array.ReleaseResourcesExecution();
}
const int64_t iterations = static_cast<int64_t>(state.iterations());
@ -223,21 +242,23 @@ void BenchRoundTripRead(benchmark::State& state)
state.SetLabel(desc.str());
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Ensure data is in control before we start:
array.ReleaseResourcesExecution();
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadValues{}, array);
// Copy back to host and read:
// (Note, this probably does not copy. The array exists in both control and execution for read.)
auto portal = array.ReadPortal();
for (vtkm::Id i = 0; i < numValues; ++i)
{
@ -277,21 +298,23 @@ void BenchRoundTripReadWrite(benchmark::State& state)
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Ensure data is in control before we start:
array.ReleaseResourcesExecution();
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
// Do work on device:
invoker(ReadWriteValues{}, array);
// Copy back to host and read/write:
auto portal = array.WritePortal();
for (vtkm::Id i = 0; i < numValues; ++i)
{
@ -330,14 +353,14 @@ void BenchExecToContRead(benchmark::State& state)
state.SetLabel(desc.str());
}
ArrayType array;
array.Allocate(numValues);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
ArrayType array;
array.Allocate(numValues);
// Time the copy:
timer.Start();
@ -383,14 +406,14 @@ void BenchExecToContWrite(benchmark::State& state)
state.SetLabel(desc.str());
}
ArrayType array;
array.Allocate(numValues);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
ArrayType array;
array.Allocate(numValues);
timer.Start();
// Allocate/write data on device
@ -435,14 +458,14 @@ void BenchExecToContReadWrite(benchmark::State& state)
state.SetLabel(desc.str());
}
ArrayType array;
array.Allocate(numValues);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
ArrayType array;
array.Allocate(numValues);
timer.Start();
// Allocate/write data on device

@ -260,7 +260,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(
->ArgNames({ "Values", "Ops", "Stride" }),
vtkm::cont::AtomicArrayTypeList);
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to adjacent
// Benchmarks AtomicArray::CompareExchange such that each work index writes to adjacent
// indices.
struct CASSeqWorker : public vtkm::worklet::WorkletMapField
{
@ -273,12 +273,8 @@ struct CASSeqWorker : public vtkm::worklet::WorkletMapField
const vtkm::Id idx = i % portal.GetNumberOfValues();
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
T assumed = static_cast<T>(0);
do
{
assumed = oldVal;
oldVal = portal.CompareAndSwap(idx, assumed + val, assumed);
} while (assumed != oldVal);
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
;
}
};
@ -371,7 +367,7 @@ VTKM_BENCHMARK_TEMPLATES_OPTS(BenchCASSeqBaseline,
->ArgNames({ "Values", "Ops" }),
vtkm::cont::AtomicArrayTypeList);
// Benchmarks AtomicArray::CompareAndSwap such that each work index writes to
// Benchmarks AtomicArray::CompareExchange such that each work index writes to
// a strided index:
// ( floor(i / stride) + stride * (i % stride)
struct CASStrideWorker : public vtkm::worklet::WorkletMapField
@ -393,12 +389,8 @@ struct CASStrideWorker : public vtkm::worklet::WorkletMapField
const vtkm::Id idx = (i / this->Stride + this->Stride * (i % this->Stride)) % numVals;
const T val = static_cast<T>(i) + in;
T oldVal = portal.Get(idx);
T assumed = static_cast<T>(0);
do
{
assumed = oldVal;
oldVal = portal.CompareAndSwap(idx, assumed + val, assumed);
} while (assumed != oldVal);
while (!portal.CompareExchange(idx, &oldVal, oldVal + val))
;
}
};

@ -20,6 +20,7 @@
#include <vtkm/cont/Timer.h>
#include <vtkm/worklet/StableSortIndices.h>
#include <vtkm/worklet/WorkletMapField.h>
#include <algorithm>
#include <cmath>

@ -8,18 +8,21 @@
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/ImplicitFunction.h>
#include <vtkm/Math.h>
#include <vtkm/VectorAnalysis.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleMultiplexer.h>
#include <vtkm/cont/ArrayHandleVirtual.h>
#include <vtkm/cont/CellSetStructured.h>
#include <vtkm/cont/ImplicitFunctionHandle.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/Timer.h>
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
#include <vtkm/cont/ArrayHandleVirtual.h>
#endif
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/worklet/WorkletMapTopology.h>
@ -223,20 +226,20 @@ public:
using ExecutionSignature = void(_1, _2, _3, _4);
using InputDomain = _1;
template <typename WeightType, typename T, typename S, typename D>
template <typename WeightType, typename T, typename S>
VTKM_EXEC void operator()(const vtkm::Id2& low_high,
const WeightType& weight,
const vtkm::exec::ExecutionWholeArrayConst<T, S, D>& inPortal,
const vtkm::exec::ExecutionWholeArrayConst<T, S>& inPortal,
T& result) const
{
//fetch the low / high values from inPortal
result = vtkm::Lerp(inPortal.Get(low_high[0]), inPortal.Get(low_high[1]), weight);
}
template <typename WeightType, typename T, typename S, typename D, typename U>
template <typename WeightType, typename T, typename S, typename U>
VTKM_EXEC void operator()(const vtkm::Id2&,
const WeightType&,
const vtkm::exec::ExecutionWholeArrayConst<T, S, D>&,
const vtkm::exec::ExecutionWholeArrayConst<T, S>&,
U&) const
{
//the inPortal and result need to be the same type so this version only
@ -245,50 +248,35 @@ public:
}
};
template <typename ImplicitFunction>
class EvaluateImplicitFunction : public vtkm::worklet::WorkletMapField
{
public:
using ControlSignature = void(FieldIn, FieldOut);
using ExecutionSignature = void(_1, _2);
using ControlSignature = void(FieldIn, FieldOut, ExecObject);
using ExecutionSignature = void(_1, _2, _3);
EvaluateImplicitFunction(const ImplicitFunction* function)
: Function(function)
template <typename VecType, typename ScalarType, typename FunctionType>
VTKM_EXEC void operator()(const VecType& point,
ScalarType& val,
const FunctionType& function) const
{
val = function.Value(point);
}
template <typename VecType, typename ScalarType>
VTKM_EXEC void operator()(const VecType& point, ScalarType& val) const
{
val = this->Function->Value(point);
}
private:
const ImplicitFunction* Function;
};
template <typename T1, typename T2>
class Evaluate2ImplicitFunctions : public vtkm::worklet::WorkletMapField
{
public:
using ControlSignature = void(FieldIn, FieldOut);
using ExecutionSignature = void(_1, _2);
using ControlSignature = void(FieldIn, FieldOut, ExecObject, ExecObject);
using ExecutionSignature = void(_1, _2, _3, _4);
Evaluate2ImplicitFunctions(const T1* f1, const T2* f2)
: Function1(f1)
, Function2(f2)
template <typename VecType, typename ScalarType, typename FType1, typename FType2>
VTKM_EXEC void operator()(const VecType& point,
ScalarType& val,
const FType1& function1,
const FType2& function2) const
{
val = function1.Value(point) + function2.Value(point);
}
template <typename VecType, typename ScalarType>
VTKM_EXEC void operator()(const VecType& point, ScalarType& val) const
{
val = this->Function1->Value(point) + this->Function2->Value(point);
}
private:
const T1* Function1;
const T2* Function2;
};
struct PassThroughFunctor
@ -433,15 +421,19 @@ void BenchBlackScholesStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesStatic, ValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchBlackScholesDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchBlackScholesImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.StockPrice),
vtkm::cont::make_ArrayHandleVirtual(impl.OptionStrike),
vtkm::cont::make_ArrayHandleVirtual(impl.OptionYears));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesDynamic, ValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchBlackScholesMultiplexer0(::benchmark::State& state)
@ -537,15 +529,19 @@ void BenchMathStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchMathStatic, ValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchMathDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchMathImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.InputHandle),
vtkm::cont::make_ArrayHandleVirtual(impl.TempHandle1),
vtkm::cont::make_ArrayHandleVirtual(impl.TempHandle2));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchMathDynamic, ValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchMathMultiplexer0(::benchmark::State& state)
@ -636,13 +632,17 @@ void BenchFusedMathStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchFusedMathStatic, ValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchFusedMathDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchFusedMathImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.InputHandle));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchFusedMathDynamic, ValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchFusedMathMultiplexer0(::benchmark::State& state)
@ -756,15 +756,19 @@ void BenchEdgeInterpStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchEdgeInterpStatic, InterpValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchEdgeInterpDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchEdgeInterpImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.EdgePairHandle),
vtkm::cont::make_ArrayHandleVirtual(impl.WeightHandle),
vtkm::cont::make_ArrayHandleVirtual(impl.FieldHandle));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchEdgeInterpDynamic, InterpValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
struct ImplicitFunctionBenchData
{
@ -802,7 +806,7 @@ static ImplicitFunctionBenchData MakeImplicitFunctionBenchData()
void BenchImplicitFunction(::benchmark::State& state)
{
using EvalWorklet = EvaluateImplicitFunction<vtkm::Sphere>;
using EvalWorklet = EvaluateImplicitFunction;
const vtkm::cont::DeviceAdapterId device = Config.Device;
@ -814,10 +818,7 @@ void BenchImplicitFunction(::benchmark::State& state)
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto handle = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
auto function = static_cast<const vtkm::Sphere*>(handle.PrepareForExecution(device, token));
EvalWorklet eval(function);
EvalWorklet eval;
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
@ -826,7 +827,7 @@ void BenchImplicitFunction(::benchmark::State& state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
invoker(eval, data.Points, data.Result, data.Sphere1);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
@ -836,7 +837,7 @@ VTKM_BENCHMARK(BenchImplicitFunction);
void BenchVirtualImplicitFunction(::benchmark::State& state)
{
using EvalWorklet = EvaluateImplicitFunction<vtkm::ImplicitFunction>;
using EvalWorklet = EvaluateImplicitFunction;
const vtkm::cont::DeviceAdapterId device = Config.Device;
@ -848,9 +849,7 @@ void BenchVirtualImplicitFunction(::benchmark::State& state)
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto sphere = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
EvalWorklet eval(sphere.PrepareForExecution(device, token));
EvalWorklet eval;
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
@ -859,7 +858,7 @@ void BenchVirtualImplicitFunction(::benchmark::State& state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
invoker(eval, data.Points, data.Result, data.Sphere1);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
@ -869,7 +868,7 @@ VTKM_BENCHMARK(BenchVirtualImplicitFunction);
void Bench2ImplicitFunctions(::benchmark::State& state)
{
using EvalWorklet = Evaluate2ImplicitFunctions<vtkm::Sphere, vtkm::Sphere>;
using EvalWorklet = Evaluate2ImplicitFunctions;
const vtkm::cont::DeviceAdapterId device = Config.Device;
@ -881,12 +880,7 @@ void Bench2ImplicitFunctions(::benchmark::State& state)
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto h1 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
auto h2 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere2);
auto f1 = static_cast<const vtkm::Sphere*>(h1.PrepareForExecution(device, token));
auto f2 = static_cast<const vtkm::Sphere*>(h2.PrepareForExecution(device, token));
EvalWorklet eval(f1, f2);
EvalWorklet eval;
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
@ -895,7 +889,7 @@ void Bench2ImplicitFunctions(::benchmark::State& state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
invoker(eval, data.Points, data.Result, data.Sphere1, data.Sphere2);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
@ -903,40 +897,6 @@ void Bench2ImplicitFunctions(::benchmark::State& state)
}
VTKM_BENCHMARK(Bench2ImplicitFunctions);
void Bench2VirtualImplicitFunctions(::benchmark::State& state)
{
using EvalWorklet = Evaluate2ImplicitFunctions<vtkm::ImplicitFunction, vtkm::ImplicitFunction>;
const vtkm::cont::DeviceAdapterId device = Config.Device;
auto data = MakeImplicitFunctionBenchData();
{
std::ostringstream desc;
desc << data.Points.GetNumberOfValues() << " points";
state.SetLabel(desc.str());
}
vtkm::cont::Token token;
auto s1 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere1);
auto s2 = vtkm::cont::make_ImplicitFunctionHandle(data.Sphere2);
EvalWorklet eval(s1.PrepareForExecution(device, token), s2.PrepareForExecution(device, token));
vtkm::cont::Timer timer{ device };
vtkm::cont::Invoker invoker{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
invoker(eval, data.Points, data.Result);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
}
VTKM_BENCHMARK(Bench2VirtualImplicitFunctions);
} // end anon namespace
int main(int argc, char* argv[])

@ -24,8 +24,8 @@
#include <vtkm/cont/ErrorInternal.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/StorageBasic.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/testing/MakeTestDataSet.h>
#include <vtkm/cont/internal/OptionParser.h>
@ -39,6 +39,7 @@
#include <vtkm/filter/Tetrahedralize.h>
#include <vtkm/filter/Threshold.h>
#include <vtkm/filter/ThresholdPoints.h>
#include <vtkm/filter/Triangulate.h>
#include <vtkm/filter/VectorMagnitude.h>
#include <vtkm/filter/VertexClustering.h>
#include <vtkm/filter/WarpScalar.h>
@ -92,12 +93,15 @@ vtkm::cont::InitializeResult Config;
// The input dataset we'll use on the filters:
static vtkm::cont::DataSet InputDataSet;
static vtkm::cont::DataSet UnstructuredInputDataSet;
// The point scalars to use:
static std::string PointScalarsName;
// The cell scalars to use:
static std::string CellScalarsName;
// The point vectors to use:
static std::string PointVectorsName;
// Whether the input is a file or is generated
bool FileAsInput = false;
bool InputIsStructured()
{
@ -166,8 +170,8 @@ void BenchGradient(::benchmark::State& state, int options)
}
}
#define VTKM_PRIVATE_GRADIENT_BENCHMARK(Name, Opts) \
void BenchGradient##Name(::benchmark::State& state) { BenchGradient(state, Opts); } \
#define VTKM_PRIVATE_GRADIENT_BENCHMARK(Name, Opts) \
void BenchGradient##Name(::benchmark::State& state) { BenchGradient(state, Opts); } \
VTKM_BENCHMARK(BenchGradient##Name)
VTKM_PRIVATE_GRADIENT_BENCHMARK(Scalar, Gradient | ScalarInput);
@ -343,10 +347,11 @@ void BenchContour(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id numIsoVals = static_cast<vtkm::Id>(state.range(0));
const bool mergePoints = static_cast<bool>(state.range(1));
const bool normals = static_cast<bool>(state.range(2));
const bool fastNormals = static_cast<bool>(state.range(3));
const bool isStructured = static_cast<vtkm::Id>(state.range(0));
const vtkm::Id numIsoVals = static_cast<vtkm::Id>(state.range(1));
const bool mergePoints = static_cast<bool>(state.range(2));
const bool normals = static_cast<bool>(state.range(3));
const bool fastNormals = static_cast<bool>(state.range(4));
vtkm::filter::Contour filter;
filter.SetActiveField(PointScalarsName, vtkm::cont::Field::Association::POINTS);
@ -372,11 +377,14 @@ void BenchContour(::benchmark::State& state)
filter.SetComputeFastNormalsForUnstructured(fastNormals);
vtkm::cont::Timer timer{ device };
vtkm::cont::DataSet input = isStructured ? InputDataSet : UnstructuredInputDataSet;
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet);
auto result = filter.Execute(input);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -386,19 +394,25 @@ void BenchContour(::benchmark::State& state)
void BenchContourGenerator(::benchmark::internal::Benchmark* bm)
{
bm->ArgNames({ "NIsoVals", "MergePts", "GenNormals", "FastNormals" });
bm->ArgNames({ "IsStructuredDataSet", "NIsoVals", "MergePts", "GenNormals", "FastNormals" });
auto helper = [&](const vtkm::Id numIsoVals) {
bm->Args({ numIsoVals, 0, 0, 0 });
bm->Args({ numIsoVals, 1, 0, 0 });
bm->Args({ numIsoVals, 0, 1, 0 });
bm->Args({ numIsoVals, 0, 1, 1 });
bm->Args({ 0, numIsoVals, 0, 0, 0 });
bm->Args({ 0, numIsoVals, 1, 0, 0 });
bm->Args({ 0, numIsoVals, 0, 1, 0 });
bm->Args({ 0, numIsoVals, 0, 1, 1 });
bm->Args({ 1, numIsoVals, 0, 0, 0 });
bm->Args({ 1, numIsoVals, 1, 0, 0 });
bm->Args({ 1, numIsoVals, 0, 1, 0 });
bm->Args({ 1, numIsoVals, 0, 1, 1 });
};
helper(1);
helper(3);
helper(12);
}
// :TODO: Disabled until SIGSEGV in Countour when passings field is resolved
VTKM_BENCHMARK_APPLY(BenchContour, BenchContourGenerator);
void BenchExternalFaces(::benchmark::State& state)
@ -428,10 +442,9 @@ void BenchTetrahedralize(::benchmark::State& state)
const vtkm::cont::DeviceAdapterId device = Config.Device;
// This filter only supports structured datasets:
if (!InputIsStructured())
if (FileAsInput && !InputIsStructured())
{
state.SkipWithError("Tetrahedralize Filter requires structured data.");
return;
}
vtkm::filter::Tetrahedralize filter;
@ -456,10 +469,9 @@ void BenchVertexClustering(::benchmark::State& state)
const vtkm::Id numDivs = static_cast<vtkm::Id>(state.range(0));
// This filter only supports unstructured datasets:
if (InputIsStructured())
if (FileAsInput && InputIsStructured())
{
state.SkipWithError("VertexClustering Filter requires unstructured data.");
return;
state.SkipWithError("VertexClustering Filter requires unstructured data (use --tetra).");
}
vtkm::filter::VertexClustering filter;
@ -469,8 +481,9 @@ void BenchVertexClustering(::benchmark::State& state)
for (auto _ : state)
{
(void)_;
timer.Start();
auto result = filter.Execute(InputDataSet);
auto result = filter.Execute(UnstructuredInputDataSet);
::benchmark::DoNotOptimize(result);
timer.Stop();
@ -530,13 +543,12 @@ struct PrepareForInput
void BenchReverseConnectivityGen(::benchmark::State& state)
{
if (InputIsStructured())
if (FileAsInput && InputIsStructured())
{
state.SkipWithError("ReverseConnectivityGen requires unstructured data.");
return;
state.SkipWithError("ReverseConnectivityGen requires unstructured data (--use tetra).");
}
auto cellset = InputDataSet.GetCellSet();
auto cellset = UnstructuredInputDataSet.GetCellSet();
PrepareForInput functor;
for (auto _ : state)
{
@ -763,6 +775,10 @@ struct Arg : vtkm::cont::internal::option::Arg
bool msg)
{
if ((option.arg != nullptr) && (option.arg[0] != '\0'))
{
return vtkm::cont::internal::option::ARG_OK;
}
else
{
if (msg)
{
@ -770,10 +786,6 @@ struct Arg : vtkm::cont::internal::option::Arg
}
return vtkm::cont::internal::option::ARG_ILLEGAL;
}
else
{
return vtkm::cont::internal::option::ARG_OK;
}
}
};
@ -861,8 +873,12 @@ void InitDataSet(int& argc, char** argv)
if (options[HELP])
{
// FIXME: Print google benchmark usage too
option::printUsage(std::cerr, usage.data());
option::printUsage(std::cout, usage.data());
// Print google benchmark usage too
const char* helpstr = "--help";
char* tmpargv[] = { argv[0], const_cast<char*>(helpstr), nullptr };
int tmpargc = 2;
VTKM_EXECUTE_BENCHMARKS(tmpargc, tmpargv);
exit(0);
}
@ -975,6 +991,7 @@ void InitDataSet(int& argc, char** argv)
std::cerr << "[InitDataSet] Loading file: " << filename << "\n";
vtkm::io::VTKDataSetReader reader(filename);
InputDataSet = reader.ReadDataSet();
FileAsInput = true;
}
else
{
@ -986,17 +1003,20 @@ void InitDataSet(int& argc, char** argv)
InputDataSet = source.Execute();
}
if (tetra)
{
std::cerr << "[InitDataSet] Tetrahedralizing dataset...\n";
vtkm::filter::Tetrahedralize tet;
tet.SetFieldsToPass(vtkm::filter::FieldSelection(vtkm::filter::FieldSelection::MODE_ALL));
InputDataSet = tet.Execute(InputDataSet);
}
FindFields();
CreateMissingFields();
std::cerr
<< "[InitDataSet] Create UnstructuredInputDataSet from Tetrahedralized InputDataSet...\n";
vtkm::filter::Tetrahedralize tet;
tet.SetFieldsToPass(vtkm::filter::FieldSelection(vtkm::filter::FieldSelection::MODE_ALL));
UnstructuredInputDataSet = tet.Execute(InputDataSet);
if (tetra)
{
InputDataSet = UnstructuredInputDataSet;
}
inputGenTimer.Stop();
std::cerr << "[InitDataSet] DataSet initialization took " << inputGenTimer.GetElapsedTime()
@ -1015,16 +1035,12 @@ int main(int argc, char* argv[])
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when it is help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
// This opts changes when it is help
if (opts != vtkm::cont::InitializeOptions::None)
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
InitDataSet(argc, args.data());
}
InitDataSet(argc, args.data());
const std::string dataSetSummary = []() -> std::string {
std::ostringstream out;

@ -0,0 +1,97 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include "Benchmarker.h"
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/ErrorInternal.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/internal/OptionParser.h>
#include <vtkm/filter/ParticleAdvection.h>
#include <vtkm/worklet/particleadvection/EulerIntegrator.h>
#include <vtkm/worklet/particleadvection/RK4Integrator.h>
#ifdef VTKM_ENABLE_TBB
#include <tbb/task_scheduler_init.h>
#endif
#ifdef VTKM_ENABLE_OPENMP
#include <omp.h>
#endif
namespace
{
// Hold configuration state (e.g. active device):
vtkm::cont::InitializeResult Config;
// Wrapper around RK4:
void BenchParticleAdvection(::benchmark::State& state)
{
const vtkm::cont::DeviceAdapterId device = Config.Device;
const vtkm::Id3 dims(5, 5, 5);
const vtkm::Vec3f vecX(1, 0, 0);
vtkm::Id numPoints = dims[0] * dims[1] * dims[2];
std::vector<vtkm::Vec3f> vectorField(static_cast<std::size_t>(numPoints));
for (std::size_t i = 0; i < static_cast<std::size_t>(numPoints); i++)
vectorField[i] = vecX;
vtkm::cont::DataSetBuilderUniform dataSetBuilder;
vtkm::cont::DataSet ds = dataSetBuilder.Create(dims);
ds.AddPointField("vector", vectorField);
vtkm::cont::ArrayHandle<vtkm::Particle> seedArray =
vtkm::cont::make_ArrayHandle({ vtkm::Particle(vtkm::Vec3f(.2f, 1.0f, .2f), 0),
vtkm::Particle(vtkm::Vec3f(.2f, 2.0f, .2f), 1),
vtkm::Particle(vtkm::Vec3f(.2f, 3.0f, .2f), 2) });
vtkm::filter::ParticleAdvection particleAdvection;
particleAdvection.SetStepSize(vtkm::FloatDefault(1) / state.range(0));
particleAdvection.SetNumberOfSteps(static_cast<vtkm::Id>(state.range(0)));
particleAdvection.SetSeeds(seedArray);
particleAdvection.SetActiveField("vector");
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
timer.Start();
auto output = particleAdvection.Execute(ds);
::benchmark::DoNotOptimize(output);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
}
state.SetComplexityN(state.range(0));
}
VTKM_BENCHMARK_OPTS(BenchParticleAdvection,
->RangeMultiplier(2)
->Range(32, 4096)
->ArgName("Steps")
->Complexity());
} // end anon namespace
int main(int argc, char* argv[])
{
auto opts = vtkm::cont::InitializeOptions::DefaultAnyDevice;
std::vector<char*> args(argv, argv + argc);
vtkm::bench::detail::InitializeArgs(&argc, args, opts);
Config = vtkm::cont::Initialize(argc, args.data(), opts);
if (opts != vtkm::cont::InitializeOptions::None)
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
}
VTKM_EXECUTE_BENCHMARKS(argc, args.data());
}

@ -26,8 +26,6 @@
#include <vtkm/exec/FunctorBase.h>
#include <vtkm/cont/ColorTable.hxx>
#include <sstream>
#include <string>
#include <vector>

@ -170,7 +170,7 @@
/// and modified using the passed arguments; see the Google Benchmark documentation
/// for more details. The `preamble` string may be used to supply additional
/// information that will be appended to the output's preamble.
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
/// \def VTKM_BENCHMARK(BenchFunc)
@ -181,7 +181,7 @@
/// ```
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(BenchFunc) \
#define VTKM_BENCHMARK(BenchFunc) \
BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
@ -196,7 +196,7 @@
/// Note the similarity to the raw Google Benchmark usage of
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
/// the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
@ -211,7 +211,7 @@
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
@ -224,7 +224,7 @@
/// template <typename T>
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
@ -237,10 +237,10 @@
/// ->ArgName("MyParam")->Range(32, 1024*1024),
/// vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY( \
BenchFunc, \
[](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY( \
BenchFunc, \
[](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
@ -255,22 +255,22 @@
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
namespace \
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
namespace \
{ /* A template function cannot be used as a template parameter, so wrap the function with \
* a template struct to get it into the GenerateTemplateBenchmarks class. */ \
template <typename... Ts> \
struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
{ \
static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
}; \
} /* end anon namespace */ \
int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
template <typename... Ts> \
struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
{ \
static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
}; \
} /* end anon namespace */ \
int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
TypeList>::Register(#BenchFunc, ApplyFunctor)
// Internal use only:
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)
namespace vtkm
@ -280,9 +280,7 @@ namespace bench
namespace detail
{
static inline void NullApply(::benchmark::internal::Benchmark*)
{
}
static inline void NullApply(::benchmark::internal::Benchmark*) {}
/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
/// instead.

@ -44,6 +44,7 @@ set(benchmarks
BenchmarkDeviceAdapter
BenchmarkFieldAlgorithms
BenchmarkFilters
BenchmarkODEIntegrators
BenchmarkTopologyAlgorithms
)

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5706bddc644b5b120ffbd424b3073ce989735272726de711ca8dac19b4a30ee1
size 2653

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:785051d9773c4a0ced2701de3499f9cd948da2a4c846a5187e30dfb5cb0783cb
size 10830

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1d990b5f0e9ef27e4e5f87f4c62c4f9974992506521f32bd5901ac6670e71bfa
size 9656

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54e09a09c97a20627e54c835d2d488bc9f692ef1315122ab60241c006ab78813
size 19742

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e1472e6002ca4ad4012e0c9f067f8254290fabe93c82713a4994ad97a7fdbdfc
size 31218

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5ff6d72bd325ffe0fb3b22bfdc294b6d674384afd662290424bb77634202b4ef
size 71150

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:24c71e8846fe62e6f6eefdb72c9729639061af80bf9d3453d35c8c6838de9174
size 37162

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4c905ec76e72513519515ec41cf5efd34490b98255ee7465f8b6746fcff41e5
size 51865

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ddf65aefbd8c8fe8fb479521af7e5fa894cc94b3f890e2cc527a8df5c6e5601c
size 728

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f85560cc05688d09c21b22e91c14cec22deecb3c51dc364d82cc9fd460c6ab6
size 328

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a47045b1ae5539ef0125273ee9c50a9a6e809f78411f6a850ac34e6fa43189bb
size 535

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ea0a0903fce2b7b42023ca0a2bdc008781a61fa74f75b2b107e6d0788c404551
size 1441

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:58aed19216ce91b6c9bc7c0d8ee31c1062405ad6f5a4a977b49f213e2ce81307
size 1518

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef3dfd79f0c8d18780d0749014d71c0226134041283d33de0bcd994e343dd421
size 2001070

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2bb3d36ea5ecef5e7ef1057d0dddebbc590424915083091ead3dac2928000524
size 2904465

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bffad7dae3dd6ef018ad7a9e109464ced0f3b9bc15cf1fb5d555f6d0d00b621f
size 3001624

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2cbdf56fd5445ddc5b6bc05507b8825fb8d74fe1ccce894bde03e5ff2ecf5fb6
size 525141

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:752021630d25aff8dfd00064badd452896be70bc8b2f94b008900b4fc70d4dd5
size 1811

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d1dbb4c28f1c829769ad3e03fc58f667935d8a461d3515036d5d98f5e3841cb
size 395

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc4033483646c7e3c7be921ca4f821d1277c0d6d79063b1565dfb78c4766bf4d
size 1234

3
data/data/third_party/ecl_cc/README vendored Normal file

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6f5e6e3dc559fefc7990daaec071fcd620f620e5ab8652dddaa6b43ca4ba08e7
size 222

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:892470e152ccd46ddcca70e26bcd88816c247f08c496319cea80864b6b94ce46
size 3596536

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a2c0b3788197a48a305fc049f54d66c94c20298e617ef06dbe4fe0c2043f7366
size 3590

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c1860e747d7f460afc63e32de184e445ffb966a42fb07f9d44ba39020584864f
size 496

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d9bea2064cd3402f3f5b7862e6b775e37f33210ba099f59358857d4bdae1020
size 255

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e154ba13346e6998b864316868da3f155e99efe4f330c8e080b0d7ece22b505a
size 488

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7191ea7dec00129cb262239a508aeba4bb9387e581adfa2049211f4514ee4130
size 1020

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b7b7e73f60f3572e19178aa55fcd32cafb5c5823062241d28aa37d82b0031a2a
size 1145

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:90aed1ed3c3eba58f1b0b1573b09e8c024e48f5ca822e9f88b0c1ff6593a978f
size 693

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d0ddc7c712a6d544db85660cd9d325884892b18d6f0ed451361aaeae2a96413
size 204

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75b5601eb23b1724d5309e69a51839615bce625f6e7641b52dc3d06e10b0c5ee
size 745

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff3108d009d2eef410593811857e38388001f7df624ddeaed3edceafbc838aea
size 849

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5aca6667b06deb4ec6236d5caa3d9518345bc1eb9021bc721289b81acc980af9
size 789

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:861fc904b7d4db43288fce85c8c1398726b54ac82d7bcbcebd8f12808cb5599b
size 1002

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:29e43c695763535251ab22af815651caa53d103b5fd168c72dfb9188e72e4ff4
size 1244

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3731448fe4d87b204e185829237a6a6b0140aed2fb27eea0533883a4cf4ed79d
size 1065

@ -60,14 +60,14 @@ Current gitlab runner tags for VTK-m are:
Used to state that we require a linux based gitlab-runner
- large-memory
Used to state that this step will require a machine that has lots of memory.
This is currently used for cuda `build` requests
This is currently used for CUDA `build` requests
- cuda-rt
Used to state that the runner is required to have the cuda runtime enviornment.
This isn't required to `build` VTK-m, only `test`
Used to state that the runner is required to have the CUDA runtime environment.
This is required to `build` and `test` VTK-m when using CUDA
- maxwell
- pascal
- turing
Only used on a `test` stage to signifiy which GPU hardware is required to
Only used on a `test` stage to signify which GPU hardware is required to
run the VTK-m tests
# How to use docker builders locally
@ -118,10 +118,9 @@ compilation of VTK-m. Instead of doing the compilation, instead you will be give
./reproduce_ci_env.py run rhel8
```
To compile VTK-m from the the interactive shell you would do the following:
To compile VTK-m from the interactive shell with the settings of the CI job you would do the following:
```
> src]# cd build/
> build]# cmake --build .
> src]# bash /run-gitlab-stage.sh
```
# How to Add/Update Kitware Gitlab CI
@ -259,22 +258,3 @@ sudo docker login --username=<docker_hub_name>
cd .gitlab/ci/docker
sudo ./update_all.sh 20201230
```
# ECP OSTI CI
`.gitlab-ci-ecp.yml` allows for VTK-m to run CI on hardware provided by ECP at NMC.
To have this work properly you will need to make sure that the gitlab repository
has been updated to this non-standard yaml file location
( "Settings" -> "CI/CD" -> "General pipelines" -> "Custom CI configuration path").
The ECP CI is setup to verify VTK-m mainly on Power9 hardware as that currently is
missing from VTK-m standard CI infrastructure.
Currently we verify Power9 support with `cuda` and `openmp` builders. The `cuda` builder
is setup to use the default cuda SDK on the machine and the required `c++` compiler which
currently is `gcc-4.8.5`. The `openmp` builder is setup to use the newest `c++` compiler provided
on the machine so that we maximize compiler coverage.
## Issues
Currently these builders don't report back to the VTK-m CDash instance.

@ -0,0 +1,7 @@
# Remove VTKDataSetWriter::WriteDataSet just_points parameter
In the method `VTKDataSetWriter::WriteDataSet`, the `just_points` parameter has been
removed due to lack of usage.
The purpose of `just_points` was to allow exporting only the points of a
DataSet without its cell data.

@ -0,0 +1,5 @@
# Add Kokkos backend
Adds a new device backend `Kokkos` which uses the Kokkos library for parallelism.
User must provide the Kokkos build and VTK-m will use the default configured execution
space.

@ -0,0 +1,226 @@
# Extract component arrays from unknown arrays
One of the problems with the data structures of VTK-m is that non-templated
classes like `DataSet`, `Field`, and `UnknownArrayHandle` (formally
`VariantArrayHandle`) internally hold an `ArrayHandle` of a particular type
that has to be cast to the correct type before it can be reasonably used.
That in turn is problematic because the list of possible `ArrayHandle`
types is very long.
At one time we were trying to compensate for this by using
`ArrayHandleVirtual`. However, for technical reasons this class is
infeasible for every use case of VTK-m and has been deprecated. Also, this
was only a partial solution since using it still required different code
paths for, say, handling values of `vtkm::Float32` and `vtkm::Vec3f_32`
even though both are essentially arrays of 32-bit floats.
The extract component feature compensates for this problem by allowing you
to extract the components from an `ArrayHandle`. This feature allows you to
create a single code path to handle `ArrayHandle`s containing scalars or
vectors of any size. Furthermore, when you extract a component from an
array, the storage gets normalized so that one code path covers all storage
types.
## `ArrayExtractComponent`
The basic enabling feature is a new function named `ArrayExtractComponent`.
This function takes an `ArrayHandle` and an index to a component. It
then returns an `ArrayHandleStride` holding the selected component of each
entry in the original array.
We will get to the structure of `ArrayHandleStride` later. But the
important part is that `ArrayHandleStride` does _not_ depend on the storage
type of the original `ArrayHandle`. That means whether you extract a
component from `ArrayHandleBasic`, `ArrayHandleSOA`,
`ArrayHandleCartesianProduct`, or any other type, you get back the same
`ArrayHandleStride`. Likewise, regardless of whether the input
`ArrayHandle` has a `ValueType` of `FloatDefault`, `Vec2f`, `Vec3f`, or any
other `Vec` of a default float, you get the same `ArrayHandleStride`. Thus,
you can see how this feature can dramatically reduce code paths if used
correctly.
It should be noted that `ArrayExtractComponent` will (logically) flatten
the `ValueType` before extracting the component. Thus, nested `Vec`s such
as `Vec<Vec3f, 3>` will be treated as a `Vec<FloatDefault, 9>`. The
intention is so that the extracted component will always be a basic C type.
For the purposes of this document when we refer to the "component type", we
really mean the base component type.
Different `ArrayHandle` implementations provide their own implementations
for `ArrayExtractComponent` so that the component can be extracted without
deep copying all the data. We will visit how `ArrayHandleStride` can
represent different data layouts later, but first let's go into the main
use case.
## Extract components from `UnknownArrayHandle`
The principle use case for `ArrayExtractComponent` is to get an
`ArrayHandle` from an unknown array handle without iterating over _every_
possible type. (Rather, we iterate over a smaller set of types.) To
facilitate this, an `ExtractComponent` method has been added to
`UnknownArrayHandle`.
To use `UnknownArrayHandle::ExtractComponent`, you must give it the
component type. You can check for the correct component type by using the
`IsBaseComponentType` method. The method will then return an
`ArrayHandleStride` for the component type specified.
### Example
As an example, let's say you have a worklet, `FooWorklet`, that does some
per component operation on an array. Furthermore, let's say that you want
to implement a function that, to the best of your ability, can apply
`FooWorklet` on an array of any type. This function should be pre-compiled
into a library so it doesn't have to be compiled over and over again.
(`MapFieldPermutation` and `MapFieldMergeAverage` are real and important
examples that have this behavior.)
Without the extract component feature, the implementation might look
something like this (many practical details left out):
``` cpp
struct ApplyFooFunctor
{
template <typename ArrayType>
void operator()(const ArrayType& input, vtkm::cont::UnknownArrayHandle& output) const
{
ArrayType outputArray;
vtkm::cont::Invoke invoke;
invoke(FooWorklet{}, input, outputArray);
output = outputArray;
}
};
vtkm::cont::UnknownArrayHandle ApplyFoo(const vtkm::cont::UnknownArrayHandle& input)
{
vtkm::cont::UnknownArrayHandle output;
input.CastAndCallForTypes<vtkm::TypeListAll, VTKM_DEFAULT_STORAGE_LIST_TAG>(
ApplyFooFunctor{}, output);
return output;
}
```
Take a look specifically at the `CastAndCallForTypes` call near the bottom
of this example. It calls for all types in `vtkm::TypeListAll`, which is
about 40 instances. Then, it needs to be called for any type in the desired
storage list. This could include basic arrays, SOA arrays, and lots of
other specialized types. It would be expected for this code to generate
over 100 paths for `ApplyFooFunctor`. This in turn contains a worklet
invoke, which is not a small amount of code.
Now consider how we can use the `ExtractComponent` feature to reduce the
code paths:
``` cpp
struct ApplyFooFunctor
{
template <typename T>
void operator()(T,
const vtkm::cont::UnknownArrayHandle& input,
const vtkm::cont::UnknownArrayHandle& output) const
{
if (!input.IsBaseComponentType<T>()) { return; }
VTKM_ASSERT(output.IsBaseComponentType<T>());
vtkm::cont::Invoke invoke;
invoke(FooWorklet{}, input.ExtractComponent<T>(), output.ExtractComponent<T>());
}
};
vtkm::cont::UnknownArrayHandle ApplyFoo(const vtkm::cont::UnknownArrayHandle& input)
{
vtkm::cont::UnknownArrayHandle output = input.NewInstanceBasic();
output.Allocate(input.GetNumberOfValues());
vtkm::cont::ListForEach(ApplyFooFunctor{}, vtkm::TypeListScalarAll{}, input, output);
return output;
}
```
The number of lines of code is about the same, but take a look at the
`ListForEach` (which replaces the `CastAndCallForTypes`). This calling code
takes `TypeListScalarAll` instead of `TypeListAll`, which reduces the
instances created from around 40 to 13 (every basic C type). It is also no
longer dependent on the storage, so these 13 instances are it. As an
example of potential compile savings, changing the implementation of the
`MapFieldMergePermutation` and `MapFieldMergeAverage` functions in this way
reduced the filters_common library (on Mac, Debug build) by 24 MB (over a
third of the total size).
Another great advantage of this approach is that even though it takes less
time to compile and generates less code, it actually covers more cases.
Have an array containing values of `Vec<short, 13>`? No problem. The values
were actually stored in an `ArrayHandleReverse`? It will still work.
## `ArrayHandleStride`
This functionality is made possible with the new `ArrayHandleStride`. This
array behaves much like `ArrayHandleBasic`, except that it contains an
_offset_ parameter to specify where in the buffer array to start reading
and a _stride_ parameter to specify how many entries to skip for each
successive entry. `ArrayHandleStride` also has optional parameters
`divisor` and `modulo` that allow indices to be repeated at regular
intervals.
Here are how `ArrayHandleStride` extracts components from several common
arrays. For each of these examples, we assume that the `ValueType` of the
array is `Vec<T, N>`. They are each extracting _component_.
### Extracting from `ArrayHandleBasic`
When extracting from an `ArrayHandleBasic`, we just need to start at the
proper component and skip the length of the `Vec`.
* _offset_: _component_
* _stride_: `N`
### Extracting from `ArrayHandleSOA`
Since each component is held in a separate array, they are densely packed.
Each component could be represented by `ArrayHandleBasic`, but of course we
use `ArrayHandleStride` to keep the type consistent.
* _offset_: 0
* _stride_: 1
### Extracting from `ArrayHandleCartesianProduct`
This array is the basic reason for implementing the _divisor_ and _modulo_
parameters. Each of the 3 components have different parameters, which are
the following (given that _dims_[3] captures the size of the 3 arrays for
each dimension).
* _offset_: 0
* _stride_: 1
* case _component_ == 0
* _divisor_: _ignored_
* _modulo_: _dims_[0]
* case _component_ == 1
* _divisor_: _dims_[0]
* _modulo_: _dims_[1]
* case _component_ == 2
* _divisor_: _dims_[0]
* _modulo_: _ignored_
### Extracting from `ArrayHandleUniformPointCoordinates`
This array cannot be represented directly because it is fully implicit.
However, it can be trivially converted to `ArrayHandleCartesianProduct` in
typically very little memory. (In fact, EAVL always represented uniform
point coordinates by explicitly storing a Cartesian product.) Thus, for
very little overhead the `ArrayHandleStride` can be created.
## Runtime overhead of extracting components
These benefits come at a cost, but not a large one. The "biggest" cost is
the small cost of computing index arithmetic for each access into
`ArrayHandleStride`. To make this as efficient as possible, there are
conditions that skip over the modulo and divide steps if they are not
necessary. (Integer modulo and divide tend to take much longer than
addition and multiplication.) It is for this reason that we probably do not
want to use this method all the time.
Another cost is the fact that not every `ArrayHandle` can be represented by
`ArrayHandleStride` directly without copying. If you ask to extract a
component that cannot be directly represented, it will be copied into a
basic array, which is not great. To make matters worse, for technical
reasons this copy happens on the host rather than the device.

@ -0,0 +1,29 @@
# Create `ArrayHandleOffsetsToNumComponents`
`ArrayHandleOffsetsToNumComponents` is a fancy array that takes an array of
offsets and converts it to an array of the number of components for each
packed entry.
It is common in VTK-m to pack small vectors of variable sizes into a single
contiguous array. For example, cells in an explicit cell set can each have
a different amount of vertices (triangles = 3, quads = 4, tetra = 4, hexa =
8, etc.). Generally, to access items in this list, you need an array of
components in each entry and the offset for each entry. However, if you
have just the array of offsets in sorted order, you can easily derive the
number of components for each entry by subtracting adjacent entries. This
works best if the offsets array has a size that is one more than the number
of packed vectors with the first entry set to 0 and the last entry set to
the total size of the packed array (the offset to the end).
When packing data of this nature, it is common to start with an array that
is the number of components. You can convert that to an offsets array using
the `vtkm::cont::ConvertNumComponentsToOffsets` function. This will create
an offsets array with one extra entry as previously described. You can then
throw out the original number of components array and use the offsets with
`ArrayHandleOffsetsToNumComponents` to represent both the offsets and num
components while storing only one array.
This replaces the use of `ArrayHandleDecorator` in `CellSetExplicit`.
The two implementations should do the same thing, but the new
`ArrayHandleOffsetsToNumComponents` should be less complex for
compilers.

@ -0,0 +1,18 @@
# `ArrayRangeCompute` works on any array type without compiling device code
Originally, `ArrayRangeCompute` required you to know specifically the
`ArrayHandle` type (value type and storage type) and to compile using any
device compiler. The method is changed to include only overloads that have
precompiled versions of `ArrayRangeCompute`.
Additionally, an `ArrayRangeCompute` overload that takes an
`UnknownArrayHandle` has been added. In addition to allowing you to compute
the range of arrays of unknown types, this implementation of
`ArrayRangeCompute` serves as a fallback for `ArrayHandle` types that are
not otherwise explicitly supported.
If you really want to make sure that you compute the range directly on an
`ArrayHandle` of a particular type, you can include
`ArrayRangeComputeTemplate.h`, which contains a templated overload of
`ArrayRangeCompute` that directly computes the range of an `ArrayHandle`.
Including this header requires compiling for device code.

@ -0,0 +1,29 @@
# `vtkm::cont::internal::Buffer` now can have ownership transferred
Memory once transferred to `Buffer` always had to be managed by VTK-m. This is problematic
for applications that needed VTK-m to allocate memory, but have the memory ownership
be longer than VTK-m.
`Buffer::TakeHostBufferOwnership` allows for easy transfer ownership of memory out of VTK-m.
When taking ownership of a VTK-m buffer you are provided the following information:
- Memory: A `void*` pointer to the array
- Container: A `void*` pointer used to free the memory. This is necessary to support cases such as allocations transferred into VTK-m from a `std::vector`.
- Delete: The function to call to actually delete the transferred memory
- Reallocate: The function to call to re-allocate the transferred memory. This will throw an exception if users try
to reallocate a buffer that was 'view' only
- Size: The size in number of elements of the array
To properly steal memory from VTK-m you do the following:
```cpp
vtkm::cont::ArrayHandle<T> arrayHandle;
...
auto stolen = arrayHandle.GetBuffers()->TakeHostBufferOwnership();
...
stolen.Delete(stolen.Container);
```

202
docs/changelog/buffer.md Normal file

@ -0,0 +1,202 @@
# Redesign of ArrayHandle to access data using typeless buffers
The original implementation of `ArrayHandle` is meant to be very generic.
To define an `ArrayHandle`, you actually create a `Storage` class that
maintains the data and provides portals to access it (on the host). Because
the `Storage` can provide any type of data structure it wants, you also
need to define an `ArrayTransfer` that describes how to move the
`ArrayHandle` to and from a device. It also has to be repeated for every
translation unit that uses them.
This is a very powerful mechanism. However, one of the major problems with
this approach is that every `ArrayHandle` type needs to have a separate
compile path for every value type crossed with every device. Because of
this limitation, the `ArrayHandle` for the basic storage has a special
implementation that manages the actual data allocation and movement as
`void *` arrays. In this way all the data management can be compiled once
and put into the `vtkm_cont` library. This has dramatically improved the
VTK-m compile time.
This new design replicates the basic `ArrayHandle`'s success to all other
storage types. The basic idea is to make the implementation of
`ArrayHandle` storage slightly less generic. Instead of requiring it to
manage the data it stores, it instead just builds `ArrayPortal`s from
`void` pointers that it is given. The management of `void` pointers can be
done in non-templated classes that are compiled into a library.
This initial implementation does not convert all `ArrayHandle`s to avoid
making non-backward compatible changes before the next minor revision of
VTK-m. In particular, it would be particularly difficult to convert
`ArrayHandleVirtual`. It could be done, but it would be a lot of work for a
class that will likely be removed.
## Buffer
Key to these changes is the introduction of a
`vtkm::cont::internal::Buffer` object. As the name implies, the `Buffer`
object manages a single block of bytes. `Buffer` is agnostic to the type of
data being stored. It only knows the length of the buffer in bytes. It is
responsible for allocating space on the host and any devices as necessary
and for transferring data among them. (Since `Buffer` knows nothing about
the type of data, a precondition of VTK-m would be that the host and all
devices have to have the same endian.)
The idea of the `Buffer` object is similar in nature to the existing
`vtkm::cont::internal::ExecutionArrayInterfaceBasicBase` except that it
will manage a buffer of data among the control and all devices rather than
in one device through a templated subclass.
As explained below, `ArrayHandle` holds some fixed number of `Buffer`
objects. (The number can be zero for implicit `ArrayHandle`s.) Because all
the interaction with the devices happen through `Buffer`, it will no longer
be necessary to compile any reference to `ArrayHandle` for devices (e.g.
you won't have to use nvcc just because the code links `ArrayHandle.h`).
## Storage
The `vtkm::cont::internal::Storage` class changes dramatically. Although an
instance will be kept, the intention is for `Storage` itself to be a
stateless object. It will manage its data through `Buffer` objects provided
from the `ArrayHandle`.
That said, it is possible for `Storage` to have some state. For example,
the `Storage` for `ArrayHandleImplicit` must hold on to the instance of the
portal used to manage the state.
## ArrayTransfer
The `vtkm::cont::internal::ArrayTransfer` class will be removed completely.
All data transfers will be handled internally with the `Buffer` object.
## Portals
A big change for this design is that the type of a portal for an
`ArrayHandle` will be the same for all devices and the host. Thus, we no
longer need specialized versions of portals for each device. We only have
one portal type. And since they are constructed from `void *` pointers, one
method can create them all.
## Advantages
The `ArrayHandle` interface should not change significantly for external
uses, but this redesign offers several advantages.
### Faster Compiles
Because the memory management is contained in a non-templated `Buffer`
class, it can be compiled once in a library and used by all template
instances of `ArrayHandle`. It should have similar compile advantages to
our current specialization of the basic `ArrayHandle`, but applied to all
types of `ArrayHandle`s.
### Fewer Templates
Hand-in-hand with faster compiles, the new design should require fewer
templates and template instances. We have immediately gotten rid of
`ArrayTransfer`. `Storage` is also much shorter. Because all
`ArrayPortal`s are the same for every device and the host, we need many
fewer versions of those classes. In the device adapter, we can probably
collapse the three `ArrayManagerExecution` classes into a single, much
simpler class that does simple memory allocation and copy.
### Fewer files need to be compiled for CUDA
Including `ArrayHandle.h` no longer adds code that compiles for a device.
Thus, we should no longer need to compile for a specific device adapter
just because we access an `ArrayHandle`. This should make it much easier to
achieve our goal of a "firewall". That is, code that just calls VTK-m
filters does not need to support all its compilers and flags.
### Simpler ArrayHandle specialization
The newer code should simplify the implementation of special `ArrayHandle`s
a bit. You need only implement an `ArrayPortal` that operates on one or
more `void *` arrays and a simple `Storage` class.
### Out of band memory sharing
With the current version of `ArrayHandle`, if you want to take data from
one `ArrayHandle` you pretty much have to create a special template to wrap
another `ArrayHandle` around that. With this new design, it is possible to
take data from one `ArrayHandle` and give it to another `ArrayHandle` of a
completely different type. You can't do this willy-nilly since different
`ArrayHandle` types will interpret buffers differently. But there can be
some special important use cases.
One such case could be an `ArrayHandle` that provides strided access to a
buffer. (Let's call it `ArrayHandleStride`.) The idea is that it interprets
the buffer as an array for a particular type (like a basic `ArrayHandle`)
but also defines a stride, skip, and repeat so that given an index it looks
up the value `((index / skip) % repeat) * stride`. The point is that it can
take an AoS array of tuples and represent an array of one of the
components.
The point would be that if you had a `VariantArrayHandle` or `Field`, you
could pull out an array of one of the components as an `ArrayHandleStride`.
An `ArrayHandleStride<vtkm::Float32>` could be used to represent that data
that comes from any basic `ArrayHandle` with `vtkm::Float32` or a
`vtkm::Vec` of that type. It could also represent data from an
`ArrayHandleCartesianProduct` and `ArrayHandleSoA`. We could even represent
an `ArrayHandleUniformPointCoordinates` by just making a small array. This
allows us to statically access a whole bunch of potential array storage
classes with a single type.
### Potentially faster device transfers
There is currently a fast-path for basic `ArrayHandle`s that does a block
cuda memcpy between host and device. But for other `ArrayHandle`s that do
not defer their `ArrayTransfer` to a sub-array, the transfer first has to
copy the data into a known buffer.
Because this new design stores all data in `Buffer` objects, any of these
can be easily and efficiently copied between devices.
## Disadvantages
This new design gives up some features of the original `ArrayHandle` design.
### Can only interface data that can be represented in a fixed number of buffers
Because the original `ArrayHandle` design required the `Storage` to
completely manage the data, it could represent it in any way possible. In
this redesign, the data need to be stored in some fixed number of memory
buffers.
This is a pretty open requirement. I suspect most data formats will be
storable in this. The user's guide has an example of data stored in a
`std::deque` that will not be representable. But that is probably not a
particularly practical example.
### VTK-m would only be able to support hosts and devices with the same endian
Because data are transferred as `void *` blocks of memory, there is no way
to correct words if the endianness of the two devices does not agree. As far as
I know, there should be no issues with the proposed ECP machines.
If endian becomes an issue, it might be possible to specify a word length
in the `Buffer`. That would assume that all numbers stored in the `Buffer`
have the same word length.
### ArrayPortals must be completely recompiled in each translation unit
We can declare that an `ArrayHandle` does not need to include the device
adapter header files in part because it no longer needs specialized
`ArrayPortal`s for each device. However, that means that a translation unit
compiled with the host compiler (say gcc) will produce different code for
the `ArrayPortal`s than those with the device compiler (say nvcc). This
could lead to numerous linking problems.
To get around these issues, we will probably have to enforce no exporting
of any of the `ArrayPortal` symbols and force them all to be recompiled for
each translation unit. This will serve to increase the compile times a bit.
We will probably also still encounter linking errors as there would be no
way to enforce this requirement.
### Cannot have specialized portals for the control environment
Because the new design unifies `ArrayPortal` types across control and
execution environments, it is no longer possible to have a special version
for the control environment to manage resources. This will require removing
some recent behavior of control portals such as with MR !1988.

@ -0,0 +1,9 @@
# Precompiled `ArrayCopy` for `UnknownArrayHandle`
Previously, in order to copy an `UnknownArrayHandle`, you had to specify
some subset of types and then specially compile a copy for each potential
type. With the new ability to extract a component from an
`UnknownArrayHandle`, it is now feasible to precompile copying an
`UnknownArrayHandle` to another array. This greatly reduces the overhead of
using `ArrayCopy` to copy `UnknownArrayHandle`s while simultaneously
increasing the likelihood that the copy will be successful.

@ -0,0 +1,10 @@
# Disable asserts for CUDA architecture builds
`assert` is supported on recent CUDA cards, but compiling it appears to be
very slow. By default, the `VTKM_ASSERT` macro has been disabled whenever
compiling for a CUDA device (i.e. when `__CUDA_ARCH__` is defined).
Asserts for CUDA devices can be turned back on by turning the
`VTKm_NO_ASSERT_CUDA` CMake variable off. Turning this CMake variable off
will enable assertions in CUDA kernels unless there is another reason
for turning off all asserts (such as a release build).

@ -0,0 +1,39 @@
# Deprecate ArrayHandleVirtualCoordinates
As we port VTK-m to more types of accelerator architectures, supporting
virtual methods is becoming more problematic. Thus, we are working to back
out of using virtual methods in the execution environment.
One of the most widespread users of virtual methods in the execution
environment is `ArrayHandleVirtual`. As a first step of deprecating this
class, we first deprecate the `ArrayHandleVirtualCoordinates` subclass.
Not surprisingly, `ArrayHandleVirtualCoordinates` is used directly by
`CoordinateSystem`. The biggest change necessary was that the `GetData`
method returned an `ArrayHandleVirtualCoordinates`, which obviously would
not work if that class is deprecated.
An oddness about this return type is that it is quite different from the
superclass's method of the same name. Rather, `Field` returns a
`VariantArrayHandle`. Since this had to be corrected anyway, it was decided
to change `CoordinateSystem`'s `GetData` to also return a
`VariantArrayHandle`, although its typelist is set to just `vtkm::Vec3f`.
To try to still support old code that expects the deprecated behavior of
returning an `ArrayHandleVirtualCoordinates`, `CoordinateSystem::GetData`
actually returns a "hidden" subclass of `VariantArrayHandle` that
automatically converts itself to an `ArrayHandleVirtualCoordinates`. (A
deprecation warning is given if this is done.)
This approach to support deprecated code is not perfect. The returned value
for `CoordinateSystem::GetData` can only be used as an `ArrayHandle` if a
method is directly called on it or if it is cast specifically to
`ArrayHandleVirtualCoordinates` or its superclass. For example, if passing
it to a method argument typed as `vtkm::cont::ArrayHandle<T, S>` where `T`
and `S` are template parameters, then the conversion will fail.
To continue to support ease of use, `CoordinateSystem` now has a method
named `GetDataAsMultiplexer` that returns the data as an
`ArrayHandleMultiplexer`. This can be employed to quickly use the
`CoordinateSystem` as an array without the overhead of a `CastAndCall`.

@ -0,0 +1,17 @@
# Virtual methods in execution environment deprecated
The use of classes with any virtual methods in the execution environment is
deprecated. Although we had code to correctly build virtual methods on some
devices such as CUDA, this feature was not universally supported on all
programming models we wish to support. Plus, the implementation of virtual
methods is not hugely convenient on CUDA because the virtual methods could
not be embedded in a library. To get around virtual methods declared in
different libraries, all builds had to be static, and a special linking
step to pull in possible virtual method implementations was required.
For these reasons, VTK-m is no longer relying on virtual methods. (Other
approaches like multiplexers are used instead.) The code will be officially
removed in version 2.0. It is still supported in a deprecated sense (you
should get a warning). However, if you want to build without virtual
methods, you can set the `VTKm_NO_DEPRECATED_VIRTUAL` CMake flag, and they
will not be compiled.

Some files were not shown because too many files have changed in this diff Show More