Merge branch 'master' into particle_density

This commit is contained in:
Li-Ta Lo 2020-10-07 12:24:07 -06:00
commit c1681d2f2a
851 changed files with 30216 additions and 17343 deletions

@ -1,17 +1,19 @@
---
# This configuration requires clang-format 3.8 or higher.
# This configuration requires clang-format 9 or higher.
BasedOnStyle: Mozilla
AlignAfterOpenBracket: Align
AlignEscapedNewlines: true
AlignOperands: false
AlwaysBreakAfterReturnType: None
AllowAllParametersOfDeclarationOnNextLine: false
AlwaysBreakAfterDefinitionReturnType: None
BreakBeforeBraces: Allman
AlwaysBreakAfterReturnType: None
BinPackArguments: false
BinPackParameters: false
BreakBeforeBraces: Allman
ColumnLimit: 100
# FixNamespaceComments: true
MaxEmptyLinesToKeep: 4
Standard: Cpp11
# This requires clang-format 4.0 (at least).
#FixNamespaceComments: true
ReflowComments: false
SpaceAfterTemplateKeyword: true
Standard: Cpp11
...

2
.gitattributes vendored

@ -1,5 +1,5 @@
# Attributes used for formatting.
[attr]our-c-style whitespace=tab-in-indent format.clang-format
[attr]our-c-style whitespace=tab-in-indent format.clang-format=9
*.cxx our-c-style
*.h our-c-style

@ -1,125 +0,0 @@
.slurm_p9_cuda: &slurm_p9_cuda
tags:
- nmc
- slurm
- nmc-xxfe1-sched-001
- xx-fe1
variables:
NMC_FE1_SLURM_PARAMETERS: " -N1 -p ecp-p9-4v100 --extra-node-info=*:*:* -t 1:30:00 "
CC: "gcc"
CXX: "g++"
CUDAHOSTCXX: "g++"
before_script:
# We need gcc-4.8.5, which is the system default compiler but not a compiler
# listed under the module system.
#
# That means to get this to work properly we explicitly do not request
# any compiler.
- module load cuda cmake/3.14.5
.slurm_p9_opemp: &slurm_p9_opemp
tags:
- nmc
- slurm
- nmc-xxfe1-sched-001
- xx-fe1
variables:
NMC_FE1_SLURM_PARAMETERS: " -N1 -p ecp-p9-4v100 --extra-node-info=*:*:* -t 1:30:00 "
before_script:
- module load gcc/8.3.0 openmpi/3.1.4 cmake/3.14.5
.cmake_build_artifacts: &cmake_build_artifacts
artifacts:
expire_in: 24 hours
when: always
paths:
# The artifacts of the build.
- vtkm-build/bin/
- vtkm-build/include/
# CTest files.
# XXX(globbing): Can be simplified with support from
# https://gitlab.com/gitlab-org/gitlab-runner/issues/4840
- vtkm-build/CTestCustom*.cmake
- vtkm-build/CTestTestfile.cmake
- vtkm-build/*/CTestTestfile.cmake
- vtkm-build/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/*/CTestTestfile.cmake
- vtkm-build/*/*/*/*/*/CTestTestfile.cmake
- vtkm-build/Testing/
# CDash files.
- vtkm-build/DartConfiguration.tcl
.cmake_build_p9_cuda: &cmake_build_p9_cuda
stage: build
script:
- srun env | grep SLURM_JOB_NAME
- mkdir vtkm-build
- pushd vtkm-build
- cmake -DCMAKE_BUILD_TYPE=Release -DVTKm_ENABLE_CUDA=ON -S ../
- cmake --build . -j20
- popd
.cmake_build_p9_openmp: &cmake_build_p9_openmp
stage: build
script:
- srun env | grep SLURM_JOB_NAME
- mkdir vtkm-build
- pushd vtkm-build
- cmake -DCMAKE_BUILD_TYPE=Release -DVTKm_ENABLE_OPENMP=ON -S ../
- cmake --build . -j20
- popd
.cmake_test_p9: &cmake_test_p9
stage: test
script:
- echo "running the test using artifacts of the build"
- pushd vtkm-build
# We need to exclude the following tests
# - CopyrightStatement
# - TestInstallSetup
# - SourceInInstall
# Which we can do by using an exclude regex
- ctest -E "Install|CopyrightStatement"
- popd
stages:
- build
- test
build:p9_openmp:
extends:
- .slurm_p9_opemp
- .cmake_build_artifacts
- .cmake_build_p9_openmp
test:p9_openmp:
extends:
- .slurm_p9_opemp
- .cmake_test_p9
dependencies:
- build:p9_openmp
needs:
- build:p9_openmp
build:p9_cuda:
extends:
- .slurm_p9_cuda
- .cmake_build_artifacts
- .cmake_build_p9_cuda
test:p9_cuda:
extends:
- .slurm_p9_cuda
- .cmake_test_p9
dependencies:
- build:p9_cuda
needs:
- build:p9_cuda

@ -49,55 +49,64 @@
GIT_CLONE_PATH: $CI_BUILDS_DIR/gitlab-kitware-sciviz-ci
.centos7: &centos7
image: "kitware/vtkm:ci-centos7_cuda10.2-20200601"
image: "kitware/vtkm:ci-centos7_cuda10.2-20200820"
extends:
- .docker_image
.centos8: &centos8
image: "kitware/vtkm:ci-centos8-20200601"
image: "kitware/vtkm:ci-centos8-20200820"
extends:
- .docker_image
.rhel8: &rhel8
image: "kitware/vtkm:ci-rhel8_cuda10.2-20200601"
image: "kitware/vtkm:ci-rhel8_cuda10.2-20200820"
extends:
- .docker_image
.ubuntu1604: &ubuntu1604
image: "kitware/vtkm:ci-ubuntu1604-20200601"
image: "kitware/vtkm:ci-ubuntu1604-20200820"
extends:
- .docker_image
.ubuntu1604_cuda: &ubuntu1604_cuda
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200601"
image: "kitware/vtkm:ci-ubuntu1604_cuda9.2-20200820"
extends:
- .docker_image
.ubuntu1804: &ubuntu1804
image: "kitware/vtkm:ci-ubuntu1804-20200601"
image: "kitware/vtkm:ci-ubuntu1804-20200820"
extends:
- .docker_image
.ubuntu1804_cuda: &ubuntu1804_cuda
image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200601"
image: "kitware/vtkm:ci-ubuntu1804_cuda10.1-20200820"
extends:
- .docker_image
.ubuntu1804_cuda_kokkos: &ubuntu1804_cuda_kokkos
image: "kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-20200820"
extends:
- .docker_image
.ubuntu2004_doxygen: &ubuntu2004_doxygen
image: "kitware/vtkm:ci-doxygen-20200601"
image: "kitware/vtkm:ci-doxygen-20200820"
extends:
- .docker_image
.ubuntu2004_kokkos: &ubuntu2004_kokkos
image: "kitware/vtkm:ci-ubuntu2004_kokkos-20200820"
extends:
- .docker_image
.only-default: &only-default
only:
- master
- master@vtk/vtk-m
- tags@vtk/vtk-m
- merge_requests
- tags
.only-master: &only-master
only:
- master
- master@vtk/vtk-m
# General Longer Term Tasks:
@ -178,4 +187,5 @@ include:
- local: '/.gitlab/ci/rhel8.yml'
- local: '/.gitlab/ci/ubuntu1604.yml'
- local: '/.gitlab/ci/ubuntu1804.yml'
- local: '/.gitlab/ci/ubuntu2004.yml'
- local: '/.gitlab/ci/windows10.yml'

@ -7,6 +7,7 @@ build:centos7_gcc48:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .centos7
@ -20,15 +21,17 @@ build:centos7_gcc48:
test:centos7_gcc48:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .centos7
- .cmake_test_linux
- .only-default
variables:
CTEST_EXCLUSIONS: "UnitTestContourTreeUniformAugmentedFilterCUDA|UnitTestContourTreeUniformAugmentedCUDA"
dependencies:
- build:centos7_gcc48
needs:
@ -37,17 +40,17 @@ test:centos7_gcc48:
test:rhel8_test_centos7:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .rhel8
- .cmake_test_linux
- .only-default
variables:
CTEST_EXCLUSIONS: "built_against_test_install"
CTEST_EXCLUSIONS: "built_against_test_install|UnitTestContourTreeUniformAugmentedFilterCUDA|UnitTestContourTreeUniformAugmentedCUDA"
dependencies:
- build:centos7_gcc48
needs:

@ -10,10 +10,16 @@
##
##=============================================================================
# Default to Release builds.
if ("$ENV{CMAKE_BUILD_TYPE}" STREQUAL "")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "")
else ()
set(CMAKE_BUILD_TYPE "$ENV{CMAKE_BUILD_TYPE}" CACHE STRING "")
endif ()
string(REPLACE "+" ";" options "$ENV{VTKM_SETTINGS}")
foreach(option IN LISTS options)
if(static STREQUAL option)
set(BUILD_SHARED_LIBS "OFF" CACHE STRING "")
@ -43,6 +49,9 @@ foreach(option IN LISTS options)
elseif(no_rendering STREQUAL option)
set(VTKm_ENABLE_RENDERING "OFF" CACHE STRING "")
elseif(no_virtual STREQUAL option)
set(VTKm_NO_DEPRECATED_VIRTUAL "ON" CACHE STRING "")
elseif(examples STREQUAL option)
set(VTKm_ENABLE_EXAMPLES "ON" CACHE STRING "")
@ -64,6 +73,9 @@ foreach(option IN LISTS options)
elseif(cuda STREQUAL option)
set(VTKm_ENABLE_CUDA "ON" CACHE STRING "")
elseif(kokkos STREQUAL option)
set(VTKm_ENABLE_KOKKOS "ON" CACHE STRING "")
elseif(maxwell STREQUAL option)
set(VTKm_CUDA_Architecture "maxwell" CACHE STRING "")
@ -88,7 +100,10 @@ find_program(SCCACHE_COMMAND NAMES sccache)
if(SCCACHE_COMMAND)
set(CMAKE_C_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
if(VTKm_ENABLE_CUDA)
# Use VTKm_CUDA_Architecture to determine if we need CUDA sccache setup
# since this will also capture when kokkos is being used with CUDA backing
if(DEFINED VTKm_CUDA_Architecture)
set(CMAKE_CUDA_COMPILER_LAUNCHER "${SCCACHE_COMMAND}" CACHE STRING "")
endif()
endif()

@ -1,7 +1,7 @@
FROM nvidia/cuda:10.2-devel-centos7
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install cmake make gcc gcc-c++ -y
RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y

@ -1,7 +1,7 @@
FROM nvidia/cuda:10.2-devel-ubi8
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN yum install make gcc gcc-c++ curl -y
RUN yum install make gcc gcc-c++ curl cuda-compat-10-2 -y
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.rpm.sh | bash
RUN yum install git git-lfs -y

@ -5,6 +5,7 @@ LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
g++ \
clang-8 \
git \
git-lfs \
libmpich-dev \

@ -0,0 +1,47 @@
# CI image based on the CUDA 11.0 development image for Ubuntu 18.04, with
# CMake 3.18.1 and a CUDA-enabled Kokkos 3.1.01 installed under /opt.
FROM nvidia/cuda:11.0-devel-ubuntu18.04
LABEL maintainer "Robert Maynard<robert.maynard@kitware.com>"
# Base dependencies for building VTK-m projects
# The apt package lists are removed in the same layer once the install is done.
RUN apt-get update && apt-get install -y --no-install-recommends \
curl \
g++ \
git \
git-lfs \
ninja-build \
&& \
rm -rf /var/lib/apt/lists/*
# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install
# kokkos backend requires cmake 3.18
# The self-extracting installer is unpacked straight into /opt/cmake and then
# deleted; ctest is additionally exposed under the name `ctest-latest`.
RUN mkdir /opt/cmake/ && \
curl -L https://github.com/Kitware/CMake/releases/download/v3.18.1/cmake-3.18.1-Linux-x86_64.sh > cmake-3.18.1-Linux-x86_64.sh && \
sh cmake-3.18.1-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
rm cmake-3.18.1-Linux-x86_64.sh && \
ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest
# Prepended, so the CMake installed above is found before any other cmake on PATH
ENV PATH "/opt/cmake/bin:${PATH}"
# Build and install Kokkos
# The 3.1.01 source archive is configured with the nvcc_wrapper script shipped
# in that archive as the C++ compiler, with the CUDA backend enabled for
# Kokkos_ARCH_TURING75, and installed into /opt/kokkos.
RUN mkdir -p /opt/kokkos/build && \
cd /opt/kokkos/build && \
curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
tar -xf kokkos-3.1.01.tar.gz && \
mkdir bld && cd bld && \
CXX=/opt/kokkos/build/kokkos-3.1.01/bin/nvcc_wrapper \
cmake -B . -S ../kokkos-3.1.01 \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_INSTALL_PREFIX=/opt/kokkos \
-DCMAKE_CXX_FLAGS=-fPIC \
-DCMAKE_CXX_STANDARD=14 \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_CONSTEXPR=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_CUDA_LDG_INTRINSIC=ON \
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
-DKokkos_ENABLE_CUDA_UVM=ON \
-DKokkos_ARCH_TURING75=ON && \
cmake --build . -j 8 && \
cmake --install .

@ -0,0 +1,41 @@
FROM ubuntu:20.04
LABEL maintainer "Sujin Philip<sujin.philip@kitware.com>"

# Base dependencies for building VTK-m projects.
# DEBIAN_FRONTEND=noninteractive is set for the install command only, so that
# package configuration cannot stop the build waiting for input.
# The apt package lists are removed in the same layer so they are not kept in
# the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      cmake \
      curl \
      g++ \
      git \
      git-lfs \
      libmpich-dev \
      libomp-dev \
      mpich \
      ninja-build \
      rsync \
      ssh \
      software-properties-common \
      && \
    rm -rf /var/lib/apt/lists/*

# Need to run git-lfs install manually on ubuntu based images when using the
# system packaged version
RUN git-lfs install

# Provide CMake 3.17 so we can re-run tests easily
# This will be used when we run just the tests
RUN mkdir /opt/cmake/ && \
    curl -L https://github.com/Kitware/CMake/releases/download/v3.17.3/cmake-3.17.3-Linux-x86_64.sh > cmake-3.17.3-Linux-x86_64.sh && \
    sh cmake-3.17.3-Linux-x86_64.sh --prefix=/opt/cmake/ --exclude-subdir --skip-license && \
    rm cmake-3.17.3-Linux-x86_64.sh && \
    ln -s /opt/cmake/bin/ctest /opt/cmake/bin/ctest-latest

# Appended rather than prepended: the cmake package installed through apt stays
# the default, and the newer ctest is reached through the `ctest-latest`
# symlink created above.
ENV PATH "${PATH}:/opt/cmake/bin"

# Build and install Kokkos (serial backend only) into /opt/kokkos
RUN mkdir -p /opt/kokkos/build && \
    cd /opt/kokkos/build && \
    curl -L https://github.com/kokkos/kokkos/archive/3.1.01.tar.gz > kokkos-3.1.01.tar.gz && \
    tar -xf kokkos-3.1.01.tar.gz && \
    mkdir bld && cd bld && \
    cmake -GNinja -DCMAKE_INSTALL_PREFIX=/opt/kokkos -DCMAKE_CXX_FLAGS=-fPIC -DKokkos_ENABLE_SERIAL=ON ../kokkos-3.1.01 && \
    ninja all && \
    ninja install

@ -34,10 +34,18 @@ cd ubuntu1804/cuda10.1
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda10.1-$date .
cd ../..
cd ubuntu1804/kokkos-cuda
sudo docker build -t kitware/vtkm:ci-ubuntu1804_cuda11_kokkos-$date .
cd ../..
cd ubuntu2004/doxygen/
sudo docker build -t kitware/vtkm:ci-doxygen-$date .
cd ../..
cd ubuntu2004/kokkos
sudo docker build -t kitware/vtkm:ci-ubuntu2004_kokkos-$date .
cd ../..
# sudo docker login --username=<docker_hub_name>
sudo docker push kitware/vtkm
sudo docker system prune

@ -25,10 +25,9 @@ doxygen:
- "cmake -V -P .gitlab/ci/config/gitlab_ci_setup.cmake"
- "ctest -VV -S .gitlab/ci/ctest_configure.cmake"
script:
- eval `ssh-agent -s`
- ssh-add <(echo "$DOC_API_KEY_BASE64" | base64 --decode)
- doxygen build/docs/doxyfile
- rsync -tv --recursive --delete -e "ssh -o StrictHostKeyChecking=no" build/docs/doxygen/html/ vtkm.documentation
- chmod 400 $DOC_KEY_FILE
- rsync -tv --recursive --delete -e "ssh -i $DOC_KEY_FILE -o StrictHostKeyChecking=no" build/docs/doxygen/html/ kitware@public.kitware.com:vtkm_documentation/
variables:
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "tbb+openmp+mpi+shared+docs"

@ -7,6 +7,7 @@ build:ubuntu1604_gcc5:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1604_cuda
@ -16,41 +17,24 @@ build:ubuntu1604_gcc5:
CC: "gcc-5"
CXX: "g++-5"
CMAKE_BUILD_TYPE: RelWithDebInfo
VTKM_SETTINGS: "cuda+pascal"
VTKM_SETTINGS: "cuda+pascal+no_virtual"
# Temporarily disabled as we don't have a pascal hw gitlab-runner
# test:ubuntu1604_gcc5:
# tags:
# - test
# - cuda-rt
# - pascal
# - vtkm
# - docker
# - linux
# extends:
# - .ubuntu1604_cuda
# - .cmake_test_linux
# - .only-default
# dependencies:
# - build:ubuntu1604_gcc5
# needs:
# - build:ubuntu1604_gcc5
# test:ubuntu1804_test_ubuntu1604_gcc5:
# tags:
# - test
# - cuda-rt
# - pascal
# - vtkm
# - docker
# - linux
# extends:
# - .ubuntu1804_cuda
# - .cmake_test_linux
# - .only-default
# dependencies:
# - build:ubuntu1604_gcc5
# needs:
# - build:ubuntu1604_gcc5
test:ubuntu1604_gcc5:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- pascal
extends:
- .ubuntu1604_cuda
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1604_gcc5
needs:
- build:ubuntu1604_gcc5
# Build on ubuntu1604 with OpenMP + CUDA
# Runs only on nightlies
@ -60,6 +44,7 @@ build:ubuntu1604_gcc5_2:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1604_cuda
@ -71,6 +56,25 @@ build:ubuntu1604_gcc5_2:
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "openmp+cuda+pascal+examples"
test:ubuntu1804_test_ubuntu1604_gcc5_2:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- pascal
extends:
- .ubuntu1804_cuda
- .cmake_test_linux
- .only-master
variables:
CTEST_EXCLUSIONS: "built_against_test_install"
dependencies:
- build:ubuntu1604_gcc5_2
needs:
- build:ubuntu1604_gcc5_2
# Build on ubuntu1604 with mpi + tbb and test on ubuntu1604
# Uses gcc 4.8
# Uses OpenMPI

@ -46,6 +46,7 @@ build:ubuntu1804_gcc7:
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1804_cuda
@ -54,16 +55,16 @@ build:ubuntu1804_gcc7:
variables:
CC: "gcc-7"
CXX: "g++-7"
VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats"
VTKM_SETTINGS: "cuda+turing+mpi+64bit_floats+no_virtual"
test:ubuntu1804_gcc7:
tags:
- test
- cuda-rt
- turing
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .ubuntu1804_cuda
- .cmake_test_linux
@ -74,42 +75,45 @@ test:ubuntu1804_gcc7:
- build:ubuntu1804_gcc7
# Build on ubuntu1804 with OpenMP and test on ubuntu1804
# Uses gcc 7.4
# Build on ubuntu1804 with CUDA+TBB and test on ubuntu1804
# Uses clang as CUDA host compiler
# Runs only on nightlies
build:ubuntu1804_gcc7_2:
build:ubuntu1804_clang_cuda:
tags:
- build
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1804
- .ubuntu1804_cuda
- .cmake_build_linux
- .only-master
- .only-default
# - .only-master
variables:
CC: "gcc-7"
CXX: "g++-7"
VTKM_SETTINGS: "openmp+shared+examples"
CC: "clang-8"
CXX: "clang++-8"
CUDAHOSTCXX: "clang++-8"
VTKM_SETTINGS: "cuda+pascal+tbb+static+examples"
test:ubuntu1804_gcc7_2:
test:ubuntu1804_clang_cuda:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- pascal
extends:
- .ubuntu1804
- .ubuntu1804_cuda
- .cmake_test_linux
- .only-master
variables:
#Restrict OpenMP number of threads since multiple test stages
#execute on the same hardware concurrently
OMP_NUM_THREADS: 4
- .only-default
# - .only-master
dependencies:
- build:ubuntu1804_gcc7_2
- build:ubuntu1804_clang_cuda
needs:
- build:ubuntu1804_gcc7_2
- build:ubuntu1804_clang_cuda
# Build on ubuntu1804 with OpenMP and test on ubuntu1804
# Uses gcc 6.5
@ -179,3 +183,41 @@ test:ubuntu1804_clang8:
- build:ubuntu1804_clang8
needs:
- build:ubuntu1804_clang8
# Build on ubuntu1804 with kokkos and test on ubuntu1804
# Uses CUDA 11
build:ubuntu1804_kokkos:
tags:
- build
- vtkm
- docker
- linux
- cuda-rt
- large-memory
extends:
- .ubuntu1804_cuda_kokkos
- .cmake_build_linux
- .only-default
variables:
CMAKE_GENERATOR: "Ninja"
CMAKE_BUILD_TYPE: Release
VTKM_SETTINGS: "kokkos+turing+static+64bit_floats"
test:ubuntu1804_kokkos:
tags:
- test
- vtkm
- docker
- linux
- cuda-rt
- turing
extends:
- .ubuntu1804_cuda_kokkos
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu1804_kokkos
needs:
- build:ubuntu1804_kokkos
variables:
CUDA_LAUNCH_BLOCKING: "1"

28
.gitlab/ci/ubuntu2004.yml Normal file

@ -0,0 +1,28 @@
# Build on ubuntu2004 with kokkos and test on ubuntu2004
# Both jobs run in the image selected by the `.ubuntu2004_kokkos` template.
build:ubuntu2004_kokkos:
tags:
- build
- vtkm
- docker
- linux
extends:
- .ubuntu2004_kokkos
- .cmake_build_linux
- .only-default
variables:
CMAKE_BUILD_TYPE: RelWithDebInfo
# '+'-separated list of build options
VTKM_SETTINGS: "kokkos+shared+64bit_floats"
# Test job for the build above; it lists build:ubuntu2004_kokkos under both
# `dependencies` and `needs`.
test:ubuntu2004_kokkos:
tags:
- test
- vtkm
- docker
- linux
extends:
- .ubuntu2004_kokkos
- .cmake_test_linux
- .only-default
dependencies:
- build:ubuntu2004_kokkos
needs:
- build:ubuntu2004_kokkos

@ -0,0 +1,23 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# List everything directly under patches/, as paths relative to that
# directory. Sub-directories are included in the listing; each one is expected
# to be named after a CMake version.
file(GLOB cmake_version_backports
  LIST_DIRECTORIES true
  RELATIVE "${CMAKE_CURRENT_LIST_DIR}/patches"
  "${CMAKE_CURRENT_LIST_DIR}/patches/*")

# Entries that are not directories are ignored. A directory is put at the
# front of CMAKE_MODULE_PATH only when the running CMake is older than the
# version the directory is named after.
foreach (cmake_version_backport IN LISTS cmake_version_backports)
  if (IS_DIRECTORY "${CMAKE_CURRENT_LIST_DIR}/patches/${cmake_version_backport}" AND
      CMAKE_VERSION VERSION_LESS "${cmake_version_backport}")
    list(INSERT CMAKE_MODULE_PATH 0 "${CMAKE_CURRENT_LIST_DIR}/patches/${cmake_version_backport}")
  endif ()
endforeach ()

@ -22,6 +22,8 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set(VTKM_COMPILER_IS_CLANG 1)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set(VTKM_COMPILER_IS_GNU 1)
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XLClang")
set(VTKM_COMPILER_IS_XL 1)
endif()
#-----------------------------------------------------------------------------
@ -51,7 +53,7 @@ if(VTKM_COMPILER_IS_MSVC)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler="/Gy">)
endif()
elseif(NOT VTKM_COMPILER_IS_PGI) #can't find an equivalant PGI flag
elseif(NOT (VTKM_COMPILER_IS_PGI OR VTKM_COMPILER_IS_XL)) #can't find an equivalant PGI/XL flag
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-ffunction-sections>)
if(TARGET vtkm::cuda)
target_compile_options(vtkm_compiler_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-ffunction-sections>)
@ -122,8 +124,8 @@ elseif(VTKM_COMPILER_IS_ICC)
target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CXX>:-wd1478 -wd13379>)
elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common)
set(cuda_flags -Xcompiler=-Wall,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-local-typedefs,-Wno-unused-function,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-Wunused,-fno-common)
set(cxx_flags -Wall -Wcast-align -Wchar-subscripts -Wextra -Wpointer-arith -Wformat -Wformat-security -Wshadow -Wunused -fno-common -Wno-unused-function)
set(cuda_flags -Xcompiler=-Wall,-Wcast-align,-Wchar-subscripts,-Wpointer-arith,-Wformat,-Wformat-security,-Wshadow,-fno-common,-Wunused,-Wno-unknown-pragmas,-Wno-unused-local-typedefs,-Wno-unused-function)
#Only add float-conversion warnings for gcc as the integer warnings in GCC
#include the implicit casting of all types smaller than int to ints.
@ -161,17 +163,21 @@ elseif(VTKM_COMPILER_IS_GNU OR VTKM_COMPILER_IS_CLANG)
endif()
endif()
#common warnings for all platforms when building cuda
if(TARGET vtkm::cuda)
function(setup_cuda_flags)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA")
#nvcc 9 introduced specific controls to disable the stack size warning
#otherwise we let the warning occur. We have to set this in CMAKE_CUDA_FLAGS
#as it is passed to the device link step, unlike compile_options
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xnvlink=--suppress-stack-size-warning" PARENT_SCOPE)
endif()
set(display_error_nums -Xcudafe=--display_error_number)
target_compile_options(vtkm_developer_flags INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${display_error_nums}>)
endfunction()
#common warnings for all platforms when building cuda
if ((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
setup_cuda_flags()
endif()
if(NOT VTKm_INSTALL_ONLY_LIBRARIES)

@ -39,6 +39,7 @@
# VTKm_ENABLE_CUDA Will be enabled if VTK-m was built with CUDA support
# VTKm_ENABLE_TBB Will be enabled if VTK-m was built with TBB support
# VTKm_ENABLE_OPENMP Will be enabled if VTK-m was built with OpenMP support
# VTKm_ENABLE_KOKKOS Will be enabled if VTK-m was built with Kokkos support
# VTKm_ENABLE_LOGGING Will be enabled if VTK-m was built with logging support
# VTKm_ENABLE_MPI Will be enabled if VTK-m was built with MPI support
# VTKm_ENABLE_RENDERING Will be enabled if VTK-m was built with rendering support
@ -67,8 +68,9 @@ set(VTKm_VERSION "@VTKm_VERSION@")
set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
set(VTKm_ENABLE_KOKKOS "@VTKm_ENABLE_KOKKOS@")
set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
set(VTKm_ENABLE_LOGGING "@VTKm_ENABLE_LOGGING@")
set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
@ -101,6 +103,12 @@ endif()
if(VTKm_ENABLE_CUDA AND VTKM_FROM_INSTALL_DIR)
set_target_properties(vtkm::cuda PROPERTIES cuda_architecture_flags "@VTKm_CUDA_Architecture_Flags@")
set_target_properties(vtkm::cuda PROPERTIES requires_static_builds TRUE)
# If VTK-m is built with 3.18+ and the consumer is < 3.18 we need to drop
# these properties as they break the VTK-m cuda flag logic
if(CMAKE_VERSION VERSION_LESS 3.18)
set_target_properties(vtkm::cuda PROPERTIES INTERFACE_LINK_OPTIONS "")
endif()
endif()
# VTKm requires some CMake Find modules not included with CMake, so

@ -127,10 +127,13 @@ if(VTKm_ENABLE_CUDA)
requires_static_builds TRUE
)
target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
set_target_properties(vtkm_cuda PROPERTIES
INTERFACE_COMPILE_OPTIONS $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
)
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11.0)
# CUDA 11+ deprecated C++11 support
target_compile_features(vtkm_cuda INTERFACE cxx_std_14)
endif()
# add the -gencode flags so that all cuda code
# is compiled properly
@ -241,13 +244,103 @@ if(VTKm_ENABLE_CUDA)
endif()
string(REPLACE ";" " " arch_flags "${arch_flags}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
#We propagate cuda flags via target* options so that they
#export cleanly
set(CMAKE_CUDA_ARCHITECTURES OFF)
target_compile_options(vtkm_cuda INTERFACE $<$<COMPILE_LANGUAGE:CUDA>:${arch_flags}>)
target_link_options(vtkm_cuda INTERFACE $<DEVICE_LINK:${arch_flags}>)
else()
# Before 3.18 we had to use CMAKE_CUDA_FLAGS as we had no way
# to propagate flags to the device link step
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${arch_flags}")
endif()
# This needs to be lower-case for the property to be properly exported
# CMake 3.15 we can add `cuda_architecture_flags` to the EXPORT_PROPERTIES
# target property to have this automatically exported for us
set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
set(VTKm_CUDA_Architecture_Flags "${arch_flags}")
set_target_properties(vtkm_cuda PROPERTIES cuda_architecture_flags "${arch_flags}")
unset(arch_flags)
endif()
endif()
#-----------------------------------------------------------------------------
# Kokkos with its Cuda backend enabled, expects everything to be compiled using its
# `nvcc_wrapper` as the CXX compiler. As the name suggests, nvcc_wrapper is a wrapper around
# Cuda's nvcc compiler. Kokkos targets have all of the flags meant for the nvcc compiler set as the
# CXX compiler flags. This function changes all such flags to be CUDA flags so that we can use
# CMake and vtk-m's existing infrastructure to compile for Cuda and Host separately. Without this
# all of the files will be compiled using nvcc which can be very time consuming. It can also have
# issues with calling host functions from device functions when compiling code for other backends.
#
# The function takes no arguments. It walks the INTERFACE_LINK_LIBRARIES of
# `Kokkos::kokkos` transitively and, for every target it reaches:
#   * rewrites `COMPILE_LANGUAGE:CXX` compile options to `COMPILE_LANGUAGE:CUDA`,
#   * re-adds the options that were forwarded with `-Xcompiler` as plain CXX options,
#   * drops `--relocatable-device-code=true` and clears INTERFACE_LINK_OPTIONS.
# Finally the `-arch=sm_XX` flag found along the way is attached to `vtkm::kokkos`
# as a device-link option, together with `-fopenmp` for the host link when
# Kokkos was built with its OpenMP device.
function(kokkos_fix_compile_options)
  set(targets Kokkos::kokkos)
  set(seen_targets)
  set(cuda_arch)

  # Breadth-first walk over the link interface; `seen_targets` stops a target
  # from being processed twice.
  while(targets)
    list(GET targets 0 target_name)
    list(REMOVE_AT targets 0)

    get_target_property(link_libraries ${target_name} INTERFACE_LINK_LIBRARIES)
    foreach(lib_target IN LISTS link_libraries)
      if (TARGET ${lib_target})
        if (lib_target IN_LIST seen_targets)
          continue()
        endif()

        list(APPEND seen_targets ${lib_target})
        list(APPEND targets ${lib_target})
        get_target_property(compile_options ${lib_target} INTERFACE_COMPILE_OPTIONS)
        if (compile_options)
          string(REGEX MATCH "[$]<[$]<COMPILE_LANGUAGE:CXX>:-Xcompiler;.*>" cxx_compile_options "${compile_options}")

          # Match into a scratch variable: string(REGEX MATCH) empties its
          # output when nothing matches, which would otherwise discard an
          # architecture already found on a previously visited target.
          string(REGEX MATCH "-arch=sm_[0-9][0-9]" target_cuda_arch "${compile_options}")
          if (target_cuda_arch)
            set(cuda_arch "${target_cuda_arch}")
          endif()

          string(REPLACE "-Xcompiler;" "" cxx_compile_options "${cxx_compile_options}")
          list(TRANSFORM compile_options REPLACE "--relocatable-device-code=true" "") #We use CMake for this flag
          list(TRANSFORM compile_options REPLACE "COMPILE_LANGUAGE:CXX" "COMPILE_LANGUAGE:CUDA")
          list(APPEND compile_options "${cxx_compile_options}")
          set_property(TARGET ${lib_target} PROPERTY INTERFACE_COMPILE_OPTIONS ${compile_options})
        endif()

        set_property(TARGET ${lib_target} PROPERTY INTERFACE_LINK_OPTIONS "")
      endif()
    endforeach()
  endwhile()

  set_property(TARGET vtkm::kokkos PROPERTY INTERFACE_LINK_OPTIONS "$<DEVICE_LINK:${cuda_arch}>")
  if (OPENMP IN_LIST Kokkos_DEVICES)
    # APPEND so that the device-link architecture flag set just above is kept
    # alongside the host-link OpenMP flag.
    set_property(TARGET vtkm::kokkos APPEND PROPERTY INTERFACE_LINK_OPTIONS "$<HOST_LINK:-fopenmp>")
  endif()
endfunction()
# Create the `vtkm::kokkos` imported interface target on top of the Kokkos
# package. When Kokkos reports CUDA among its devices, the CUDA language is
# enabled, CMAKE_CUDA_ARCHITECTURES is derived from Kokkos_ARCH, and the marker
# target `vtkm::kokkos_cuda` is created as well.
if(VTKm_ENABLE_KOKKOS AND NOT TARGET vtkm::kokkos)
  cmake_minimum_required(VERSION 3.13 FATAL_ERROR)

  find_package(Kokkos REQUIRED)
  if (CUDA IN_LIST Kokkos_DEVICES)
    cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
    enable_language(CUDA)

    if(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND
       CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND
       CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "10.0" AND
       CMAKE_CUDA_COMPILER_VERSION VERSION_LESS "11.0" AND
       CMAKE_BUILD_TYPE STREQUAL "Release")
      message(WARNING "There is a known issue with Cuda 10 and -O3 optimization. Switching to -O2. Please refer to issue #555.")
      # Each language keeps its own release flags and only has -O3 lowered to
      # -O2. The inputs are quoted so that an empty flag string is still a
      # valid argument to string(REPLACE).
      string(REPLACE "-O3" "-O2" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
      string(REPLACE "-O3" "-O2" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}")
    endif()

    # The last two digits of Kokkos_ARCH are used as the CUDA architecture.
    string(REGEX MATCH "[0-9][0-9]$" cuda_arch ${Kokkos_ARCH})
    set(CMAKE_CUDA_ARCHITECTURES ${cuda_arch})
    message(STATUS "Detected Cuda arch from Kokkos: ${cuda_arch}")

    add_library(vtkm::kokkos_cuda INTERFACE IMPORTED GLOBAL)
  endif()

  add_library(vtkm::kokkos INTERFACE IMPORTED GLOBAL)
  set_target_properties(vtkm::kokkos PROPERTIES INTERFACE_LINK_LIBRARIES "Kokkos::kokkos")

  # The Kokkos targets only need their flags rewritten for the CUDA backend.
  if (TARGET vtkm::kokkos_cuda)
    kokkos_fix_compile_options()
  endif()
endif()

@ -1,24 +0,0 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
if(CMAKE_VERSION VERSION_LESS 3.15)
#While CMake 3.10 introduced the new MPI module.
#Fixes related to MPI+CUDA that VTK-m needs are
#only found in CMake 3.15+.
find_package(MPI REQUIRED MODULE)
else()
#clunky but we need to make sure we use the upstream module if it exists
set(orig_CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "")
find_package(MPI REQUIRED MODULE)
set(CMAKE_MODULE_PATH ${orig_CMAKE_MODULE_PATH})
endif()
endif()

@ -10,9 +10,13 @@
include(CMakeParseArguments)
include(VTKmCMakeBackports)
include(VTKmDeviceAdapters)
include(VTKmCPUVectorization)
include(VTKmMPI)
if(VTKm_ENABLE_MPI AND NOT TARGET MPI::MPI_CXX)
find_package(MPI REQUIRED MODULE)
endif()
#-----------------------------------------------------------------------------
# INTERNAL FUNCTIONS
@ -62,7 +66,7 @@ function(vtkm_generate_export_header lib_name)
# Now generate a header that holds the macros needed to easily export
# template classes. This
string(TOUPPER ${kit_name} BASE_NAME_UPPER)
string(TOUPPER ${lib_name} BASE_NAME_UPPER)
set(EXPORT_MACRO_NAME "${BASE_NAME_UPPER}")
set(EXPORT_IS_BUILT_STATIC 0)
@ -77,17 +81,17 @@ function(vtkm_generate_export_header lib_name)
if(NOT EXPORT_IMPORT_CONDITION)
#set EXPORT_IMPORT_CONDITION to what the DEFINE_SYMBOL would be when
#building shared
set(EXPORT_IMPORT_CONDITION ${kit_name}_EXPORTS)
set(EXPORT_IMPORT_CONDITION ${lib_name}_EXPORTS)
endif()
configure_file(
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
@ONLY)
if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${kit_name}_export.h
install(FILES ${VTKm_BINARY_DIR}/include/${dir_prefix}/${lib_name}_export.h
DESTINATION ${VTKm_INSTALL_INCLUDE_DIR}/${dir_prefix}
)
endif()
@ -146,9 +150,14 @@ endfunction()
# Pass to consumers extra compile flags they need to add to CMAKE_CUDA_FLAGS
# to have CUDA compatibility.
#
# This is required as currently the -sm/-gencode flags when specified inside
# COMPILE_OPTIONS / target_compile_options are not propagated to the device
# linker. Instead they must be specified in CMAKE_CUDA_FLAGS
# If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
# a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, this will
# return an empty string as the `vtkm::cuda` target will correctly propagate
# all the necessary flags.
#
# This is required for CMake < 3.18 as they don't support the `$<DEVICE_LINK>`
# generator expression for `target_link_options`. Instead they need to be
# specified in CMAKE_CUDA_FLAGS
#
#
# add_library(lib_that_uses_vtkm ...)
@ -156,7 +165,18 @@ endfunction()
# target_link_libraries(lib_that_uses_vtkm PRIVATE vtkm_filter)
#
function(vtkm_get_cuda_flags settings_var)
if(TARGET vtkm::cuda)
if(POLICY CMP0105)
  # When policy CMP0105 is NEW, INTERFACE_LINK_OPTIONS are also applied to the
  # device link step. If `vtkm::cuda` carries such options, nothing has to be
  # added to CMAKE_CUDA_FLAGS, so return early and leave the caller's variable
  # untouched.
  cmake_policy(GET CMP0105 does_device_link)
  get_property(arch_flags
    TARGET    vtkm::cuda
    PROPERTY  INTERFACE_LINK_OPTIONS)
  # cmake_policy(GET) stores the setting in `does_device_link`; that variable,
  # not the policy name itself, has to be compared against "NEW".
  if(arch_flags AND does_device_link STREQUAL "NEW")
    return()
  endif()
endif()
get_property(arch_flags
TARGET vtkm::cuda
PROPERTY cuda_architecture_flags)
@ -232,8 +252,14 @@ endfunction()
#
#
# MODIFY_CUDA_FLAGS: If enabled will add the required -arch=<ver> flags
# that VTK-m was compiled with. If you have multiple libraries that use
# VTK-m calling `vtkm_add_target_information` multiple times with
# that VTK-m was compiled with.
#
# If VTK-m was built with CMake 3.18+ and you are using CMake 3.18+ and have
# a cmake_minimum_required of 3.18 or have set policy CMP0105 to new, no flags
# are added to CMAKE_CUDA_FLAGS, as the `vtkm::cuda` target will correctly
# propagate all the necessary flags.
#
# Note: calling `vtkm_add_target_information` multiple times with
# `MODIFY_CUDA_FLAGS` will cause duplicate compiler flags. To resolve this issue
# you can; pass all targets and sources to a single `vtkm_add_target_information`
# call, have the first one use `MODIFY_CUDA_FLAGS`, or use the provided
@ -275,10 +301,11 @@ function(vtkm_add_target_information uses_vtkm_target)
${ARGN}
)
if(VTKm_TI_MODIFY_CUDA_FLAGS)
vtkm_get_cuda_flags(CMAKE_CUDA_FLAGS)
set(CMAKE_CUDA_FLAGS ${CMAKE_CUDA_FLAGS} PARENT_SCOPE)
vtkm_get_cuda_flags(cuda_flags)
if(cuda_flags)
set(CMAKE_CUDA_FLAGS ${cuda_flags} PARENT_SCOPE)
endif()
endif()
set(targets ${uses_vtkm_target})
@ -291,6 +318,8 @@ function(vtkm_add_target_information uses_vtkm_target)
# set the required target properties
set_target_properties(${targets} PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(${targets} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
# CUDA_ARCHITECTURES added in CMake 3.18
set_target_properties(${targets} PROPERTIES CUDA_ARCHITECTURES OFF)
if(VTKm_TI_DROP_UNUSED_SYMBOLS)
foreach(target IN LISTS targets)
@ -305,11 +334,16 @@ function(vtkm_add_target_information uses_vtkm_target)
#
# This is required as CUDA currently doesn't support device side calls across
# dynamic library boundaries.
if(TARGET vtkm::cuda)
if((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda))
set_source_files_properties(${VTKm_TI_DEVICE_SOURCES} PROPERTIES LANGUAGE "CUDA")
foreach(target IN LISTS targets)
get_target_property(lib_type ${target} TYPE)
get_target_property(requires_static vtkm::cuda requires_static_builds)
if (TARGET vtkm::cuda)
get_target_property(requires_static vtkm::cuda requires_static_builds)
endif()
if (TARGET vtkm::kokkos)
get_target_property(requires_static vtkm::kokkos requires_static_builds)
endif()
if(requires_static AND ${lib_type} STREQUAL "SHARED_LIBRARY" AND VTKm_TI_EXTENDS_VTKM)
#We provide different error messages based on if we are building VTK-m

7
CMake/patches/README.md Normal file

@ -0,0 +1,7 @@
# CMake backports
This directory contains backports from newer CMake versions to help support
actually using older CMake versions for building VTK-m. The directory name is the
minimum version of CMake for which the contained files are no longer necessary.
For example, the files under the `3.15` directory are not needed for 3.15 or
3.16, but are for 3.14.

@ -37,11 +37,19 @@ if(NOT GENERATED_FILE)
return()
endif()
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
RESULT_VARIABLE pyexpander_result
OUTPUT_VARIABLE pyexpander_output
if(MSVC)
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
RESULT_VARIABLE pyexpander_result
OUTPUT_VARIABLE pyexpander_output
)
else()
execute_process(
COMMAND ${PYEXPANDER_COMMAND} ${SOURCE_FILE}.in
RESULT_VARIABLE pyexpander_result
OUTPUT_VARIABLE pyexpander_output
)
endif()
if(pyexpander_result)
# If pyexpander returned non-zero, it failed.

@ -110,8 +110,15 @@ function(do_verify root_dir prefix)
)
set(file_exceptions
cont/ColorTablePrivate.hxx
thirdparty/diy/vtkmdiy/cmake/mpi_types.h
# Ignore deprecated virtual classes (which are not installed if VTKm_NO_DEPRECATED_VIRTUAL
# is on). These exceptions can be removed when these files are completely removed.
cont/ArrayHandleVirtual.h
cont/ArrayHandleVirtual.hxx
cont/ArrayHandleVirtualCoordinates.h
cont/StorageVirtual.h
cont/StorageVirtual.hxx
)
#by default every header in a testing directory doesn't need to be installed

@ -110,6 +110,10 @@ function(vtkm_test_against_install dir)
)
endif()
if(TARGET vtkm::kokkos)
list(APPEND args "-DKokkos_DIR=${Kokkos_DIR}")
endif()
#determine if the test is expected to compile or fail to build. We use
#this information to built the test name to make it clear to the user
#what a 'passing' test means

@ -27,7 +27,6 @@ function(vtkm_create_test_executable
# for MPI tests, suffix test name and add MPI_Init/MPI_Finalize calls.
if (is_mpi_test)
set(extraArgs EXTRA_INCLUDE "vtkm/thirdparty/diy/environment.h")
set(CMAKE_TESTDRIVER_BEFORE_TESTMAIN "vtkmdiy::mpi::environment env(ac, av);")
if (use_mpi)
vtkm_diy_use_mpi(ON)
@ -50,7 +49,7 @@ function(vtkm_create_test_executable
#if all backends are enabled, we can use cuda compiler to handle all possible backends.
set(device_sources)
if(TARGET vtkm::cuda AND enable_all_backends)
if(((TARGET vtkm::cuda) OR (TARGET vtkm::kokkos_cuda)) AND enable_all_backends)
set(device_sources ${sources})
endif()
vtkm_add_target_information(${prog} DEVICE_SOURCES ${device_sources})
@ -153,6 +152,13 @@ function(vtkm_unit_tests)
#serially
list(APPEND per_device_serial TRUE)
endif()
if (VTKm_ENABLE_KOKKOS)
list(APPEND per_device_command_line_arguments --device=kokkos)
list(APPEND per_device_suffix "KOKKOS")
#may require more time because of kernel generation.
list(APPEND per_device_timeout 1500)
list(APPEND per_device_serial FALSE)
endif()
endif()
set(test_prog)

@ -8,16 +8,10 @@
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# If you want CUDA support, you will need to have CMake 3.9 on Linux/OSX.
# We require CMake 3.11 with the MSVC generator as the $<COMPILE_LANGUAGE:>
# generator expression is not supported on older versions.
# If you want CUDA support, you will need to have CMake 3.13 on Linux/OSX.
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project (VTKm)
if(${CMAKE_GENERATOR} MATCHES "Visual Studio")
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
endif()
# Update module path
set(VTKm_CMAKE_MODULE_PATH ${VTKm_SOURCE_DIR}/CMake)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${VTKm_CMAKE_MODULE_PATH})
@ -79,8 +73,9 @@ endmacro ()
# Configurable Options
vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
vtkm_option(VTKm_ENABLE_KOKKOS "Enable Kokkos support" OFF)
vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
vtkm_option(VTKm_ENABLE_MPI "Enable MPI support" OFF)
@ -108,6 +103,17 @@ vtkm_option(VTKm_ENABLE_LOGGING "Enable VTKm Logging" ON)
# performance.
vtkm_option(VTKm_NO_ASSERT "Disable assertions in debugging builds." OFF)
# The CUDA compiler (as of CUDA 11) takes a surprisingly long time to compile
# kernels with assert in them. By default we turn off asserts when compiling
# for CUDA devices.
vtkm_option(VTKm_NO_ASSERT_CUDA "Disable assertions for CUDA devices." ON)
# The HIP compiler (as of ROCm 3.7) takes a surprisingly long time to compile
# kernels with assert in them because they generate `printf` calls, which are
# very slow (they cause massive register spillage). By default we turn off
# asserts when compiling for HIP devices.
vtkm_option(VTKm_NO_ASSERT_HIP "Disable assertions for HIP devices." ON)
# When VTK-m is embedded into larger projects that wish to make end user
# applications they want to only install libraries and don't want CMake/headers
# installed.
@ -132,13 +138,22 @@ vtkm_option(VTKm_ENABLE_DEVELOPER_FLAGS "Enable compiler flags that are useful w
# Some application might need not to install those, hence this option.
vtkm_option(VTKm_NO_INSTALL_README_LICENSE "disable the installation of README and LICENSE files" OFF)
# We are in the process of deprecating the use of virtual methods because they
# are not well supported on many accelerators. Turn this option on to remove
# the code entirely. Note that the deprecation of virtual methods is work in
# progress, so not all use of virtual methods may be done. In VTK-m 2.0
# virtual methods should be removed entirely and this option will be removed.
vtkm_option(VTKm_NO_DEPRECATED_VIRTUAL "Do not compile support of deprecated virtual methods" OFF)
# Keep rarely-changed options out of the default cache view in CMake GUIs.
# VTKm_NO_ASSERT_HIP is listed alongside VTKm_NO_ASSERT_CUDA so both
# device-specific assert switches are treated the same way.
mark_as_advanced(
  VTKm_ENABLE_LOGGING
  VTKm_NO_ASSERT
  VTKm_NO_ASSERT_CUDA
  VTKm_NO_ASSERT_HIP
  VTKm_INSTALL_ONLY_LIBRARIES
  VTKm_HIDE_PRIVATE_SYMBOLS
  VTKm_ENABLE_DEVELOPER_FLAGS
  VTKm_NO_INSTALL_README_LICENSE
  VTKm_NO_DEPRECATED_VIRTUAL
  )
#-----------------------------------------------------------------------------
@ -266,8 +281,9 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
# Install helper configure files.
install(
FILES
${VTKm_SOURCE_DIR}/CMake/VTKmCMakeBackports.cmake
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
${VTKm_SOURCE_DIR}/CMake/FindMPI.cmake
${VTKm_SOURCE_DIR}/CMake/patches/3.15/FindMPI.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
)
@ -279,7 +295,6 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
${VTKm_SOURCE_DIR}/CMake/VTKmDeviceAdapters.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmDIYUtils.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmExportHeaderTemplate.h.in
${VTKm_SOURCE_DIR}/CMake/VTKmMPI.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmRenderingContexts.cmake
${VTKm_SOURCE_DIR}/CMake/VTKmWrappers.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
@ -313,7 +328,7 @@ endif ()
#-----------------------------------------------------------------------------
#add the benchmarking folder
if(VTKm_ENABLE_BENCHMARKS)
add_subdirectory(benchmarking)
add_subdirectory(benchmarking)
endif()
#-----------------------------------------------------------------------------

@ -55,7 +55,7 @@ list(APPEND CTEST_CUSTOM_WARNING_EXCEPTION
"nvlink warning : .*ArrayPortalVirtual.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorBoundingIntervalHierarchyExec.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorRectilinearGrid.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorUniformBins.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorTwoLevel.* has address taken but no possible call to it"
"nvlink warning : .*CellLocatorUniformGrid.* has address taken but no possible call to it"
)

@ -1,4 +1,4 @@
#!/bin/env python3
#!/usr/bin/env python3
#=============================================================================
#

@ -77,6 +77,14 @@ struct ReadWriteValues : vtkm::worklet::WorkletMapField
}
};
// Builds a new ArrayHandle from a private copy of `vec`, so the handle starts out
// backed by memory that was just allocated in the control environment. The
// caller's vector is only read.
template <typename T>
vtkm::cont::ArrayHandle<T> CreateFreshArrayHandle(const std::vector<T>& vec)
{
  using VectorType = std::vector<T>;
  // The temporary copy is handed to make_ArrayHandleMove as an rvalue.
  return vtkm::cont::make_ArrayHandleMove(VectorType(vec.begin(), vec.end()));
}
//------------- Benchmark functors -------------------------------------------
// Copies NumValues from control environment to execution environment and
@ -97,14 +105,18 @@ void BenchContToExecRead(benchmark::State& state)
state.SetLabel(desc.str());
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadValues{}, array);
timer.Stop();
@ -181,19 +193,26 @@ void BenchContToExecReadWrite(benchmark::State& state)
state.SetLabel(desc.str());
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadWriteValues{}, array);
timer.Stop();
state.SetIterationTime(timer.GetElapsedTime());
// Remove data from execution environment so it has to be transferred again.
array.ReleaseResourcesExecution();
}
const int64_t iterations = static_cast<int64_t>(state.iterations());
@ -223,21 +242,23 @@ void BenchRoundTripRead(benchmark::State& state)
state.SetLabel(desc.str());
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
std::vector<ValueType> vec(static_cast<std::size_t>(numValues), 2);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Ensure data is in control before we start:
array.ReleaseResourcesExecution();
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
invoker(ReadValues{}, array);
// Copy back to host and read:
// (Note, this probably does not copy. The array exists in both control and execution for read.)
auto portal = array.ReadPortal();
for (vtkm::Id i = 0; i < numValues; ++i)
{
@ -277,21 +298,23 @@ void BenchRoundTripReadWrite(benchmark::State& state)
}
std::vector<ValueType> vec(static_cast<std::size_t>(numValues));
ArrayType array = vtkm::cont::make_ArrayHandle(vec);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
// Ensure data is in control before we start:
array.ReleaseResourcesExecution();
// Make a fresh array each iteration to force a copy from control to execution each time.
// (Prevents unified memory devices from caching data.)
ArrayType array = CreateFreshArrayHandle(vec);
timer.Start();
// Do work on device:
invoker(ReadWriteValues{}, array);
// Copy back to host and read/write:
auto portal = array.WritePortal();
for (vtkm::Id i = 0; i < numValues; ++i)
{
@ -330,14 +353,14 @@ void BenchExecToContRead(benchmark::State& state)
state.SetLabel(desc.str());
}
ArrayType array;
array.Allocate(numValues);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
ArrayType array;
array.Allocate(numValues);
// Time the copy:
timer.Start();
@ -383,14 +406,14 @@ void BenchExecToContWrite(benchmark::State& state)
state.SetLabel(desc.str());
}
ArrayType array;
array.Allocate(numValues);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
ArrayType array;
array.Allocate(numValues);
timer.Start();
// Allocate/write data on device
@ -435,14 +458,14 @@ void BenchExecToContReadWrite(benchmark::State& state)
state.SetLabel(desc.str());
}
ArrayType array;
array.Allocate(numValues);
vtkm::cont::Invoker invoker{ device };
vtkm::cont::Timer timer{ device };
for (auto _ : state)
{
(void)_;
ArrayType array;
array.Allocate(numValues);
timer.Start();
// Allocate/write data on device

@ -13,13 +13,16 @@
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleMultiplexer.h>
#include <vtkm/cont/ArrayHandleVirtual.h>
#include <vtkm/cont/CellSetStructured.h>
#include <vtkm/cont/ImplicitFunctionHandle.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/Timer.h>
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
#include <vtkm/cont/ArrayHandleVirtual.h>
#endif
#include <vtkm/worklet/WorkletMapField.h>
#include <vtkm/worklet/WorkletMapTopology.h>
@ -433,15 +436,19 @@ void BenchBlackScholesStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesStatic, ValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
// Runs BenchBlackScholesImpl with each of its three input arrays (StockPrice,
// OptionStrike, OptionYears) wrapped by vtkm::cont::make_ArrayHandleVirtual.
// The body is bracketed by VTKM_DEPRECATED_SUPPRESS_BEGIN/END, presumably
// because the virtual array handle API is deprecated.
template <typename ValueType>
void BenchBlackScholesDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchBlackScholesImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.StockPrice),
vtkm::cont::make_ArrayHandleVirtual(impl.OptionStrike),
vtkm::cont::make_ArrayHandleVirtual(impl.OptionYears));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchBlackScholesDynamic, ValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchBlackScholesMultiplexer0(::benchmark::State& state)
@ -537,15 +544,19 @@ void BenchMathStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchMathStatic, ValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
// Runs BenchMathImpl with InputHandle, TempHandle1 and TempHandle2 each wrapped
// by vtkm::cont::make_ArrayHandleVirtual. The body is bracketed by
// VTKM_DEPRECATED_SUPPRESS_BEGIN/END, presumably because the virtual array
// handle API is deprecated.
template <typename ValueType>
void BenchMathDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchMathImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.InputHandle),
vtkm::cont::make_ArrayHandleVirtual(impl.TempHandle1),
vtkm::cont::make_ArrayHandleVirtual(impl.TempHandle2));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchMathDynamic, ValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchMathMultiplexer0(::benchmark::State& state)
@ -636,13 +647,17 @@ void BenchFusedMathStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchFusedMathStatic, ValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
// Runs BenchFusedMathImpl with its InputHandle wrapped by
// vtkm::cont::make_ArrayHandleVirtual. The body is bracketed by
// VTKM_DEPRECATED_SUPPRESS_BEGIN/END, presumably because the virtual array
// handle API is deprecated.
template <typename ValueType>
void BenchFusedMathDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchFusedMathImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.InputHandle));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchFusedMathDynamic, ValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
template <typename ValueType>
void BenchFusedMathMultiplexer0(::benchmark::State& state)
@ -756,15 +771,19 @@ void BenchEdgeInterpStatic(::benchmark::State& state)
};
VTKM_BENCHMARK_TEMPLATES(BenchEdgeInterpStatic, InterpValueTypes);
#ifndef VTKM_NO_DEPRECATED_VIRTUAL
// Runs BenchEdgeInterpImpl with EdgePairHandle, WeightHandle and FieldHandle
// each wrapped by vtkm::cont::make_ArrayHandleVirtual. The body is bracketed by
// VTKM_DEPRECATED_SUPPRESS_BEGIN/END, presumably because the virtual array
// handle API is deprecated.
template <typename ValueType>
void BenchEdgeInterpDynamic(::benchmark::State& state)
{
VTKM_DEPRECATED_SUPPRESS_BEGIN
BenchEdgeInterpImpl<ValueType> impl{ state };
impl.Run(vtkm::cont::make_ArrayHandleVirtual(impl.EdgePairHandle),
vtkm::cont::make_ArrayHandleVirtual(impl.WeightHandle),
vtkm::cont::make_ArrayHandleVirtual(impl.FieldHandle));
VTKM_DEPRECATED_SUPPRESS_END
};
VTKM_BENCHMARK_TEMPLATES(BenchEdgeInterpDynamic, InterpValueTypes);
#endif //VTKM_NO_DEPRECATED_VIRTUAL
struct ImplicitFunctionBenchData
{

@ -24,7 +24,6 @@
#include <vtkm/cont/ErrorInternal.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/StorageBasic.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/internal/OptionParser.h>
@ -166,8 +165,8 @@ void BenchGradient(::benchmark::State& state, int options)
}
}
#define VTKM_PRIVATE_GRADIENT_BENCHMARK(Name, Opts) \
void BenchGradient##Name(::benchmark::State& state) { BenchGradient(state, Opts); } \
#define VTKM_PRIVATE_GRADIENT_BENCHMARK(Name, Opts) \
void BenchGradient##Name(::benchmark::State& state) { BenchGradient(state, Opts); } \
VTKM_BENCHMARK(BenchGradient##Name)
VTKM_PRIVATE_GRADIENT_BENCHMARK(Scalar, Gradient | ScalarInput);
@ -861,8 +860,12 @@ void InitDataSet(int& argc, char** argv)
if (options[HELP])
{
// FIXME: Print google benchmark usage too
option::printUsage(std::cerr, usage.data());
option::printUsage(std::cout, usage.data());
// Print google benchmark usage too
const char* helpstr = "--help";
char* tmpargv[] = { argv[0], const_cast<char*>(helpstr), nullptr };
int tmpargc = 2;
VTKM_EXECUTE_BENCHMARKS(tmpargc, tmpargv);
exit(0);
}
@ -1015,16 +1018,12 @@ int main(int argc, char* argv[])
// Parse VTK-m options:
Config = vtkm::cont::Initialize(argc, args.data(), opts);
// This occurs when it is help
if (opts == vtkm::cont::InitializeOptions::None)
{
std::cout << Config.Usage << std::endl;
}
else
// This opts changes when it is help
if (opts != vtkm::cont::InitializeOptions::None)
{
vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
InitDataSet(argc, args.data());
}
InitDataSet(argc, args.data());
const std::string dataSetSummary = []() -> std::string {
std::ostringstream out;

@ -0,0 +1,97 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include "Benchmarker.h"
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/ErrorInternal.h>
#include <vtkm/cont/Logging.h>
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/Timer.h>
#include <vtkm/cont/internal/OptionParser.h>
#include <vtkm/filter/ParticleAdvection.h>
#include <vtkm/worklet/particleadvection/EulerIntegrator.h>
#include <vtkm/worklet/particleadvection/RK4Integrator.h>
#ifdef VTKM_ENABLE_TBB
#include <tbb/task_scheduler_init.h>
#endif
#ifdef VTKM_ENABLE_OPENMP
#include <omp.h>
#endif
namespace
{
// Hold configuration state (e.g. active device):
vtkm::cont::InitializeResult Config;
// Benchmarks vtkm::filter::ParticleAdvection on a 5x5x5 uniform data set whose
// point field "vector" is (1, 0, 0) everywhere. state.range(0) is used as the
// number of steps and its reciprocal as the step size. Each iteration is timed
// manually with a vtkm::cont::Timer on the configured device.
void BenchParticleAdvection(::benchmark::State& state)
{
  const vtkm::cont::DeviceAdapterId activeDevice = Config.Device;

  // Uniform grid with the same X-aligned vector at every point.
  const vtkm::Id3 gridDims(5, 5, 5);
  const vtkm::Vec3f unitX(1, 0, 0);
  const vtkm::Id pointCount = gridDims[0] * gridDims[1] * gridDims[2];
  std::vector<vtkm::Vec3f> field(static_cast<std::size_t>(pointCount), unitX);

  vtkm::cont::DataSetBuilderUniform builder;
  vtkm::cont::DataSet ds = builder.Create(gridDims);
  ds.AddPointField("vector", field);

  // Three seed particles with IDs 0..2.
  vtkm::cont::ArrayHandle<vtkm::Particle> seeds =
    vtkm::cont::make_ArrayHandle({ vtkm::Particle(vtkm::Vec3f(.2f, 1.0f, .2f), 0),
                                   vtkm::Particle(vtkm::Vec3f(.2f, 2.0f, .2f), 1),
                                   vtkm::Particle(vtkm::Vec3f(.2f, 3.0f, .2f), 2) });

  // The benchmark argument drives both the step count and the step size.
  const auto stepCount = state.range(0);

  vtkm::filter::ParticleAdvection advector;
  advector.SetStepSize(vtkm::FloatDefault(1) / stepCount);
  advector.SetNumberOfSteps(static_cast<vtkm::Id>(stepCount));
  advector.SetSeeds(seeds);
  advector.SetActiveField("vector");

  vtkm::cont::Timer timer{ activeDevice };
  for (auto _ : state)
  {
    (void)_;

    timer.Start();
    auto result = advector.Execute(ds);
    // Keep the filter output observable so the call cannot be optimized away.
    ::benchmark::DoNotOptimize(result);
    timer.Stop();

    state.SetIterationTime(timer.GetElapsedTime());
  }

  state.SetComplexityN(stepCount);
}
// Register BenchParticleAdvection with a single argument named "Steps" that
// ranges from 32 to 4096 using a range multiplier of 2, and request a
// complexity report (the benchmark supplies N via SetComplexityN).
VTKM_BENCHMARK_OPTS(BenchParticleAdvection,
->RangeMultiplier(2)
->Range(32, 4096)
->ArgName("Steps")
->Complexity());
} // end anon namespace
// Entry point: initializes VTK-m from the command line, pins the runtime to the
// selected device when initialization options are still set, then runs the
// registered benchmarks.
int main(int argc, char* argv[])
{
  // Mutable copy of the argument list that the initialization helpers operate on.
  std::vector<char*> argList(argv, argv + argc);

  auto initOptions = vtkm::cont::InitializeOptions::DefaultAnyDevice;
  // NOTE(review): InitializeArgs receives argc by pointer together with the
  // argument list and options; presumably it may adjust them - confirm.
  vtkm::bench::detail::InitializeArgs(&argc, argList, initOptions);

  // Parse VTK-m options and remember the result (including the active device).
  Config = vtkm::cont::Initialize(argc, argList.data(), initOptions);

  const bool forceConfiguredDevice = (initOptions != vtkm::cont::InitializeOptions::None);
  if (forceConfiguredDevice)
  {
    vtkm::cont::GetRuntimeDeviceTracker().ForceDevice(Config.Device);
  }

  VTKM_EXECUTE_BENCHMARKS(argc, argList.data());
}

@ -26,8 +26,6 @@
#include <vtkm/exec/FunctorBase.h>
#include <vtkm/cont/ColorTable.hxx>
#include <sstream>
#include <string>
#include <vector>

@ -170,7 +170,7 @@
/// and modified using the passed arguments; see the Google Benchmark documentation
/// for more details. The `preamble` string may be used to supply additional
/// information that will be appended to the output's preamble.
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
#define VTKM_EXECUTE_BENCHMARKS_PREAMBLE(argc, argv, preamble) \
vtkm::bench::detail::ExecuteBenchmarks(argc, argv, preamble)
/// \def VTKM_BENCHMARK(BenchFunc)
@ -181,7 +181,7 @@
/// ```
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK(BenchFunc) \
#define VTKM_BENCHMARK(BenchFunc) \
BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond)
/// \def VTKM_BENCHMARK_OPTS(BenchFunc, Args)
@ -196,7 +196,7 @@
/// Note the similarity to the raw Google Benchmark usage of
/// `BENCHMARK(MyBenchmark)->ArgName("MyParam")->Range(32, 1024*1024);`. See
/// the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
#define VTKM_BENCHMARK_OPTS(BenchFunc, options) \
BENCHMARK(BenchFunc)->UseManualTime()->Unit(benchmark::kMillisecond) options
/// \def VTKM_BENCHMARK_APPLY(BenchFunc, ConfigFunc)
@ -211,7 +211,7 @@
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
#define VTKM_BENCHMARK_APPLY(BenchFunc, applyFunctor) \
BENCHMARK(BenchFunc)->Apply(applyFunctor)->UseManualTime()->Unit(benchmark::kMillisecond)
/// \def VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList)
@ -224,7 +224,7 @@
/// template <typename T>
/// void BenchFunc(::benchmark::State& state)
/// ```
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
#define VTKM_BENCHMARK_TEMPLATES(BenchFunc, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, vtkm::bench::detail::NullApply, TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, Args, TypeList)
@ -237,10 +237,10 @@
/// ->ArgName("MyParam")->Range(32, 1024*1024),
/// vtkm::List<vtkm::Float32, vtkm::Vec3f_32>);
/// ```
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY( \
BenchFunc, \
[](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
#define VTKM_BENCHMARK_TEMPLATES_OPTS(BenchFunc, options, TypeList) \
VTKM_BENCHMARK_TEMPLATES_APPLY( \
BenchFunc, \
[](::benchmark::internal::Benchmark* bm) { bm options->Unit(benchmark::kMillisecond); }, \
TypeList)
/// \def VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ConfigFunc, TypeList)
@ -255,22 +255,22 @@
/// ```
///
/// See the Google Benchmark documentation for more details on the available options.
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
namespace \
#define VTKM_BENCHMARK_TEMPLATES_APPLY(BenchFunc, ApplyFunctor, TypeList) \
namespace \
{ /* A template function cannot be used as a template parameter, so wrap the function with \
* a template struct to get it into the GenerateTemplateBenchmarks class. */ \
template <typename... Ts> \
struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
{ \
static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
}; \
} /* end anon namespace */ \
int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
template <typename... Ts> \
struct VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
{ \
static ::benchmark::internal::Function* GetFunction() { return BenchFunc<Ts...>; } \
}; \
} /* end anon namespace */ \
int BENCHMARK_PRIVATE_NAME(BenchFunc) = vtkm::bench::detail::GenerateTemplateBenchmarks< \
brigand::bind<VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc)>, \
TypeList>::Register(#BenchFunc, ApplyFunctor)
// Internal use only:
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
#define VTKM_BENCHMARK_WRAPPER_NAME(BenchFunc) \
BENCHMARK_PRIVATE_CONCAT(_wrapper_, BenchFunc, __LINE__)
namespace vtkm
@ -280,9 +280,7 @@ namespace bench
namespace detail
{
static inline void NullApply(::benchmark::internal::Benchmark*)
{
}
static inline void NullApply(::benchmark::internal::Benchmark*) {}
/// Do not use directly. The VTKM_BENCHMARK_TEMPLATES macros should be used
/// instead.

@ -44,6 +44,7 @@ set(benchmarks
BenchmarkDeviceAdapter
BenchmarkFieldAlgorithms
BenchmarkFilters
BenchmarkODEIntegrators
BenchmarkTopologyAlgorithms
)

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f85560cc05688d09c21b22e91c14cec22deecb3c51dc364d82cc9fd460c6ab6
size 328

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a47045b1ae5539ef0125273ee9c50a9a6e809f78411f6a850ac34e6fa43189bb
size 535

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ef3dfd79f0c8d18780d0749014d71c0226134041283d33de0bcd994e343dd421
size 2001070

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2bb3d36ea5ecef5e7ef1057d0dddebbc590424915083091ead3dac2928000524
size 2904465

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bffad7dae3dd6ef018ad7a9e109464ced0f3b9bc15cf1fb5d555f6d0d00b621f
size 3001624

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2cbdf56fd5445ddc5b6bc05507b8825fb8d74fe1ccce894bde03e5ff2ecf5fb6
size 525141

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:752021630d25aff8dfd00064badd452896be70bc8b2f94b008900b4fc70d4dd5
size 1811

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d1dbb4c28f1c829769ad3e03fc58f667935d8a461d3515036d5d98f5e3841cb
size 395

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c1860e747d7f460afc63e32de184e445ffb966a42fb07f9d44ba39020584864f
size 496

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d9bea2064cd3402f3f5b7862e6b775e37f33210ba099f59358857d4bdae1020
size 255

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e154ba13346e6998b864316868da3f155e99efe4f330c8e080b0d7ece22b505a
size 488

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d0ddc7c712a6d544db85660cd9d325884892b18d6f0ed451361aaeae2a96413
size 204

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75b5601eb23b1724d5309e69a51839615bce625f6e7641b52dc3d06e10b0c5ee
size 745

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff3108d009d2eef410593811857e38388001f7df624ddeaed3edceafbc838aea
size 849

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5aca6667b06deb4ec6236d5caa3d9518345bc1eb9021bc721289b81acc980af9
size 789

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:861fc904b7d4db43288fce85c8c1398726b54ac82d7bcbcebd8f12808cb5599b
size 1002

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:29e43c695763535251ab22af815651caa53d103b5fd168c72dfb9188e72e4ff4
size 1244

@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3731448fe4d87b204e185829237a6a6b0140aed2fb27eea0533883a4cf4ed79d
size 1065

@ -60,14 +60,14 @@ Current gitlab runner tags for VTK-m are:
Used to state that we require a linux based gitlab-runner
- large-memory
Used to state that this step will require a machine that has lots of memory.
This is currently used for cuda `build` requests
This is currently used for CUDA `build` requests
- cuda-rt
Used to state that the runner is required to have the cuda runtime enviornment.
This isn't required to `build` VTK-m, only `test`
Used to state that the runner is required to have the CUDA runtime environment.
This is required to `build` and `test` VTK-m when using CUDA
- maxwell
- pascal
- turing
Only used on a `test` stage to signifiy which GPU hardware is required to
Only used on a `test` stage to signify which GPU hardware is required to
run the VTK-m tests
# How to use docker builders locally
@ -259,22 +259,3 @@ sudo docker login --username=<docker_hub_name>
cd .gitlab/ci/docker
sudo ./update_all.sh 20201230
```
# ECP OSTI CI
`.gitlab-ci-ecp.yml` allows for VTK-m to run CI on provided by ECP at NMC.
To have this work properly you will need to make sure that the gitlab repository
has been updated to this non-standard yaml file location
( "Settings" -> "CI/CD" -> "General pipelines" -> "Custom CI configuration path").
The ECP CI is setup to verify VTK-m mainly on Power9 hardware as that currently is
missing from VTK-m standard CI infrastructure.
Currently we verify Power9 support with `cuda` and `openmp` builders. The `cuda` builder
is setup to use the default cuda SDK on the machine and the required `c++` compiler which
currently is `gcc-4.8.5`. The `openmp` builder is setup to use the newest `c++` compiler provided
on the machine so that we maximimze compiler coverage.
## Issues
Currently these builders don't report back to the VTK-m CDash instance.

@ -0,0 +1,7 @@
# Remove VTKDataSetWriter::WriteDataSet just_points parameter
In the method `VTKDataSetWriter::WriteDataSet`, the `just_points` parameter has
been removed due to lack of usage.
The purpose of `just_points` was to allow exporting only the points of a
DataSet without its cell data.

@ -0,0 +1,5 @@
# Add Kokkos backend
Adds a new device backend `Kokkos` which uses the Kokkos library for parallelism.
Users must provide the Kokkos build, and VTK-m will use the default configured execution
space.

@ -0,0 +1,29 @@
# `vtkm::cont::internal::Buffer` now can have ownership transferred
Memory once transferred to `Buffer` always had to be managed by VTK-m. This is problematic
for applications that need VTK-m to allocate memory but need that memory to outlive
VTK-m's management of it.
`Buffer::TakeHostBufferOwnership` allows for easy transfer of memory ownership out of VTK-m.
When taking ownership of a VTK-m buffer you are provided the following information:
- Memory: A `void*` pointer to the array
- Container: A `void*` pointer used to free the memory. This is necessary to support cases such as allocations transferred into VTK-m from a `std::vector`.
- Delete: The function to call to actually delete the transferred memory
- Reallocate: The function to call to re-allocate the transferred memory. This will throw an exception if users try
to reallocate a buffer that was 'view' only
- Size: The size in number of elements of the array
To properly steal memory from VTK-m you do the following:
```cpp
vtkm::cont::ArrayHandle<T> arrayHandle;
...
auto stolen = arrayHandle.GetBuffers()->TakeHostBufferOwnership();
...
stolen.Delete(stolen.Container);
```

202
docs/changelog/buffer.md Normal file

@ -0,0 +1,202 @@
# Redesign of ArrayHandle to access data using typeless buffers
The original implementation of `ArrayHandle` is meant to be very generic.
To define an `ArrayHandle`, you actually create a `Storage` class that
maintains the data and provides portals to access it (on the host). Because
the `Storage` can provide any type of data structure it wants, you also
need to define an `ArrayTransfer` that describes how to move the
`ArrayHandle` to and from a device. It also has to be repeated for every
translation unit that uses them.
This is a very powerful mechanism. However, one of the major problems with
this approach is that every `ArrayHandle` type needs to have a separate
compile path for every value type crossed with every device. Because of
this limitation, the `ArrayHandle` for the basic storage has a special
implementation that manages the actual data allocation and movement as
`void *` arrays. In this way all the data management can be compiled once
and put into the `vtkm_cont` library. This has dramatically improved the
VTK-m compile time.
This new design replicates the basic `ArrayHandle`'s success to all other
storage types. The basic idea is to make the implementation of
`ArrayHandle` storage slightly less generic. Instead of requiring it to
manage the data it stores, it instead just builds `ArrayPortal`s from
`void` pointers that it is given. The management of `void` pointers can be
done in non-templated classes that are compiled into a library.
This initial implementation does not convert all `ArrayHandle`s to avoid
making non-backward compatible changes before the next minor revision of
VTK-m. In particular, it would be particularly difficult to convert
`ArrayHandleVirtual`. It could be done, but it would be a lot of work for a
class that will likely be removed.
## Buffer
Key to these changes is the introduction of a
`vtkm::cont::internal::Buffer` object. As the name implies, the `Buffer`
object manages a single block of bytes. `Buffer` is agnostic to the type of
data being stored. It only knows the length of the buffer in bytes. It is
responsible for allocating space on the host and any devices as necessary
and for transferring data among them. (Since `Buffer` knows nothing about
the type of data, a precondition of VTK-m would be that the host and all
devices have to have the same endianness.)
The idea of the `Buffer` object is similar in nature to the existing
`vtkm::cont::internal::ExecutionArrayInterfaceBasicBase` except that it
will manage a buffer of data among the control and all devices rather than
in one device through a templated subclass.
As explained below, `ArrayHandle` holds some fixed number of `Buffer`
objects. (The number can be zero for implicit `ArrayHandle`s.) Because all
the interaction with the devices happens through `Buffer`, it will no longer
be necessary to compile any reference to `ArrayHandle` for devices (e.g.
you won't have to use nvcc just because the code links `ArrayHandle.h`).
## Storage
The `vtkm::cont::internal::Storage` class changes dramatically. Although an
instance will be kept, the intention is for `Storage` itself to be a
stateless object. It will manage its data through `Buffer` objects provided
from the `ArrayHandle`.
That said, it is possible for `Storage` to have some state. For example,
the `Storage` for `ArrayHandleImplicit` must hold on to the instance of the
portal used to manage the state.
## ArrayTransfer
The `vtkm::cont::internal::ArrayTransfer` class will be removed completely.
All data transfers will be handled internally with the `Buffer` object.
## Portals
A big change for this design is that the type of a portal for an
`ArrayHandle` will be the same for all devices and the host. Thus, we no
longer need specialized versions of portals for each device. We only have
one portal type. And since they are constructed from `void *` pointers, one
method can create them all.
## Advantages
The `ArrayHandle` interface should not change significantly for external
uses, but this redesign offers several advantages.
### Faster Compiles
Because the memory management is contained in a non-templated `Buffer`
class, it can be compiled once in a library and used by all template
instances of `ArrayHandle`. It should have similar compile advantages to
our current specialization of the basic `ArrayHandle`, but applied to all
types of `ArrayHandle`s.
### Fewer Templates
Hand-in-hand with faster compiles, the new design should require fewer
templates and template instances. We have immediately gotten rid of
`ArrayTransfer`. `Storage` is also much shorter. Because all
`ArrayPortal`s are the same for every device and the host, we need many
fewer versions of those classes. In the device adapter, we can probably
collapse the three `ArrayManagerExecution` classes into a single, much
simpler class that does simple memory allocation and copy.
### Fewer files need to be compiled for CUDA
Including `ArrayHandle.h` no longer adds code that compiles for a device.
Thus, we should no longer need to compile for a specific device adapter
just because we access an `ArrayHandle`. This should make it much easier to
achieve our goal of a "firewall". That is, code that just calls VTK-m
filters does not need to support all its compilers and flags.
### Simpler ArrayHandle specialization
The newer code should simplify the implementation of special `ArrayHandle`s
a bit. You need only implement an `ArrayPortal` that operates on one or
more `void *` arrays and a simple `Storage` class.
### Out of band memory sharing
With the current version of `ArrayHandle`, if you want to take data from
one `ArrayHandle` you pretty much have to create a special template to wrap
another `ArrayHandle` around that. With this new design, it is possible to
take data from one `ArrayHandle` and give it to another `ArrayHandle` of a
completely different type. You can't do this willy-nilly since different
`ArrayHandle` types will interpret buffers differently. But there can be
some special important use cases.
One such case could be an `ArrayHandle` that provides strided access to a
buffer. (Let's call it `ArrayHandleStride`.) The idea is that it interprets
the buffer as an array for a particular type (like a basic `ArrayHandle`)
but also defines a stride, skip, and repeat so that given an index it looks
up the value `((index / skip) % repeat) * stride`. The point is that it can
take an AoS array of tuples and represent an array of one of the
components.
The point would be that if you had a `VariantArrayHandle` or `Field`, you
could pull out an array of one of the components as an `ArrayHandleStride`.
An `ArrayHandleStride<vtkm::Float32>` could be used to represent that data
that comes from any basic `ArrayHandle` with `vtkm::Float32` or a
`vtkm::Vec` of that type. It could also represent data from an
`ArrayHandleCartesianProduct` and `ArrayHandleSoA`. We could even represent
an `ArrayHandleUniformPointCoordinates` by just making a small array. This
allows us to statically access a whole bunch of potential array storage
classes with a single type.
### Potentially faster device transfers
There is currently a fast-path for basic `ArrayHandle`s that does a block
cuda memcpy between host and device. But for other `ArrayHandle`s that do
not defer their `ArrayTransfer` to a sub-array, the transfer first has to
copy the data into a known buffer.
Because this new design stores all data in `Buffer` objects, any of these
can be easily and efficiently copied between devices.
## Disadvantages
This new design gives up some features of the original `ArrayHandle` design.
### Can only interface data that can be represented in a fixed number of buffers
Because the original `ArrayHandle` design required the `Storage` to
completely manage the data, it could represent it in any way possible. In
this redesign, the data need to be stored in some fixed number of memory
buffers.
This is a pretty open requirement. I suspect most data formats will be
storable in this way. The user's guide has an example of data stored in a
`std::deque` that will not be representable. But that is probably not a
particularly practical example.
### VTK-m would only be able to support hosts and devices with the same endian
Because data are transferred as `void *` blocks of memory, there is no way
to correct words if the endianness of the two devices does not agree. As far as
I know, there should be no issues with the proposed ECP machines.
If endianness becomes an issue, it might be possible to specify a word length
in the `Buffer`. That would assume that all numbers stored in the `Buffer`
have the same word length.
### ArrayPortals must be completely recompiled in each translation unit
We can declare that an `ArrayHandle` does not need to include the device
adapter header files in part because it no longer needs specialized
`ArrayPortal`s for each device. However, that means that a translation unit
compiled with the host compiler (say gcc) will produce different code for
the `ArrayPortal`s than those with the device compiler (say nvcc). This
could lead to numerous linking problems.
To get around these issues, we will probably have to enforce no exporting
of any of the `ArrayPortal` symbols and force them all to be recompiled for
each translation unit. This will serve to increase the compile times a bit.
We will probably also still encounter linking errors as there would be no
way to enforce this requirement.
### Cannot have specialized portals for the control environment
Because the new design unifies `ArrayPortal` types across control and
execution environments, it is no longer possible to have a special version
for the control environment to manage resources. This will require removing
some recent behavior of control portals such as with MR !1988.

@ -0,0 +1,10 @@
# Disable asserts for CUDA architecture builds
`assert` is supported on recent CUDA cards, but compiling it appears to be
very slow. By default, the `VTKM_ASSERT` macro has been disabled whenever
compiling for a CUDA device (i.e. when `__CUDA_ARCH__` is defined).
Asserts for CUDA devices can be turned back on by turning the
`VTKm_NO_ASSERT_CUDA` CMake variable off. Turning this CMake variable off
will enable assertions in CUDA kernels unless there is another reason
for turning off all asserts (such as a release build).

@ -0,0 +1,39 @@
# Deprecate ArrayHandleVirtualCoordinates
As we port VTK-m to more types of accelerator architectures, supporting
virtual methods is becoming more problematic. Thus, we are working to back
out of using virtual methods in the execution environment.
One of the most widespread users of virtual methods in the execution
environment is `ArrayHandleVirtual`. As a first step of deprecating this
class, we first deprecate the `ArrayHandleVirtualCoordinates` subclass.
Not surprisingly, `ArrayHandleVirtualCoordinates` is used directly by
`CoordinateSystem`. The biggest change necessary was that the `GetData`
method returned an `ArrayHandleVirtualCoordinates`, which obviously would
not work if that class is deprecated.
An oddness about this return type is that it is quite different from the
superclass's method of the same name. Rather, `Field` returns a
`VariantArrayHandle`. Since this had to be corrected anyway, it was decided
to change `CoordinateSystem`'s `GetData` to also return a
`VariantArrayHandle`, although its typelist is set to just `vtkm::Vec3f`.
To try to still support old code that expects the deprecated behavior of
returning an `ArrayHandleVirtualCoordinates`, `CoordinateSystem::GetData`
actually returns a "hidden" subclass of `VariantArrayHandle` that
automatically converts itself to an `ArrayHandleVirtualCoordinates`. (A
deprecation warning is given if this is done.)
This approach to support deprecated code is not perfect. The returned value
for `CoordinateSystem::GetData` can only be used as an `ArrayHandle` if a
method is directly called on it or if it is cast specifically to
`ArrayHandleVirtualCoordinates` or its superclass. For example, if passing
it to a method argument typed as `vtkm::cont::ArrayHandle<T, S>` where `T`
and `S` are template parameters, then the conversion will fail.
To continue to support ease of use, `CoordinateSystem` now has a method
named `GetDataAsMultiplexer` that returns the data as an
`ArrayHandleMultiplexer`. This can be employed to quickly use the
`CoordinateSystem` as an array without the overhead of a `CastAndCall`.

@ -0,0 +1,17 @@
# Virtual methods in execution environment deprecated
The use of classes with any virtual methods in the execution environment is
deprecated. Although we had code to correctly build virtual methods on some
devices such as CUDA, this feature was not universally supported on all
programming models we wish to support. Plus, the implementation of virtual
methods is not hugely convenient on CUDA because the virtual methods could
not be embedded in a library. To get around virtual methods declared in
different libraries, all builds had to be static, and a special linking
step to pull in possible virtual method implementations was required.
For these reasons, VTK-m is no longer relying on virtual methods. (Other
approaches like multiplexers are used instead.) The code will be officially
removed in version 2.0. It is still supported in a deprecated sense (you
should get a warning). However, if you want to build without virtual
methods, you can set the `VTKm_NO_DEPRECATED_VIRTUAL` CMake flag, and they
will not be compiled.

@ -40,11 +40,11 @@ using OldAlias VTKM_DEPRECATED(1.6, "Use NewClass instead.") = NewClass;
```
Functions and methods are marked as deprecated by adding `VTKM_DEPRECATED`
as a modifier before the return value.
as a modifier before the return value and any markup (VTKM_CONT, VTKM_EXEC, or VTKM_EXEC_CONT).
``` cpp
VTKM_EXEC_CONT
VTKM_DEPRECATED(1.6, "You must now specify a tolerance.") void ImportantMethod(double x)
VTKM_EXEC_CONT
{
this->ImportantMethod(x, 1e-6);
}
@ -83,8 +83,8 @@ support this a pair of macros, `VTKM_DEPRECATED_SUPPRESS_BEGIN` and
deprecated items should be wrapped in these macros.
``` cpp
VTKM_EXEC_CONT
VTKM_DEPRECATED(1.6, "You must now specify both a value and tolerance.")
VTKM_EXEC_CONT
void ImportantMethod()
{
// It can be the case that to implement a deprecated method you need to

@ -0,0 +1,14 @@
# Add atomic free functions
Previously, all atomic functions were stored in classes named
`AtomicInterfaceControl` and `AtomicInterfaceExecution`, which required
you to know at compile time which device was using the methods. That in
turn means that anything using an atomic needed to be templated on the
device it is running on.
That can be a big hassle (and is problematic for some code structure).
Instead, these methods are moved to free functions in the `vtkm`
namespace. These functions operate like those in `Math.h`. Using
compiler directives, an appropriate version of the function is compiled
for the current device the compiler is using.

@ -0,0 +1,12 @@
# Disable asserts for HIP architecture builds
`assert` is supported on recent HIP cards, but compiling it is very slow,
as it triggers the usage of `printf`. Currently (ROCm 3.7), `printf`
has a severe performance penalty and should be avoided when possible.
By default, the `VTKM_ASSERT` macro has been disabled whenever compiling
for a HIP device via Kokkos.
Asserts for HIP devices can be turned back on by turning the
`VTKm_NO_ASSERT_HIP` CMake variable off. Turning this CMake variable off
will enable assertions in HIP kernels unless there is another reason
for turning off all asserts (such as a release build).

@ -0,0 +1,120 @@
# Improvements to moving data into ArrayHandle
We have made several improvements to adding data into an `ArrayHandle`.
## Moving data from an `std::vector`
For numerous reasons, it is convenient to define data in a `std::vector`
and then wrap that into an `ArrayHandle`. There are two obvious ways to do
this. First, you could deep copy the data into an `ArrayHandle`, which has
obvious drawbacks. Second, you could take the pointer for the data in the
`std::vector` and use that as user-allocated memory in the `ArrayHandle`
without deep copying it. The problem with this shallow copy is that it is
unsafe. If the `std::vector` goes out of scope (or gets resized), then the
data the `ArrayHandle` is pointing to becomes unallocated, which will lead
to unpredictable behavior.
However, there is a third option. It is often the case that an
`std::vector` is filled and then becomes unused once it is converted to an
`ArrayHandle`. In this case, what we really want is to pass the data off to
the `ArrayHandle` so that the `ArrayHandle` is now managing the data and
not the `std::vector`.
C++11 has a mechanism to do this: move semantics. You can now pass
variables to functions as an "rvalue" (right-hand value). When something is
passed as an rvalue, it can pull state out of that variable and move it
somewhere else. `std::vector` implements this movement so that an rvalue
can be moved to another `std::vector` without actually copying the data.
`make_ArrayHandle` now also takes advantage of this feature to move rvalue
`std::vector`s.
There is a special form of `make_ArrayHandle` named `make_ArrayHandleMove`
that takes an rvalue. There is also a special overload of
`make_ArrayHandle` itself that handles an rvalue `vector`. (However, using
the explicit move version is better if you want to make sure the data is
actually moved.)
So if you create the `std::vector` in the call to `make_ArrayHandle`, then
the data only gets created once.
``` cpp
auto array = vtkm::cont::make_ArrayHandleMove(std::vector<vtkm::Id>{ 2, 6, 1, 7, 4, 3, 9 });
```
Note that there is now a better way to express an initializer list to
`ArrayHandle` documented below. But this form of `make_ArrayHandleMove` can be
particularly useful for initializing an array to all of a particular value.
For example, an easy way to initialize an array of 1000 elements all to 1
is
``` cpp
auto array = vtkm::cont::make_ArrayHandleMove(std::vector<vtkm::Id>(1000, 1));
```
You can also move the data from an already created `std::vector` by using
the `std::move` function to convert it to an rvalue. When you do this, the
`std::vector` becomes invalid after the call and any use will be undefined.
``` cpp
std::vector<vtkm::Id> vector;
// fill vector
auto array = vtkm::cont::make_ArrayHandleMove(std::move(vector));
```
## Make `ArrayHandle` from initializer list
A common use case for using `std::vector` (particularly in our unit tests)
is to quickly add an initializer list into an `ArrayHandle`. Repeating the
example from above:
``` cpp
auto array = vtkm::cont::make_ArrayHandleMove(std::vector<vtkm::Id>{ 2, 6, 1, 7, 4, 3, 9 });
```
However, creating the `std::vector` should be unnecessary. Why not be able
to create the `ArrayHandle` directly from an initializer list? Now you can
by simply passing an initializer list to `make_ArrayHandle`.
``` cpp
auto array = vtkm::cont::make_ArrayHandle({ 2, 6, 1, 7, 4, 3, 9 });
```
There is an issue here. The type here can be a little ambiguous (for
humans). In this case, `array` will be of type
`vtkm::cont::ArrayHandleBasic<int>`, since that is what an integer literal
defaults to. This could be a problem if, for example, you want to use
`array` as an array of `vtkm::Id`, which could be of type `vtkm::Int64`.
This is easily remedied by specifying the desired value type as a template
argument to `make_ArrayHandle`.
``` cpp
auto array = vtkm::cont::make_ArrayHandle<vtkm::Id>({ 2, 6, 1, 7, 4, 3, 9 });
```
## Deprecated `make_ArrayHandle` with default shallow copy
For historical reasons, passing an `std::vector` or a pointer to
`make_ArrayHandle` does a shallow copy (i.e. `CopyFlag` defaults to `Off`).
Although more efficient, this mode is inherently unsafe, and making it the
default is asking for trouble.
To combat this, calling `make_ArrayHandle` without a copy flag is
deprecated. In this way, if you wish to do the faster but more unsafe
creation of an `ArrayHandle` you should explicitly express that.
This required quite a few changes through the VTK-m source (particularly in
the tests).
## Similar changes to `Field`
`vtkm::cont::Field` has a `make_Field` helper function that is similar to
`make_ArrayHandle`. It also features the ability to create fields from
`std::vector`s and C arrays. It also likewise had the same unsafe behavior
by default of not copying from the source of the arrays.
That behavior has similarly been deprecated. You now have to specify a
copy flag.
The ability to construct a `Field` from an initializer list of values has
also been added.

@ -0,0 +1,109 @@
# UnknownArrayHandle and UncertainArrayHandle for runtime-determined types
Two new classes have been added to VTK-m: `UnknownArrayHandle` and
`UncertainArrayHandle`. These classes serve the same purpose as the set of
`VariantArrayHandle` classes and will replace them.
Motivated mostly by the desire to move away from `ArrayHandleVirtual`, we
have multiple reasons to completely refactor the `VariantArrayHandle`
class. These include changing the implementation, some behavior, and even
the name.
## Motivation
We have several reasons that have accumulated to revisit the implementation
of `VariantArrayHandle`.
### Move away from `ArrayHandleVirtual`
The current implementation of `VariantArrayHandle` internally stores the
array wrapped in an `ArrayHandleVirtual`. That makes sense since you might
as well consolidate the hierarchy of virtual objects into one.
Except `ArrayHandleVirtual` is being deprecated, so it no longer makes
sense to use that internally.
So we will transition the class back to managing the data as typeless on
its own. We will consider using function pointers rather than actual
virtual functions because compilers can be slow in creating lots of virtual
subclasses.
### Reintroduce storage tag lists
The original implementation of `VariantArrayHandle` (which at the time was
called `DynamicArrayHandle`) actually had two type lists: one for the array
value type and one for the storage type. The storage type list was removed
soon after `ArrayHandleVirtual` was introduced because whatever the type of
array, it could be accessed as an `ArrayHandleVirtual`.
However, with `ArrayHandleVirtual` being deprecated, this feature is no
longer possible. We are in need again for the list of storage types to try.
Thus, we need to reintroduce this template argument to
`VariantArrayHandle`.
### More clear name
The name of this class has always been unsatisfactory. The first name,
`DynamicArrayHandle`, makes it sound like the data is always changing. The
second name, `VariantArrayHandle`, makes it sound like an array that holds
a value type that can vary (like an `std::variant`).
We can use a more clear name that expresses better that it is holding an
`ArrayHandle` of an _unknown_ type.
### Take advantage of default types for less templating
Once upon a time everything in VTK-m was a templated header library. Things
have changed quite a bit since then. The most recent development is the
ability to select the "default types" with CMake configuration that allows
you to select a global set of types you care about during compilation. This
is so units like filters can be compiled into a library with all types we
care about, and we don't have to constantly recompile units.
This means that we are becoming less concerned about maintaining type lists
everywhere. Often we can drop the type list and pass data across libraries.
With that in mind, it makes less sense for `VariantArrayHandle` to actually
be a `using` alias for `VariantArrayHandleBase<VTKM_DEFAULT_TYPE_LIST>`.
In response, we can reverse the is-a relationship between the two. Have a
completely typeless version as the base class and have a second, templated
version to express when the type of the array has been partially
narrowed down to given type lists.
## New Name and Structure
The ultimate purpose of this class is to store an `ArrayHandle` where the
value and storage types are unknown. Thus, an appropriate name for the
class is `UnknownArrayHandle`.
`UnknownArrayHandle` is _not_ templated. It simply stores an `ArrayHandle`
in a typeless (`void *`) buffer. It does, however, contain many templated
methods that allow you to query whether the contained array matches given
types, to cast to given types, and to cast and call to a given functor
(from either given type lists or default lists).
Rather than have a virtual class structure to manage the typeless array,
the new management will use function pointers. This has shown to sometimes
improve compile times and generate less code.
Sometimes it is the case that the set of potential types can be narrowed. In
this case, the array ceases to be unknown and becomes _uncertain_. Thus,
the companion class to `UnknownArrayHandle` is `UncertainArrayHandle`.
`UncertainArrayHandle` has two template parameters: a list of potential
value types and a list of potential storage types. The behavior of
`UncertainArrayHandle` matches that of `UnknownArrayHandle` (and might
inherit from it). However, for `CastAndCall` operations, it will use the
type lists defined in its template parameters.
## Serializing UnknownArrayHandle
Because `UnknownArrayHandle` is not templated, it contains some
opportunities to compile things into the `vtkm_cont` library. Templated
methods like `CastAndCall` cannot be, but the specializations of DIY's
serialize can be.
And since it only has to be compiled once into a library, we can spend some
extra time compiling for more types. We don't have to restrict ourselves to
`VTKM_DEFAULT_TYPE_LIST`. We can compile for vtkm::TypeListTagAll.

@ -0,0 +1,13 @@
# Write uniform and rectilinear grids to legacy VTK files
As a programming convenience, all `vtkm::cont::DataSet` written by
`vtkm::io::VTKDataSetWriter` were written as a structured grid. Although
technically correct, it changed the structure of the data. This meant that
if you wanted to capture data to run elsewhere, it would run as a different
data type. This was particularly frustrating if the data of that structure
was causing problems and you wanted to debug it.
Now, `VTKDataSetWriter` checks the type of the `CoordinateSystem` to
determine whether the data should be written out as `STRUCTURED_POINTS`
(i.e. a uniform grid), `RECTILINEAR_GRID`, or `STRUCTURED_GRID`
(curvilinear).

@ -19,14 +19,16 @@ if(VTKm_ENABLE_EXAMPLES)
add_subdirectory(contour_tree_distributed)
add_subdirectory(cosmotools)
add_subdirectory(demo)
#add_subdirectory(game_of_life)
add_subdirectory(game_of_life)
add_subdirectory(hello_worklet)
add_subdirectory(histogram)
add_subdirectory(ising)
add_subdirectory(lagrangian)
add_subdirectory(mesh_quality)
add_subdirectory(multi_backend)
add_subdirectory(oscillator)
add_subdirectory(particle_advection)
add_subdirectory(streamline_mpi)
add_subdirectory(polyline_archimedean_helix)
add_subdirectory(redistribute_points)
add_subdirectory(temporal_advection)

@ -370,28 +370,14 @@ int main(int argc, char* argv[])
VTKM_LOG_IF_S(vtkm::cont::LogLevel::Info,
numLevels > 0,
std::endl
<< " ------------ Settings Isolevel Selection -----------"
<< std::endl
<< " levels="
<< numLevels
<< std::endl
<< " eps="
<< eps
<< std::endl
<< " comp"
<< numComp
<< std::endl
<< " type="
<< contourType
<< std::endl
<< " method="
<< contourSelectMethod
<< std::endl
<< " mc="
<< useMarchingCubes
<< std::endl
<< " use"
<< (usePersistenceSorter ? "PersistenceSorter" : "VolumeSorter"));
<< " ------------ Settings Isolevel Selection -----------" << std::endl
<< " levels=" << numLevels << std::endl
<< " eps=" << eps << std::endl
<< " comp" << numComp << std::endl
<< " type=" << contourType << std::endl
<< " method=" << contourSelectMethod << std::endl
<< " mc=" << useMarchingCubes << std::endl
<< " use" << (usePersistenceSorter ? "PersistenceSorter" : "VolumeSorter"));
}
currTime = totalTime.GetElapsedTime();
vtkm::Float64 startUpTime = currTime - prevTime;
@ -401,8 +387,8 @@ int main(int argc, char* argv[])
#ifdef WITH_MPI
#ifdef DEBUG_PRINT
// From https://www.unix.com/302983597-post2.html
char* cstr_filename = new char[15];
snprintf(cstr_filename, sizeof(filename), "cout_%d.log", rank);
char cstr_filename[32];
snprintf(cstr_filename, sizeof(cstr_filename), "cout_%d.log", rank);
int out = open(cstr_filename, O_RDWR | O_CREAT | O_APPEND, 0600);
if (-1 == out)
{
@ -431,8 +417,6 @@ int main(int argc, char* argv[])
perror("cannot redirect stderr");
return 255;
}
delete[] cstr_filename;
#endif
#endif
@ -458,23 +442,27 @@ int main(int argc, char* argv[])
// Copy the data into the values array so we can construct a multiblock dataset
// TODO All we should need to do to implement BOV support is to copy the values
// in the values vector and copy the dimensions in the dims vector
vtkm::Id nRows, nCols, nSlices;
vtkm::worklet::contourtree_augmented::GetRowsColsSlices temp;
temp(inDataSet.GetCellSet(), nRows, nCols, nSlices);
dims[0] = nRows;
dims[1] = nCols;
dims[2] = nSlices;
auto tempField = inDataSet.GetField("values").GetData();
values.resize(static_cast<std::size_t>(tempField.GetNumberOfValues()));
auto tempFieldHandle = tempField.AsVirtual<ValueType>().ReadPortal();
for (vtkm::Id i = 0; i < tempField.GetNumberOfValues(); i++)
{
values[static_cast<std::size_t>(i)] = static_cast<ValueType>(tempFieldHandle.Get(i));
}
vtkm::Id3 meshSize;
vtkm::worklet::contourtree_augmented::GetPointDimensions temp;
temp(inDataSet.GetCellSet(), meshSize);
dims[0] = meshSize[0];
dims[1] = meshSize[1];
dims[2] = meshSize[2];
// TODO/FIXME: The following is commented out since it creates a warning that
// AsVirtual() will no longer be supported. Since this implementation is
// incomplete anyway, it currently makes more sense to comment it out than
// to fix the warning.
// auto tempField = inDataSet.GetField("values").GetData();
// values.resize(static_cast<std::size_t>(tempField.GetNumberOfValues()));
// auto tempFieldHandle = tempField.AsVirtual<ValueType>().ReadPortal();
// for (vtkm::Id i = 0; i < tempField.GetNumberOfValues(); i++)
// {
// values[static_cast<std::size_t>(i)] = static_cast<ValueType>(tempFieldHandle.Get(i));
// }
VTKM_LOG_S(vtkm::cont::LogLevel::Error,
"BOV reader not yet support in MPI mode by this example");
MPI_Finalize();
return EXIT_SUCCESS;
return EXIT_FAILURE;
#endif
}
else // Read ASCII data input
@ -529,6 +517,9 @@ int main(int argc, char* argv[])
dataReadTime = currTime - prevTime;
prevTime = currTime;
// swap dims order
std::swap(dims[0], dims[1]);
#ifndef WITH_MPI // We only need the inDataSet if are not using MPI otherwise we'll constructe a multi-block dataset
// build the input dataset
vtkm::cont::DataSetBuilderUniform dsb;
@ -536,16 +527,16 @@ int main(int argc, char* argv[])
if (nDims == 2)
{
vtkm::Id2 vdims;
vdims[0] = static_cast<vtkm::Id>(dims[1]);
vdims[1] = static_cast<vtkm::Id>(dims[0]);
vdims[0] = static_cast<vtkm::Id>(dims[0]);
vdims[1] = static_cast<vtkm::Id>(dims[1]);
inDataSet = dsb.Create(vdims);
}
// 3D data
else
{
vtkm::Id3 vdims;
vdims[0] = static_cast<vtkm::Id>(dims[1]);
vdims[1] = static_cast<vtkm::Id>(dims[0]);
vdims[0] = static_cast<vtkm::Id>(dims[0]);
vdims[1] = static_cast<vtkm::Id>(dims[1]);
vdims[2] = static_cast<vtkm::Id>(dims[2]);
inDataSet = dsb.Create(vdims);
}
@ -558,19 +549,17 @@ int main(int argc, char* argv[])
{
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " ---------------- Input Mesh Properties --------------"
<< std::endl
<< " Number of dimensions: "
<< nDims);
<< " ---------------- Input Mesh Properties --------------" << std::endl
<< " Number of dimensions: " << nDims);
}
// Check if marching cubes is enabled for non 3D data
bool invalidMCOption = (useMarchingCubes && nDims != 3);
VTKM_LOG_IF_S(
vtkm::cont::LogLevel::Error,
invalidMCOption && (rank == 0),
"The input mesh is " << nDims << "D. "
<< "Contour tree using marching cubes is only supported for 3D data.");
VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
invalidMCOption && (rank == 0),
"The input mesh is "
<< nDims << "D. "
<< "Contour tree using marching cubes is only supported for 3D data.");
// If we found any errors in the settings then finalize MPI and exit the execution
if (invalidMCOption)
@ -583,7 +572,7 @@ int main(int argc, char* argv[])
#ifndef WITH_MPI // construct regular, single-block VTK-M input dataset
vtkm::cont::DataSet useDataSet = inDataSet; // Single block dataset
#else // Create a multi-block dataset for multi-block DIY-paralle processing
#else // Create a multi-block dataset for multi-block DIY-paralle processing
vtkm::cont::PartitionedDataSet useDataSet; // Partitioned variant of the input dataset
vtkm::Id3 blocksPerDim =
nDims == 3 ? vtkm::Id3(1, 1, numBlocks) : vtkm::Id3(1, numBlocks, 1); // Decompose the data into
@ -610,8 +599,8 @@ int main(int argc, char* argv[])
{
VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
rank == 0,
"Number of ranks to large for data. Use " << lastDimSize / 2
<< "or fewer ranks");
"Number of ranks too large for data. Use " << lastDimSize / 2
<< "or fewer ranks");
MPI_Finalize();
return EXIT_FAILURE;
}
@ -645,8 +634,8 @@ int main(int argc, char* argv[])
if (nDims == 2)
{
vtkm::Id2 vdims;
vdims[0] = static_cast<vtkm::Id>(currBlockSize);
vdims[1] = static_cast<vtkm::Id>(dims[0]);
vdims[0] = static_cast<vtkm::Id>(dims[0]);
vdims[1] = static_cast<vtkm::Id>(currBlockSize);
vtkm::Vec<ValueType, 2> origin(0, blockIndex * blockSize);
vtkm::Vec<ValueType, 2> spacing(1, 1);
ds = dsb.Create(vdims, origin, spacing);
@ -661,8 +650,8 @@ int main(int argc, char* argv[])
else
{
vtkm::Id3 vdims;
vdims[0] = static_cast<vtkm::Id>(dims[0]);
vdims[1] = static_cast<vtkm::Id>(dims[1]);
vdims[0] = static_cast<vtkm::Id>(dims[1]);
vdims[1] = static_cast<vtkm::Id>(dims[0]);
vdims[2] = static_cast<vtkm::Id>(currBlockSize);
vtkm::Vec<ValueType, 3> origin(0, 0, (blockIndex * blockSize));
vtkm::Vec<ValueType, 3> spacing(1, 1, 1);
@ -683,7 +672,7 @@ int main(int argc, char* argv[])
useDataSet.AppendPartition(ds);
}
}
#endif // WITH_MPI construct input dataset
#endif // WITH_MPI construct input dataset
currTime = totalTime.GetElapsedTime();
buildDatasetTime = currTime - prevTime;
@ -706,6 +695,21 @@ int main(int argc, char* argv[])
vtkm::Float64 computeContourTreeTime = currTime - prevTime;
prevTime = currTime;
#ifdef WITH_MPI
#ifdef DEBUG_PRINT
std::cout << std::flush;
close(out);
std::cerr << std::flush;
close(err);
dup2(save_out, fileno(stdout));
dup2(save_err, fileno(stderr));
close(save_out);
close(save_err);
#endif
#endif
////////////////////////////////////////////
// Compute the branch decomposition
////////////////////////////////////////////
@ -719,12 +723,12 @@ int main(int argc, char* argv[])
ctaug_ns::IdArrayType superarcDependentWeight;
ctaug_ns::IdArrayType supernodeTransferWeight;
ctaug_ns::IdArrayType hyperarcDependentWeight;
ctaug_ns::ProcessContourTree::ComputeVolumeWeights(filter.GetContourTree(),
filter.GetNumIterations(),
superarcIntrinsicWeight, // (output)
superarcDependentWeight, // (output)
supernodeTransferWeight, // (output)
hyperarcDependentWeight); // (output)
ctaug_ns::ProcessContourTree::ComputeVolumeWeightsSerial(filter.GetContourTree(),
filter.GetNumIterations(),
superarcIntrinsicWeight, // (output)
superarcDependentWeight, // (output)
supernodeTransferWeight, // (output)
hyperarcDependentWeight); // (output)
// Record the timings for the branch decomposition
std::stringstream timingsStream; // Use a string stream to log in one message
timingsStream << std::endl;
@ -740,14 +744,14 @@ int main(int argc, char* argv[])
ctaug_ns::IdArrayType branchMaximum;
ctaug_ns::IdArrayType branchSaddle;
ctaug_ns::IdArrayType branchParent;
ctaug_ns::ProcessContourTree::ComputeVolumeBranchDecomposition(filter.GetContourTree(),
superarcDependentWeight,
superarcIntrinsicWeight,
whichBranch, // (output)
branchMinimum, // (output)
branchMaximum, // (output)
branchSaddle, // (output)
branchParent); // (output)
ctaug_ns::ProcessContourTree::ComputeVolumeBranchDecompositionSerial(filter.GetContourTree(),
superarcDependentWeight,
superarcIntrinsicWeight,
whichBranch, // (output)
branchMinimum, // (output)
branchMaximum, // (output)
branchSaddle, // (output)
branchParent); // (output)
// Record and log the branch decomposition timings
timingsStream << " " << std::setw(38) << std::left << "Compute Volume Branch Decomposition"
<< ": " << branchDecompTimer.GetElapsedTime() << " seconds" << std::endl;
@ -866,116 +870,47 @@ int main(int argc, char* argv[])
currTime = totalTime.GetElapsedTime();
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " -------------------------- Totals "
<< rank
<< " -----------------------------"
<< std::endl
<< std::setw(42)
<< std::left
<< " Start-up"
<< ": "
<< startUpTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Data Read"
<< ": "
<< dataReadTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Build VTKM Dataset"
<< ": "
<< buildDatasetTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Compute Contour Tree"
<< ": "
<< computeContourTreeTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Compute Branch Decomposition"
<< ": "
<< computeBranchDecompTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Total Time"
<< ": "
<< currTime
<< " seconds");
<< " -------------------------- Totals " << rank
<< " -----------------------------" << std::endl
<< std::setw(42) << std::left << " Start-up"
<< ": " << startUpTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Data Read"
<< ": " << dataReadTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Build VTKM Dataset"
<< ": " << buildDatasetTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Compute Contour Tree"
<< ": " << computeContourTreeTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Compute Branch Decomposition"
<< ": " << computeBranchDecompTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Total Time"
<< ": " << currTime << " seconds");
const ctaug_ns::ContourTree& ct = filter.GetContourTree();
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " ---------------- Contour Tree Array Sizes ---------------------"
<< std::endl
<< std::setw(42)
<< std::left
<< " #Nodes"
<< ": "
<< ct.Nodes.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Arcs"
<< ": "
<< ct.Arcs.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Superparents"
<< ": "
<< ct.Superparents.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Superarcs"
<< ": "
<< ct.Superarcs.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Supernodes"
<< ": "
<< ct.Supernodes.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Hyperparents"
<< ": "
<< ct.Hyperparents.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #WhenTransferred"
<< ": "
<< ct.WhenTransferred.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Hypernodes"
<< ": "
<< ct.Hypernodes.GetNumberOfValues()
<< std::endl
<< std::setw(42)
<< std::left
<< " #Hyperarcs"
<< ": "
<< ct.Hyperarcs.GetNumberOfValues()
<< std::endl);
<< " ---------------- Contour Tree Array Sizes ---------------------" << std::endl
<< std::setw(42) << std::left << " #Nodes"
<< ": " << ct.Nodes.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Arcs"
<< ": " << ct.Arcs.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Superparents"
<< ": " << ct.Superparents.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Superarcs"
<< ": " << ct.Superarcs.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Supernodes"
<< ": " << ct.Supernodes.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Hyperparents"
<< ": " << ct.Hyperparents.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #WhenTransferred"
<< ": " << ct.WhenTransferred.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Hypernodes"
<< ": " << ct.Hypernodes.GetNumberOfValues() << std::endl
<< std::setw(42) << std::left << " #Hyperarcs"
<< ": " << ct.Hyperarcs.GetNumberOfValues() << std::endl);
// Print hyperstructure statistics
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< ct.PrintHyperStructureStatistics(false)
<< std::endl);
<< ct.PrintHyperStructureStatistics(false) << std::endl);
// Flush output streams just to make sure everything has been logged (in particular when using MPI)
std::cout << std::flush;

@ -60,7 +60,7 @@ find_package(VTKm REQUIRED QUIET)
####################################
if (VTKm_ENABLE_MPI)
add_executable(ContourTree_Distributed ContourTreeApp.cxx)
target_link_libraries(ContourTree_Distributed vtkm_filter)
target_link_libraries(ContourTree_Distributed vtkm_filter MPI::MPI_CXX)
vtkm_add_target_information(ContourTree_Distributed
MODIFY_CUDA_FLAGS
DEVICE_SOURCES ContourTreeApp.cxx)

@ -160,7 +160,7 @@ int main(int argc, char* argv[])
auto comm = MPI_COMM_WORLD;
// Tell VTK-m which communicator it should use.
vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator(comm));
vtkm::cont::EnvironmentTracker::SetCommunicator(vtkmdiy::mpi::communicator());
// get the rank and size
int rank, size;
@ -252,25 +252,14 @@ int main(int argc, char* argv[])
{
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " ------------ Settings -----------"
<< std::endl
<< " filename="
<< filename
<< std::endl
<< " device="
<< device.GetName()
<< std::endl
<< " mc="
<< useMarchingCubes
<< std::endl
<< " ------------ Settings -----------" << std::endl
<< " filename=" << filename << std::endl
<< " device=" << device.GetName() << std::endl
<< " mc=" << useMarchingCubes << std::endl
#ifdef ENABLE_SET_NUM_THREADS
<< " numThreads="
<< numThreads
<< std::endl
<< " numThreads=" << numThreads << std::endl
#endif
<< " nblocks="
<< numBlocks
<< std::endl);
<< " nblocks=" << numBlocks << std::endl);
}
currTime = totalTime.GetElapsedTime();
vtkm::Float64 startUpTime = currTime - prevTime;
@ -341,14 +330,9 @@ int main(int argc, char* argv[])
{
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " ---------------- Input Mesh Properties --------------"
<< std::endl
<< " Number of dimensions: "
<< nDims
<< std::endl
<< " Number of mesh vertices: "
<< numVertices
<< std::endl);
<< " ---------------- Input Mesh Properties --------------" << std::endl
<< " Number of dimensions: " << nDims << std::endl
<< " Number of mesh vertices: " << numVertices << std::endl);
}
// Check for fatal input errors
@ -359,13 +343,14 @@ int main(int argc, char* argv[])
// Log any errors if found on rank 0
VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
invalidNumDimensions && (rank == 0),
"The input mesh is " << nDims << "D. "
"The input data must be either 2D or 3D.");
VTKM_LOG_IF_S(
vtkm::cont::LogLevel::Error,
invalidMCOption && (rank == 0),
"The input mesh is " << nDims << "D. "
<< "Contour tree using marching cubes is only supported for 3D data.");
"The input mesh is " << nDims
<< "D. "
"The input data must be either 2D or 3D.");
VTKM_LOG_IF_S(vtkm::cont::LogLevel::Error,
invalidMCOption && (rank == 0),
"The input mesh is "
<< nDims << "D. "
<< "Contour tree using marching cubes is only supported for 3D data.");
// If we found any errors in the settings then finalize MPI and exit the execution
if (invalidNumDimensions || invalidMCOption)
{
@ -519,44 +504,18 @@ int main(int argc, char* argv[])
currTime = totalTime.GetElapsedTime();
VTKM_LOG_S(vtkm::cont::LogLevel::Info,
std::endl
<< " -------------------------- Totals "
<< rank
<< " -----------------------------"
<< std::endl
<< std::setw(42)
<< std::left
<< " Start-up"
<< ": "
<< startUpTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Data Read"
<< ": "
<< dataReadTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Build VTKM Dataset"
<< ": "
<< buildDatasetTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Compute Contour Tree"
<< ": "
<< computeContourTreeTime
<< " seconds"
<< std::endl
<< std::setw(42)
<< std::left
<< " Total Time"
<< ": "
<< currTime
<< " seconds");
<< " -------------------------- Totals " << rank
<< " -----------------------------" << std::endl
<< std::setw(42) << std::left << " Start-up"
<< ": " << startUpTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Data Read"
<< ": " << dataReadTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Build VTKM Dataset"
<< ": " << buildDatasetTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Compute Contour Tree"
<< ": " << computeContourTreeTime << " seconds" << std::endl
<< std::setw(42) << std::left << " Total Time"
<< ": " << currTime << " seconds");
// Flush output streams just to make sure everything has been logged (in particular when using MPI)
std::cout << std::flush;

@ -52,11 +52,11 @@ void TestCosmoCenterFinder(const char* fileName)
}
vtkm::cont::ArrayHandle<vtkm::Float32> xLocArray =
vtkm::cont::make_ArrayHandle<vtkm::Float32>(xLocation, nParticles);
vtkm::cont::make_ArrayHandle<vtkm::Float32>(xLocation, nParticles, vtkm::CopyFlag::Off);
vtkm::cont::ArrayHandle<vtkm::Float32> yLocArray =
vtkm::cont::make_ArrayHandle<vtkm::Float32>(yLocation, nParticles);
vtkm::cont::make_ArrayHandle<vtkm::Float32>(yLocation, nParticles, vtkm::CopyFlag::Off);
vtkm::cont::ArrayHandle<vtkm::Float32> zLocArray =
vtkm::cont::make_ArrayHandle<vtkm::Float32>(zLocation, nParticles);
vtkm::cont::make_ArrayHandle<vtkm::Float32>(zLocation, nParticles, vtkm::CopyFlag::Off);
// Output MBP particleId pairs array
vtkm::Pair<vtkm::Id, vtkm::Float32> nxnResult;

@ -53,11 +53,11 @@ void TestCosmoHaloFinder(const char* fileName)
}
vtkm::cont::ArrayHandle<vtkm::Float32> xLocArray =
vtkm::cont::make_ArrayHandle<vtkm::Float32>(xLocation, nParticles);
vtkm::cont::make_ArrayHandleMove<vtkm::Float32>(xLocation, nParticles);
vtkm::cont::ArrayHandle<vtkm::Float32> yLocArray =
vtkm::cont::make_ArrayHandle<vtkm::Float32>(yLocation, nParticles);
vtkm::cont::make_ArrayHandleMove<vtkm::Float32>(yLocation, nParticles);
vtkm::cont::ArrayHandle<vtkm::Float32> zLocArray =
vtkm::cont::make_ArrayHandle<vtkm::Float32>(zLocation, nParticles);
vtkm::cont::make_ArrayHandleMove<vtkm::Float32>(zLocation, nParticles);
// Output halo id, mbp id and min potential per particle
vtkm::cont::ArrayHandle<vtkm::Id> resultHaloId;
@ -88,10 +88,6 @@ void TestCosmoHaloFinder(const char* fileName)
xLocArray.ReleaseResources();
yLocArray.ReleaseResources();
zLocArray.ReleaseResources();
delete[] xLocation;
delete[] yLocation;
delete[] zLocation;
}
/////////////////////////////////////////////////////////////////////

@ -25,10 +25,10 @@
// write that image to a file. It then computes an isosurface on the input data set and renders
// this output data set in a separate image file
using vtkm::rendering::MapperVolume;
using vtkm::rendering::MapperRayTracer;
using vtkm::rendering::MapperWireframer;
using vtkm::rendering::CanvasRayTracer;
using vtkm::rendering::MapperRayTracer;
using vtkm::rendering::MapperVolume;
using vtkm::rendering::MapperWireframer;
int main(int argc, char* argv[])
{

@ -17,7 +17,6 @@
#include <iostream>
#include <random>
#include <vtkm/Math.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleCounting.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
@ -29,10 +28,8 @@
#include <vtkm/filter/FilterDataSet.h>
#include <vtkm/worklet/WorkletPointNeighborhood.h>
#include <vtkm/cont/Invoker.h>
#include <vtkm/cont/TryExecute.h>
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
//Suppress warnings about glut being deprecated on OSX
#if (defined(VTKM_GCC) || defined(VTKM_CLANG))

@ -0,0 +1,21 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Build script for the Ising model example: a single executable `Ising`
# compiled from Ising.cxx.
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project(IsingModel CXX)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
add_executable(Ising Ising.cxx)
# Libraries the example links against (worklet, I/O and rendering).
target_link_libraries(Ising PRIVATE vtkm_worklet vtkm_io vtkm_rendering)
# NOTE(review): presumably registers Ising.cxx as a device source so it is
# compiled with the device compiler when CUDA is enabled - confirm against the
# vtkm_add_target_information documentation.
vtkm_add_target_information(Ising
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES Ising.cxx)

122
examples/ising/Ising.cxx Normal file

@ -0,0 +1,122 @@
//
// Created by ollie on 7/8/20.
//
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
/// Simulation of ferromagnetism using the Ising Model
/// Reference: Computational Physics 2nd Edition, Nicholas Giordano & Hisao Nakanishi
#include <iomanip>
#include <vtkm/cont/ArrayHandleRandomUniformReal.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/DataSetBuilderUniform.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/rendering/CanvasRayTracer.h>
#include <vtkm/rendering/MapperRayTracer.h>
#include <vtkm/rendering/Scene.h>
#include <vtkm/rendering/View2D.h>
#include <vtkm/worklet/WorkletCellNeighborhood.h>
/// Unary functor that maps a sample to a spin value: a sample greater than
/// 0.5 yields +1, every other sample yields -1.
struct UpDown
{
  VTKM_EXEC_CONT vtkm::Float32 operator()(vtkm::Float32 p) const
  {
    if (p > 0.5)
    {
      return 1.0f;
    }
    return -1.0f;
  }
};
/// Creates a 2D uniform data set with the given dimensions (origin (0, 0),
/// spacing (1, 1)) and attaches a cell field named "spins" holding one value
/// per cell. Each value is an ArrayHandleRandomUniformReal sample passed
/// through UpDown, i.e. either +1 or -1.
vtkm::cont::DataSet SpinField(vtkm::Id2 dims)
{
  const vtkm::Vec2f origin{ 0, 0 };
  const vtkm::Vec2f spacing{ 1, 1 };
  auto result = vtkm::cont::DataSetBuilderUniform::Create(dims, origin, spacing);

  // One random sample per cell, converted to +/-1 by the UpDown functor.
  vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32> samples(result.GetNumberOfCells());
  auto signedSamples = vtkm::cont::make_ArrayHandleTransform(samples, UpDown{});

  // Copy the transformed values into a plain array that the field can hold.
  vtkm::cont::ArrayHandle<vtkm::Float32> spins;
  vtkm::cont::ArrayCopy(signedSamples, spins);

  result.AddCellField("spins", spins);
  return result;
}
/// Cell-neighborhood worklet that decides, for each cell, whether its spin is
/// flipped.
///
/// Inputs are the neighborhood of previous spins and one random number `p`
/// per cell; the output is the new spin of the cell. The flip energy is
/// J * s * (sum of the eight surrounding spins). The spin is flipped when that
/// energy is non-positive, or otherwise when `p` does not exceed
/// exp(-E_flip / (kB * T)).
struct UpdateSpins : public vtkm::worklet::WorkletCellNeighborhood
{
  using ControlSignature = void(CellSetIn,
                                FieldInNeighborhood prevspin,
                                FieldIn prob,
                                FieldOut spin);
  using ExecutionSignature = void(_2, _3, _4);

  template <typename NeighIn>
  VTKM_EXEC_CONT void operator()(const NeighIn& prevspin,
                                 vtkm::Float32 p,
                                 vtkm::Float32& spin) const
  {
    // TODO: what is the real value and unit of the constant J and the Boltzmann constant kB?
    const vtkm::Float32 J = 1.f;
    const vtkm::Float32 kB = 1.f;
    // TODO: temperature in Kelvin
    const vtkm::Float32 T = 5.f;

    const auto mySpin = prevspin.Get(0, 0, 0);

    // Sum over the eight cells surrounding this one (the center is excluded).
    const auto neighborSum = prevspin.Get(-1, -1, 0) + prevspin.Get(-1, 0, 0) +
      prevspin.Get(-1, 1, 0) + prevspin.Get(0, -1, 0) + prevspin.Get(0, 1, 0) +
      prevspin.Get(1, -1, 0) + prevspin.Get(1, 0, 0) + prevspin.Get(1, 1, 0);

    // Energy of flipping this spin.
    const vtkm::Float32 E_flip = J * mySpin * neighborSum;

    // Flip unconditionally when E_flip <= 0; otherwise flip only if the random
    // number p is at most the Boltzmann factor exp(-E_flip / (kB * T)). The
    // short-circuit keeps the exponential from being evaluated in the first case.
    const bool flip = (E_flip <= 0) || (p <= vtkm::Exp(-E_flip / (kB * T)));
    if (flip)
    {
      spin = -1.f * mySpin;
    }
    else
    {
      spin = mySpin;
    }
  }
};
int main(int argc, char** argv)
{
auto opts =
vtkm::cont::InitializeOptions::DefaultAnyDevice | vtkm::cont::InitializeOptions::Strict;
vtkm::cont::Initialize(argc, argv, opts);
auto dataSet = SpinField({ 5, 5 });
vtkm::cont::ArrayHandle<vtkm::Float32> spins;
dataSet.GetCellField("spins").GetData().CopyTo(spins);
vtkm::rendering::Scene scene;
vtkm::rendering::Actor actor(dataSet.GetCellSet(),
dataSet.GetCoordinateSystem(),
dataSet.GetCellField("spins"),
vtkm::cont::ColorTable("Cool To Warm"));
scene.AddActor(actor);
vtkm::rendering::CanvasRayTracer canvas(1024, 1024);
vtkm::rendering::MapperRayTracer mapper;
mapper.SetShadingOn(false);
vtkm::rendering::View2D view(scene, mapper, canvas);
view.Paint();
view.SaveAs("spin0.png");
vtkm::cont::Invoker invoker;
for (vtkm::UInt32 i = 1; i < 10; ++i)
{
vtkm::cont::ArrayHandleRandomUniformReal<vtkm::Float32> prob(dataSet.GetNumberOfCells(), { i });
invoker(UpdateSpins{}, dataSet.GetCellSet(), spins, prob, spins);
view.Paint();
view.SaveAs("spin" + std::to_string(i) + ".png");
}
}

@ -42,7 +42,7 @@ vtkm::cont::DataSet make_test3DImageData(vtkm::Id3 dims)
vtkm::cont::ArrayHandle<vtkm::Vec3f> field;
vtkm::cont::Invoker invoke;
invoke(WaveField{}, ds.GetCoordinateSystem(), field);
invoke(WaveField{}, ds.GetCoordinateSystem().GetDataAsMultiplexer(), field);
ds.AddPointField("vec_field", field);
return ds;

@ -102,7 +102,12 @@ void read_oscillators(std::string filePath, vtkm::source::Oscillator& source)
// ArcticViewer helper
// ----------------------------------------------------------------------------
void writeData(std::string& basePath, int timestep, int iSize, int jSize, int kSize, double* values)
void writeData(std::string& basePath,
int timestep,
int iSize,
int jSize,
int kSize,
const double* values)
{
int size = iSize * jSize * kSize;
std::ostringstream timeValues;
@ -158,7 +163,7 @@ void writeData(std::string& basePath, int timestep, int iSize, int jSize, int kS
else
{
int stackSize = size * 8;
dataFilePathPointer.write((char*)values, stackSize);
dataFilePathPointer.write(reinterpret_cast<const char*>(values), stackSize);
dataFilePathPointer.flush();
dataFilePathPointer.close();
}
@ -313,9 +318,9 @@ int main(int argc, char** argv)
vtkm::cont::DataSet rdata = source.Execute();
if (generateOutput)
{
vtkm::cont::ArrayHandle<vtkm::Float64> tmp;
vtkm::cont::ArrayHandleBasic<vtkm::Float64> tmp;
rdata.GetField("scalars", vtkm::cont::Field::Association::POINTS).GetData().CopyTo(tmp);
double* values = tmp.GetStorage().GetArray();
const double* values = tmp.GetReadPointer();
writeData(outputDirectory, count++, sizeX, sizeY, sizeZ, values);
}

@ -71,7 +71,7 @@ int main(int argc, char** argv)
p.ID = i;
seeds.push_back(p);
}
auto seedArray = vtkm::cont::make_ArrayHandle(seeds);
auto seedArray = vtkm::cont::make_ArrayHandle(seeds, vtkm::CopyFlag::Off);
//compute streamlines
vtkm::filter::Streamline streamline;

@ -223,11 +223,12 @@ inline VTKM_CONT vtkm::cont::PartitionedDataSet RedistributePoints::PrepareForEx
vtkmdiy::RegularDecomposer<vtkmdiy::ContinuousBounds> decomposer(
/*dim*/ 3, internal::convert(gbounds), assigner.nblocks());
vtkmdiy::Master master(comm,
/*threads*/ 1,
/*limit*/ -1,
[]() -> void* { return new vtkm::cont::DataSet(); },
[](void* ptr) { delete static_cast<vtkm::cont::DataSet*>(ptr); });
vtkmdiy::Master master(
comm,
/*threads*/ 1,
/*limit*/ -1,
[]() -> void* { return new vtkm::cont::DataSet(); },
[](void* ptr) { delete static_cast<vtkm::cont::DataSet*>(ptr); });
decomposer.decompose(comm.rank(), assigner, master);
assert(static_cast<vtkm::Id>(master.size()) == input.GetNumberOfPartitions());

@ -0,0 +1,27 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##============================================================================
# Build script for the MPI streamline example. The executable is only defined
# when VTKm_ENABLE_MPI is set.
cmake_minimum_required(VERSION 3.12...3.15 FATAL_ERROR)
project(StreamlineMPI CXX)
#Find the VTK-m package
find_package(VTKm REQUIRED QUIET)
if (VTKm_ENABLE_MPI)
add_executable(StreamlineMPI StreamlineMPI.cxx)
# MPI_ENABLED is defined for the sources of this target.
target_compile_definitions(StreamlineMPI PRIVATE "MPI_ENABLED")
target_link_libraries(StreamlineMPI PRIVATE vtkm_filter vtkm_io MPI::MPI_CXX)
vtkm_add_target_information(StreamlineMPI
DROP_UNUSED_SYMBOLS MODIFY_CUDA_FLAGS
DEVICE_SOURCES StreamlineMPI.cxx)
endif()
# NOTE(review): the disabled snippet below refers to a target named
# `streamline_mpi`, which this script does not define (the target above is
# `StreamlineMPI`); the name would need updating before re-enabling it.
#if(TARGET vtkm::tbb)
# target_compile_definitions(streamline_mpi PRIVATE BUILDING_TBB_VERSION)
#endif()

@ -0,0 +1,120 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#include <vtkm/cont/AssignerPartitionedDataSet.h>
#include <vtkm/cont/DataSet.h>
#include <vtkm/cont/EnvironmentTracker.h>
#include <vtkm/cont/Field.h>
#include <vtkm/cont/Initialize.h>
#include <vtkm/cont/PartitionedDataSet.h>
#include <vtkm/filter/Streamline.h>
#include <vtkm/io/VTKDataSetReader.h>
#include <vtkm/io/VTKDataSetWriter.h>
#include <vtkm/io/reader/VTKDataSetReader.h>
#include <mpi.h>
#include <vtkm/thirdparty/diy/diy.h>
#include <vtkm/thirdparty/diy/mpi-cast.h>
#include <vtkm/filter/ParticleAdvection.h>
#include <vtkm/filter/particleadvection/BoundsMap.h>
#include <vtkm/filter/particleadvection/ParticleMessenger.h>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
/// Loads this rank's share of the blocks listed in a ".visit" index file.
///
/// The first line of the file must contain "!NBLOCKS <count>"; each of the
/// following <count> lines names one VTK file, which is resolved relative to
/// the directory of the index file. Blocks are assigned in contiguous ranges
/// of (numBlocks / nRanks) per rank, and the last rank additionally takes any
/// remainder. The "grad" field of every loaded block is overwritten with the
/// constant vector (1, 0, 0) before the data set is appended to `dataSets`.
///
/// \param fname    path of the .visit index file
/// \param dataSets receives (by appending) the data sets read by this rank
/// \param rank     index of the calling rank
/// \param nRanks   total number of ranks
/// \throws std::runtime_error if the file cannot be opened, is not a .visit
///         file, lacks the "!NBLOCKS " header, or lists fewer block entries
///         than announced. std::stoi may additionally throw when the block
///         count is not a number.
void LoadData(std::string& fname, std::vector<vtkm::cont::DataSet>& dataSets, int rank, int nRanks)
{
  std::ifstream is(fname);
  std::cout << "Opening: " << fname << std::endl;
  if (!is)
  {
    std::cout << "File not found! : " << fname << std::endl;
    throw std::runtime_error("unknown file: " + fname);
  }

  const auto extPos = fname.rfind(".visit");
  if (extPos == std::string::npos)
  {
    throw std::runtime_error("Only .visit files are supported.");
  }

  // Directory that contains the index file. A path without any separator
  // refers to the current directory; using the result of substr(0, npos)
  // there would yield the file stem instead of a directory.
  const std::string stem = fname.substr(0, extPos);
  const auto sepPos = stem.rfind('/');
  const std::string dir =
    (sepPos == std::string::npos) ? std::string(".") : stem.substr(0, sepPos);

  // Header line: "!NBLOCKS <count>".
  const std::string blocksTag = "!NBLOCKS ";
  std::string buff;
  std::getline(is, buff);
  const auto tagPos = buff.find(blocksTag);
  if (tagPos == std::string::npos)
  {
    throw std::runtime_error("Missing \"!NBLOCKS \" header in: " + fname);
  }
  const int numBlocks = std::stoi(buff.substr(tagPos + blocksTag.size()));
  if (rank == 0)
  {
    std::cout << "numBlocks= " << numBlocks << std::endl;
  }

  // Half-open range [b0, b1) of block indices owned by this rank.
  const int nPer = numBlocks / nRanks;
  const int b0 = rank * nPer;
  const int b1 = (rank == (nRanks - 1)) ? numBlocks : (rank + 1) * nPer;

  for (int i = 0; i < numBlocks; i++)
  {
    // Every entry must be consumed to stay aligned with the block index, even
    // when the block belongs to another rank.
    if (!std::getline(is, buff))
    {
      throw std::runtime_error("Expected " + std::to_string(numBlocks) +
                               " block entries in: " + fname);
    }
    if (i < b0 || i >= b1)
    {
      continue;
    }

    const std::string vtkFile = dir + "/" + buff;
    vtkm::io::reader::VTKDataSetReader reader(vtkFile);
    vtkm::cont::DataSet ds = reader.ReadDataSet();

    // Replace the "grad" vectors with the constant (1, 0, 0).
    auto f = ds.GetField("grad").GetData();
    vtkm::cont::ArrayHandle<vtkm::Vec<double, 3>> fieldArray;
    fieldArray = f.Cast<vtkm::cont::ArrayHandle<vtkm::Vec<double, 3>>>();
    const vtkm::Id n = fieldArray.GetNumberOfValues(); // vtkm::Id avoids narrowing to int
    auto portal = fieldArray.WritePortal();
    for (vtkm::Id ii = 0; ii < n; ii++)
    {
      portal.Set(ii, vtkm::Vec<double, 3>(1, 0, 0));
    }

    dataSets.push_back(ds);
  }
}
// Example computing streamlines.
// An example vector field is available in the vtk-m data directory: magField.vtk
// Example usage:
// this will advect 200 particles 50 steps using a step size of 0.01
//
// Particle_Advection <path-to-data-dir>/magField.vtk vec 200 50 0.01 output.vtk
//
int main(int argc, char** argv)
{
MPI_Init(&argc, &argv);
auto comm = vtkm::cont::EnvironmentTracker::GetCommunicator();
int rank = comm.rank();
int size = comm.size();
std::string dataFile = argv[1];
std::vector<vtkm::cont::DataSet> dataSets;
LoadData(dataFile, dataSets, rank, size);
vtkm::filter::ParticleAdvection pa;
vtkm::cont::ArrayHandle<vtkm::Particle> seedArray;
std::vector<vtkm::Particle> seeds;
seeds.push_back(vtkm::Particle(vtkm::Vec3f(.1f, .1f, .9f), 0));
seeds.push_back(vtkm::Particle(vtkm::Vec3f(.1f, .6f, .6f), 1));
seeds.push_back(vtkm::Particle(vtkm::Vec3f(.1f, .9f, .1f), 2));
seedArray = vtkm::cont::make_ArrayHandle(seeds);
pa.SetStepSize(0.001f);
pa.SetNumberOfSteps(10000);
pa.SetSeeds(seedArray);
pa.SetActiveField("grad");
vtkm::cont::PartitionedDataSet pds(dataSets);
auto output = pa.Execute(pds);
output.PrintSummary(std::cout);
return 0;
}

@ -15,6 +15,17 @@
#include <cassert>
// Pick up conditions where we want to turn on/off assert.
#ifndef VTKM_NO_ASSERT
#if defined(NDEBUG)
#define VTKM_NO_ASSERT
#elif defined(VTKM_CUDA_DEVICE_PASS) && defined(VTKM_NO_ASSERT_CUDA)
#define VTKM_NO_ASSERT
#elif defined(VTKM_HIP) && defined(VTKM_NO_ASSERT_HIP)
#define VTKM_NO_ASSERT
#endif
#endif // VTKM_NO_ASSERT
/// \def VTKM_ASSERT(condition)
///
/// Asserts that \a condition resolves to true. If \a condition is false,
@ -28,11 +39,7 @@
///
/// The VTKM_NO_ASSERT cmake and preprocessor option allows debugging builds
/// to remove assertions for performance reasons.
#if defined(VTKM_CUDA_VERSION_MAJOR) && (VTKM_CUDA_VERSION_MAJOR == 7)
//CUDA 7.5 doesn't support assert in device code
#define VTKM_ASSERT(condition) (void)(condition)
#elif !defined(NDEBUG) && !defined(VTKM_NO_ASSERT)
//Only assert if we are in debug mode and don't have VTKM_NO_ASSERT defined
#ifndef VTKM_NO_ASSERT
#define VTKM_ASSERT(condition) assert(condition)
#define VTKM_ASSERTS_CHECKED
#else

821
vtkm/Atomic.h Normal file

@ -0,0 +1,821 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
//
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//============================================================================
#ifndef vtk_m_Atomic_h
#define vtk_m_Atomic_h
#include <vtkm/List.h>
#include <vtkm/internal/Windows.h>
#include <atomic>
namespace vtkm
{
/// \brief Specifies memory order semantics for atomic operations.
///
/// The memory order parameter controls how all other memory operations are
/// ordered around a specific atomic instruction.
///
/// Memory access is complicated. Compilers can reorder instructions to optimize
/// scheduling, processors can speculatively read memory, and caches make
/// assumptions about coherency that we may not normally be aware of. Because of
/// this complexity, the order in which multiple updates to shared memory become
/// visible to other threads is not guaranteed, nor is it guaranteed that each
/// thread will see memory updates occur in the same order as any other thread.
/// This can lead to surprising behavior and cause problems when using atomics
/// to communicate between threads.
///
/// These problems are solved by using a standard set of memory orderings which
/// describe common access patterns used for shared memory programming. Their
/// goal is to provide guarantees that changes made in one thread will be visible
/// to another thread at a specific and predictable point in execution, regardless
/// of any hardware or compiler optimizations.
///
/// If unsure, use `SequentiallyConsistent` memory orderings. It will "do the right
/// thing", but at the cost of increased and possibly unnecessary memory ordering
/// restrictions. The other orderings are optimizations that are only applicable
/// in very specific situations.
///
/// See https://en.cppreference.com/w/cpp/atomic/memory_order for a detailed
/// description of the different orderings and their usage.
///
/// The memory order semantics follow those of other common atomic operations such as
/// the `std::memory_order` identifiers used for `std::atomic`.
///
/// Note that when a memory order is specified, the enforced memory order is guaranteed
/// to be as good or better than that requested.
///
enum class MemoryOrder
{
/// An atomic operation with `Relaxed` memory order enforces no synchronization or ordering
/// constraints on local reads and writes. That is, a read or write to a local, non-atomic
/// variable may be moved to before or after an atomic operation with `Relaxed` memory order.
///
Relaxed,
/// A load operation with `Acquire` memory order will enforce that any local read or write
/// operations listed in the program after the atomic will happen after the atomic.
///
Acquire,
/// A store operation with `Release` memory order will enforce that any local read or write
/// operations listed in the program before the atomic will happen before the atomic.
///
Release,
/// A read-modify-write operation with `AcquireAndRelease` memory order will enforce that any
/// local read or write operations listed in the program before the atomic will happen before the
/// atomic and likewise any read or write operations listed in the program after the atomic will
/// happen after the atomic.
///
AcquireAndRelease,
/// An atomic with `SequentiallyConsistent` memory order will enforce any appropriate semantics
/// as `Acquire`, `Release`, and `AcquireAndRelease`. Additionally, `SequentiallyConsistent` will
/// enforce a consistent ordering of atomic operations across all threads. That is, all threads
/// observe the modifications in the same order.
///
SequentiallyConsistent
};
namespace internal
{
/// Translates a `vtkm::MemoryOrder` into the matching `std::memory_order` constant.
///
/// A value that is not one of the enumerators yields `std::memory_order_seq_cst`.
VTKM_EXEC_CONT inline std::memory_order StdAtomicMemOrder(vtkm::MemoryOrder order)
{
  // Start from the strongest ordering; it doubles as the fallback that keeps
  // compilers from warning about a missing return.
  std::memory_order stdOrder = std::memory_order_seq_cst;
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      stdOrder = std::memory_order_relaxed;
      break;
    case vtkm::MemoryOrder::Acquire:
      stdOrder = std::memory_order_acquire;
      break;
    case vtkm::MemoryOrder::Release:
      stdOrder = std::memory_order_release;
      break;
    case vtkm::MemoryOrder::AcquireAndRelease:
      stdOrder = std::memory_order_acq_rel;
      break;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      stdOrder = std::memory_order_seq_cst;
      break;
  }
  return stdOrder;
}
} // namespace internal
} // namespace vtkm
#if defined(VTKM_CUDA_DEVICE_PASS)
namespace vtkm
{
namespace detail
{
// Fence to ensure that previous non-atomic stores are visible to other threads.
// Issues `__threadfence()` only when `order` is `Release`, `AcquireAndRelease`, or
// `SequentiallyConsistent`; `Relaxed` and `Acquire` issue no fence.
VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
{
if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
(order == vtkm::MemoryOrder::SequentiallyConsistent))
{
__threadfence();
}
}
// Fence to ensure that reads following an atomic operation are ordered after it.
// Issues `__threadfence()` only when `order` is `Acquire`, `AcquireAndRelease`, or
// `SequentiallyConsistent`; `Relaxed` and `Release` issue no fence.
VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
{
if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
(order == vtkm::MemoryOrder::SequentiallyConsistent))
{
__threadfence();
}
}
// CUDA implementation of an atomic load: reads `*addr` through a volatile pointer
// and brackets the read with fences according to `order`.
//  - `SequentiallyConsistent` additionally fences before the read.
//  - `AtomicLoadFence` fences after the read for the acquire-like orders.
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
{
const volatile T* vaddr = addr; /* volatile to bypass cache*/
if (order == vtkm::MemoryOrder::SequentiallyConsistent)
{
__threadfence();
}
const T value = *vaddr;
/* fence to ensure that dependent reads are correctly ordered */
AtomicLoadFence(order);
return value;
}
// CUDA implementation of an atomic store: fences first (for the release-like
// orders, see `AtomicStoreFence`) and then writes `value` through a volatile pointer.
// No fence is issued after the write.
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
volatile T* vaddr = addr; /* volatile to bypass cache */
/* fence to ensure that previous non-atomic stores are visible to other threads */
AtomicStoreFence(order);
*vaddr = value;
}
// CUDA read-modify-write implementations. Each one follows the same recipe:
// fence for the release-like orders, run the CUDA atomic intrinsic, fence for
// the acquire-like orders, and return the value the intrinsic reported (the
// value that was stored at `addr` before the operation).

// Atomically adds `arg` to `*addr` using `atomicAdd`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
auto result = atomicAdd(addr, arg);
AtomicLoadFence(order);
return result;
}
// Atomically ANDs `mask` into `*addr` using `atomicAnd`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
auto result = atomicAnd(addr, mask);
AtomicLoadFence(order);
return result;
}
// Atomically ORs `mask` into `*addr` using `atomicOr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
auto result = atomicOr(addr, mask);
AtomicLoadFence(order);
return result;
}
// Atomically XORs `mask` into `*addr` using `atomicXor`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
auto result = atomicXor(addr, mask);
AtomicLoadFence(order);
return result;
}
// Atomically inverts every bit of `*addr`. Implemented as an XOR with an
// all-ones mask, so the fencing is whatever `AtomicXorImpl` applies.
template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
// CUDA compare-and-swap: replaces `*addr` with `desired` when it currently
// equals `expected`, and returns what `atomicCAS` reports as the old value.
// Note the argument order: this function takes (desired, expected) whereas
// `atomicCAS` is called with (expected, desired).
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
T desired,
T expected,
vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
auto result = atomicCAS(addr, expected, desired);
AtomicLoadFence(order);
return result;
}
}
} // namespace vtkm::detail
#elif defined(VTKM_ENABLE_KOKKOS)
VTKM_THIRDPARTY_PRE_INCLUDE
// Superhack! Kokkos_Macros.hpp defines macros to include modifiers like __device__.
// However, we don't want to actually use those if compiling this with a standard
// C++ compiler (because this particular code does not run on a device). Thus,
// we want to disable that behavior when not using the device compiler. To do that,
// we are going to have to load the KokkosCore_config.h file (which you are not
// supposed to do), then undefine the device enables if necessary, then load
// Kokkos_Macros.hpp to finish the state.
#ifndef KOKKOS_MACROS_HPP
#define KOKKOS_MACROS_HPP
#include <KokkosCore_config.h>
#undef KOKKOS_MACROS_HPP
#define KOKKOS_DONT_INCLUDE_CORE_CONFIG_H
#if defined(KOKKOS_ENABLE_CUDA) && !defined(VTKM_CUDA)
#undef KOKKOS_ENABLE_CUDA
#endif
#endif //KOKKOS_MACROS_HPP not loaded
#include <Kokkos_Core.hpp>
VTKM_THIRDPARTY_POST_INCLUDE
namespace vtkm
{
namespace detail
{
// Fence to ensure that previous non-atomic stores are visible to other threads.
// Calls `Kokkos::memory_fence()` only when `order` is `Release`, `AcquireAndRelease`,
// or `SequentiallyConsistent`; `Relaxed` and `Acquire` issue no fence.
VTKM_EXEC_CONT inline void AtomicStoreFence(vtkm::MemoryOrder order)
{
if ((order == vtkm::MemoryOrder::Release) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
(order == vtkm::MemoryOrder::SequentiallyConsistent))
{
Kokkos::memory_fence();
}
}
// Fence to ensure that reads following an atomic operation are ordered after it.
// Calls `Kokkos::memory_fence()` only when `order` is `Acquire`, `AcquireAndRelease`,
// or `SequentiallyConsistent`; `Relaxed` and `Release` issue no fence.
VTKM_EXEC_CONT inline void AtomicLoadFence(vtkm::MemoryOrder order)
{
if ((order == vtkm::MemoryOrder::Acquire) || (order == vtkm::MemoryOrder::AcquireAndRelease) ||
(order == vtkm::MemoryOrder::SequentiallyConsistent))
{
Kokkos::memory_fence();
}
}
// Kokkos implementation of an atomic load. Forwards to `Kokkos::Impl::atomic_load`
// with the Kokkos memory-order tag that corresponds to `order`. Orders that carry
// release semantics are meaningless for a load and are mapped to acquire.
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
{
switch (order)
{
case vtkm::MemoryOrder::Relaxed:
return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_relaxed);
case vtkm::MemoryOrder::Acquire:
case vtkm::MemoryOrder::Release: // Release doesn't make sense. Use Acquire.
case vtkm::MemoryOrder::AcquireAndRelease: // Release doesn't make sense. Use Acquire.
return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_acquire);
case vtkm::MemoryOrder::SequentiallyConsistent:
return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst);
}
// Should never reach here, but avoid compiler warnings
return Kokkos::Impl::atomic_load(addr, Kokkos::Impl::memory_order_seq_cst);
}
// Kokkos implementation of an atomic store. Forwards to `Kokkos::Impl::atomic_store`
// with the Kokkos memory-order tag that corresponds to `order`. Orders that carry
// acquire semantics are meaningless for a store and are mapped to release.
// An `order` value outside the enumeration performs no store at all.
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
switch (order)
{
case vtkm::MemoryOrder::Relaxed:
Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_relaxed);
break;
case vtkm::MemoryOrder::Acquire: // Acquire doesn't make sense. Use Release.
case vtkm::MemoryOrder::Release:
case vtkm::MemoryOrder::AcquireAndRelease: // Acquire doesn't make sense. Use Release.
Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_release);
break;
case vtkm::MemoryOrder::SequentiallyConsistent:
Kokkos::Impl::atomic_store(addr, value, Kokkos::Impl::memory_order_seq_cst);
break;
}
}
// Kokkos read-modify-write implementations. The `Kokkos::atomic_fetch_*` calls
// below are given no memory-order argument, so the requested ordering is
// provided by the explicit fences placed before (release-like orders) and
// after (acquire-like orders) each call. Each function returns the result of
// the fetch call, i.e. the value held at `addr` before the operation.

// Atomically adds `arg` to `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_add(addr, arg);
AtomicLoadFence(order);
return result;
}
// Atomically ANDs `mask` into `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_and(addr, mask);
AtomicLoadFence(order);
return result;
}
// Atomically ORs `mask` into `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_or(addr, mask);
AtomicLoadFence(order);
return result;
}
// Atomically XORs `mask` into `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
T result = Kokkos::atomic_fetch_xor(addr, mask);
AtomicLoadFence(order);
return result;
}
// Atomically inverts every bit of `*addr`. Implemented as an XOR with an
// all-ones mask, so the fencing is whatever `AtomicXorImpl` applies.
template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
return AtomicXorImpl(addr, static_cast<T>(~T{ 0u }), order);
}
// Kokkos compare-and-swap: replaces `*addr` with `desired` when it currently
// equals `expected`, and returns the result of `Kokkos::atomic_compare_exchange`.
// Note the argument order: this function takes (desired, expected) whereas the
// Kokkos call is given (expected, desired).
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
T desired,
T expected,
vtkm::MemoryOrder order)
{
AtomicStoreFence(order);
T result = Kokkos::atomic_compare_exchange(addr, expected, desired);
AtomicLoadFence(order);
return result;
}
}
} // namespace vtkm::detail
#elif defined(VTKM_MSVC)
// Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64
#include <cstdint>
#include <cstring>
#include <intrin.h> // For MSVC atomics
namespace vtkm
{
namespace detail
{
// Reinterprets the bytes of `src` as a value of type `To`. Used below to move
// between the unsigned vtkm integer types and the signed Windows types taken by
// the Interlocked intrinsics. `To` and `From` must have the same size (checked
// at compile time).
template <typename To, typename From>
VTKM_EXEC_CONT inline To BitCast(const From& src)
{
// The memcpy should be removed by the compiler when possible, but this
// works around a host of issues with bitcasting using reinterpret_cast.
VTKM_STATIC_ASSERT(sizeof(From) == sizeof(To));
To dst;
std::memcpy(&dst, &src, sizeof(From));
return dst;
}
// Overload taking a single template argument: forwards `src` and returns it as `T`
// without any byte copy.
// NOTE(review): std::forward is declared in <utility>, which this header does not
// include directly — presumably it arrives transitively; confirm.
template <typename T>
VTKM_EXEC_CONT inline T BitCast(T&& src)
{
return std::forward<T>(src);
}
// Note about Load and Store implementations:
//
// "Simple reads and writes to properly-aligned 32-bit variables are atomic
// operations"
//
// "Simple reads and writes to properly aligned 64-bit variables are atomic on
// 64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
// atomic on 32-bit Windows."
//
// "Reads and writes to variables of other sizes [than 32 or 64 bits] are not
// guaranteed to be atomic on any platform."
//
// https://docs.microsoft.com/en-us/windows/desktop/sync/interlocked-variable-access
// MSVC atomic loads, one overload per supported width. Each overload reads the
// value through a volatile pointer and then issues a `std::atomic_thread_fence`
// with the std memory order corresponding to `order`. The fence comes after the
// read; no fence is issued before it.
VTKM_EXEC_CONT inline vtkm::UInt8 AtomicLoadImpl(const vtkm::UInt8* addr, vtkm::MemoryOrder order)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 8-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt8*>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
VTKM_EXEC_CONT inline vtkm::UInt16 AtomicLoadImpl(const vtkm::UInt16* addr, vtkm::MemoryOrder order)
{
// This assumes that the memory interface is smart enough to load a 32-bit
// word atomically and a properly aligned 16-bit word from it.
// We could build address masks and do shifts to perform this manually if
// this assumption is incorrect.
auto result = *static_cast<volatile const vtkm::UInt16*>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
// 32-bit load: relies on aligned 32-bit reads being atomic (see the note above
// these overloads).
VTKM_EXEC_CONT inline vtkm::UInt32 AtomicLoadImpl(const vtkm::UInt32* addr, vtkm::MemoryOrder order)
{
auto result = *static_cast<volatile const vtkm::UInt32*>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
// 64-bit load: per the note above these overloads, aligned 64-bit reads are only
// guaranteed atomic on 64-bit Windows.
VTKM_EXEC_CONT inline vtkm::UInt64 AtomicLoadImpl(const vtkm::UInt64* addr, vtkm::MemoryOrder order)
{
auto result = *static_cast<volatile const vtkm::UInt64*>(addr);
std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
return result;
}
// MSVC atomic stores for 8- and 16-bit values. Both are implemented as an
// interlocked exchange whose returned old value is discarded. The `order`
// argument is not consulted.
// NOTE(review): presumably the exchange intrinsic already provides at least the
// ordering any `order` could request — confirm against the intrinsic's documentation.
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt8* addr,
vtkm::UInt8 val,
vtkm::MemoryOrder order)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange8(reinterpret_cast<volatile CHAR*>(addr), BitCast<CHAR>(val));
}
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt16* addr,
vtkm::UInt16 val,
vtkm::MemoryOrder order)
{
// There doesn't seem to be an atomic store instruction in the windows
// API, so just exchange and discard the result.
_InterlockedExchange16(reinterpret_cast<volatile SHORT*>(addr), BitCast<SHORT>(val));
}
// MSVC atomic store of a 32-bit value.
//
// A fence matching `order` is issued before the write so that earlier memory
// operations are ordered ahead of it. For `SequentiallyConsistent` a second
// fence is issued after the write: the load overloads in this section fence
// only after their read, so without a trailing fence here nothing would keep
// this store from being reordered with a later load in the same thread.
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt32* addr,
                                           vtkm::UInt32 val,
                                           vtkm::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
  if (order == vtkm::MemoryOrder::SequentiallyConsistent)
  {
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }
}
// MSVC atomic store of a 64-bit value. Same fencing scheme as the 32-bit
// overload above: leading fence for every order, plus a trailing fence for
// `SequentiallyConsistent`.
VTKM_EXEC_CONT inline void AtomicStoreImpl(vtkm::UInt64* addr,
                                           vtkm::UInt64 val,
                                           vtkm::MemoryOrder order)
{
  std::atomic_thread_fence(internal::StdAtomicMemOrder(order));
  *addr = val;
  if (order == vtkm::MemoryOrder::SequentiallyConsistent)
  {
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }
}
// VTKM_ATOMIC_OP defines one read-modify-write function `vtkmName` for a single
// integer width. The vtkm operand is bit-cast to the Windows type expected by
// the `winName##suffix` intrinsic and the intrinsic's return value is bit-cast
// back. The `order` parameter is accepted for interface parity but not used.
#define VTKM_ATOMIC_OP(vtkmName, winName, vtkmType, winType, suffix) \
  VTKM_EXEC_CONT inline vtkmType vtkmName(vtkmType* addr, vtkmType arg, vtkm::MemoryOrder order) \
  { \
    return BitCast<vtkmType>( \
      winName##suffix(reinterpret_cast<volatile winType*>(addr), BitCast<winType>(arg))); \
  }
// VTKM_ATOMIC_OPS_FOR_TYPE stamps out the full set of operations for one width:
// add/and/or/xor through VTKM_ATOMIC_OP, NOT as an XOR with an all-ones mask, and
// compare-and-swap through `_InterlockedCompareExchange##suffix` (which is called
// with the exchange value before the comparand).
#define VTKM_ATOMIC_OPS_FOR_TYPE(vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicAddImpl, _InterlockedExchangeAdd, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicAndImpl, _InterlockedAnd, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicOrImpl, _InterlockedOr, vtkmType, winType, suffix) \
  VTKM_ATOMIC_OP(AtomicXorImpl, _InterlockedXor, vtkmType, winType, suffix) \
  VTKM_EXEC_CONT inline vtkmType AtomicNotImpl(vtkmType* addr, vtkm::MemoryOrder order) \
  { \
    return AtomicXorImpl(addr, static_cast<vtkmType>(~vtkmType{ 0u }), order); \
  } \
  VTKM_EXEC_CONT inline vtkmType AtomicCompareAndSwapImpl( \
    vtkmType* addr, vtkmType desired, vtkmType expected, vtkm::MemoryOrder order) \
  { \
    return BitCast<vtkmType>( \
      _InterlockedCompareExchange##suffix(reinterpret_cast<volatile winType*>(addr), \
                                          BitCast<winType>(desired), \
                                          BitCast<winType>(expected))); \
  }
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt8, CHAR, 8)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt16, SHORT, 16)
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt32, LONG, )
VTKM_ATOMIC_OPS_FOR_TYPE(vtkm::UInt64, LONG64, 64)
// Both helper macros are implementation details of this header; undefine them so
// that neither leaks into code that includes it.
#undef VTKM_ATOMIC_OPS_FOR_TYPE
#undef VTKM_ATOMIC_OP
}
} // namespace vtkm::detail
#else // gcc/clang for CPU
// Supports vtkm::UInt8, vtkm::UInt16, vtkm::UInt32, vtkm::UInt64
#include <cstdint>
#include <cstring>
namespace vtkm
{
namespace detail
{
// Translates a `vtkm::MemoryOrder` into the matching `__ATOMIC_*` constant
// accepted by the gcc/clang `__atomic` builtins. A value that is not one of the
// enumerators yields `__ATOMIC_SEQ_CST`.
VTKM_EXEC_CONT inline int GccAtomicMemOrder(vtkm::MemoryOrder order)
{
  // Start from the strongest ordering; it doubles as the fallback that keeps
  // compilers from warning about a missing return.
  int gccOrder = __ATOMIC_SEQ_CST;
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
      gccOrder = __ATOMIC_RELAXED;
      break;
    case vtkm::MemoryOrder::Acquire:
      gccOrder = __ATOMIC_ACQUIRE;
      break;
    case vtkm::MemoryOrder::Release:
      gccOrder = __ATOMIC_RELEASE;
      break;
    case vtkm::MemoryOrder::AcquireAndRelease:
      gccOrder = __ATOMIC_ACQ_REL;
      break;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      gccOrder = __ATOMIC_SEQ_CST;
      break;
  }
  return gccOrder;
}
// gcc/clang atomic load: reads `*addr` with `__atomic_load_n` using the builtin
// memory order that corresponds to `order`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoadImpl(const T* addr, vtkm::MemoryOrder order)
{
  const int gccOrder = GccAtomicMemOrder(order);
  return __atomic_load_n(addr, gccOrder);
}
// gcc/clang atomic store: writes `value` to `*addr` with `__atomic_store_n` using
// the builtin memory order that corresponds to `order`.
template <typename T>
VTKM_EXEC_CONT inline void AtomicStoreImpl(T* addr, T value, vtkm::MemoryOrder order)
{
  const int gccOrder = GccAtomicMemOrder(order);
  __atomic_store_n(addr, value, gccOrder);
}
// gcc/clang read-modify-write implementations. Each forwards to the matching
// `__atomic_fetch_*` builtin with the builtin memory order corresponding to
// `order`, and returns the builtin's result (the value previously at `addr`).

// Atomically adds `arg` to `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicAddImpl(T* addr, T arg, vtkm::MemoryOrder order)
{
  const int gccOrder = GccAtomicMemOrder(order);
  return __atomic_fetch_add(addr, arg, gccOrder);
}
// Atomically ANDs `mask` into `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicAndImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  const int gccOrder = GccAtomicMemOrder(order);
  return __atomic_fetch_and(addr, mask, gccOrder);
}
// Atomically ORs `mask` into `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicOrImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  const int gccOrder = GccAtomicMemOrder(order);
  return __atomic_fetch_or(addr, mask, gccOrder);
}
// Atomically XORs `mask` into `*addr`.
template <typename T>
VTKM_EXEC_CONT inline T AtomicXorImpl(T* addr, T mask, vtkm::MemoryOrder order)
{
  const int gccOrder = GccAtomicMemOrder(order);
  return __atomic_fetch_xor(addr, mask, gccOrder);
}
// Atomically inverts every bit of `*addr` by XOR-ing it with an all-ones mask.
template <typename T>
VTKM_EXEC_CONT inline T AtomicNotImpl(T* addr, vtkm::MemoryOrder order)
{
  const T allBitsSet = static_cast<T>(~T{ 0u });
  return AtomicXorImpl(addr, allBitsSet, order);
}
// Selects the memory order to use when a compare-exchange fails. On failure no
// store takes place, so the failure order of `__atomic_compare_exchange_n` may
// not be `__ATOMIC_RELEASE` or `__ATOMIC_ACQ_REL`. The release component of the
// requested order is therefore dropped while any acquire component is kept.
VTKM_EXEC_CONT inline int GccAtomicCompareFailureMemOrder(vtkm::MemoryOrder order)
{
  switch (order)
  {
    case vtkm::MemoryOrder::Relaxed:
    case vtkm::MemoryOrder::Release:
      return __ATOMIC_RELAXED;
    case vtkm::MemoryOrder::Acquire:
    case vtkm::MemoryOrder::AcquireAndRelease:
      return __ATOMIC_ACQUIRE;
    case vtkm::MemoryOrder::SequentiallyConsistent:
      return __ATOMIC_SEQ_CST;
  }
  // Should never reach here, but avoid compiler warnings
  return __ATOMIC_SEQ_CST;
}
// gcc/clang compare-and-swap: replaces `*addr` with `desired` when it currently
// equals `expected`. Returns the value that was at `addr` before the call:
// the builtin overwrites the local `expected` with the observed value on
// failure and leaves it untouched (equal to the old value) on success.
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwapImpl(T* addr,
                                                 T desired,
                                                 T expected,
                                                 vtkm::MemoryOrder order)
{
  // `false` selects the strong variant, which does not fail spuriously.
  __atomic_compare_exchange_n(addr,
                              &expected,
                              desired,
                              false,
                              GccAtomicMemOrder(order),
                              GccAtomicCompareFailureMemOrder(order));
  return expected;
}
}
} // namespace vtkm::detail
#endif // gcc/clang
namespace vtkm
{
namespace detail
{
// Maps an integer type to its counterpart of the same kind but opposite
// signedness: signed types map to `std::make_unsigned<T>::type`, unsigned types
// to `std::make_signed<T>::type`. Used by the public atomic functions below to
// accept an operand whose signedness differs from the pointed-to type.
// NOTE(review): these traits live in <type_traits>, which this header does not
// include directly — presumably it arrives transitively; confirm.
template <typename T>
using OppositeSign = typename std::conditional<std::is_signed<T>::value,
typename std::make_unsigned<T>::type,
typename std::make_signed<T>::type>::type;
} // namespace detail
/// \brief The preferred type to use for atomic operations.
///
using AtomicTypePreferred = vtkm::UInt32;
/// \brief A list of types that can be used with atomic operations.
///
/// TODO: Adjust based on devices being compiled.
///
/// BUG: vtkm::UInt64 is provided in this list even though it is not supported on CUDA
/// before compute capability 3.5.
///
/// Note that the MSVC and gcc/clang implementations in this header are commented as
/// also supporting vtkm::UInt8 and vtkm::UInt16; those types are not part of this list.
///
using AtomicTypesSupported = vtkm::List<vtkm::UInt32, vtkm::UInt64>;
/// \brief Atomic function to load a value from a shared memory location.
///
/// Given a pointer, returns the value in that pointer. If other threads are writing to
/// that same location, the returned value will be consistent to what was present before
/// or after that write.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicLoad(const T* pointer,
                                   vtkm::MemoryOrder order = vtkm::MemoryOrder::Acquire)
{
  // Defer to the backend selected at compile time (CUDA, Kokkos, MSVC or gcc/clang).
  const T loaded = detail::AtomicLoadImpl(pointer, order);
  return loaded;
}
///@{
/// \brief Atomic function to save a value to a shared memory location.
///
/// Given a pointer and a value, stores that value at the pointer's location. If two
/// threads are simultaneously using `AtomicStore` at the same location, the resulting
/// value will be one of the values or the other (as opposed to a mix of bits).
///
template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       T value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  // Same-type overload: hand the value straight to the backend.
  const T toStore = value;
  detail::AtomicStoreImpl(pointer, toStore, order);
}
template <typename T>
VTKM_EXEC_CONT inline void AtomicStore(T* pointer,
                                       detail::OppositeSign<T> value,
                                       vtkm::MemoryOrder order = vtkm::MemoryOrder::Release)
{
  // Opposite-signedness overload: convert to the pointed-to type first.
  const T toStore = static_cast<T>(value);
  detail::AtomicStoreImpl(pointer, toStore, order);
}
///@}
///@{
/// \brief Atomic function to add a value to a shared memory location.
///
/// Given a pointer and an operand, adds the operand to the value at the given memory
/// location. The result of the addition is put into that memory location and the
/// _old_ value that was originally in the memory is returned. For example, if you
/// call `AtomicAdd` on a memory location that holds a 5 with an operand of 3, the
/// value of 8 is stored in the memory location and the value of 5 is returned.
///
/// If multiple threads call `AtomicAdd` simultaneously, they will not interfere with
/// each other. The result will be consistent as if one was called before the other
/// (although it is indeterminate which will be applied first).
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Same-type overload: the backend returns the value held before the addition.
  const T previous = detail::AtomicAddImpl(pointer, operand, order);
  return previous;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAdd(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Opposite-signedness overload: convert the operand to the pointed-to type first.
  const T converted = static_cast<T>(operand);
  const T previous = detail::AtomicAddImpl(pointer, converted, order);
  return previous;
}
///@}
///@{
/// \brief Atomic function to AND bits to a shared memory location.
///
/// Given a pointer and an operand, performs a bitwise AND of the operand and the value at the given
/// memory location. The result of the AND is put into that memory location and the _old_ value
/// that was originally in the memory is returned. For example, if you call `AtomicAnd` on a memory
/// location that holds a 0x6 with an operand of 0x3, the value of 0x2 is stored in the memory
/// location and the value of 0x6 is returned.
///
/// If multiple threads call `AtomicAnd` simultaneously, they will not interfere with
/// each other. The result will be consistent as if one was called before the other
/// (although it is indeterminate which will be applied first).
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Same-type overload: the backend returns the value held before the AND.
  const T previous = detail::AtomicAndImpl(pointer, operand, order);
  return previous;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicAnd(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Opposite-signedness overload: convert the operand to the pointed-to type first.
  const T converted = static_cast<T>(operand);
  const T previous = detail::AtomicAndImpl(pointer, converted, order);
  return previous;
}
///@}
///@{
/// \brief Atomic function to OR bits to a shared memory location.
///
/// Given a pointer and an operand, performs a bitwise OR of the operand and the value at the given
/// memory location. The result of the OR is put into that memory location and the _old_ value
/// that was originally in the memory is returned. For example, if you call `AtomicOr` on a memory
/// location that holds a 0x6 with an operand of 0x3, the value of 0x7 is stored in the memory
/// location and the value of 0x6 is returned.
///
/// If multiple threads call `AtomicOr` simultaneously, they will not interfere with
/// each other. The result will be consistent as if one was called before the other
/// (although it is indeterminate which will be applied first).
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Same-type overload: the backend returns the value held before the OR.
  const T previous = detail::AtomicOrImpl(pointer, operand, order);
  return previous;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicOr(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Opposite-signedness overload: convert the operand to the pointed-to type first.
  const T converted = static_cast<T>(operand);
  const T previous = detail::AtomicOrImpl(pointer, converted, order);
  return previous;
}
///@}
///@{
/// \brief Atomic function to XOR bits to a shared memory location.
///
/// Given a pointer and an operand, performs a bitwise exclusive-OR of the operand and the value at
/// the given memory location. The result of the XOR is put into that memory location and the _old_
/// value that was originally in the memory is returned. For example, if you call `AtomicXor` on a
/// memory location that holds a 0x6 with an operand of 0x3, the value of 0x5 is stored in the
/// memory location and the value of 0x6 is returned.
///
/// If multiple threads call `AtomicXor` simultaneously, they will not interfere with
/// each other. The result will be consistent as if one was called before the other.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(
  T* pointer,
  T operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Same-type overload: the backend returns the value held before the XOR.
  const T previous = detail::AtomicXorImpl(pointer, operand, order);
  return previous;
}
template <typename T>
VTKM_EXEC_CONT inline T AtomicXor(
  T* pointer,
  detail::OppositeSign<T> operand,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // Opposite-signedness overload: convert the operand to the pointed-to type first.
  const T converted = static_cast<T>(operand);
  const T previous = detail::AtomicXorImpl(pointer, converted, order);
  return previous;
}
///@}
/// \brief Atomic function to NOT bits to a shared memory location.
///
/// Given a pointer, performs a bitwise NOT of the value at the given
/// memory location. The result of the NOT is put into that memory location and the _old_ value
/// that was originally in the memory is returned.
///
/// If multiple threads call `AtomicNot` simultaneously, they will not interfere with
/// each other. The result will be consistent as if one was called before the other.
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicNot(
  T* pointer,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // The backend returns the value held before the bits were inverted.
  const T previous = detail::AtomicNotImpl(pointer, order);
  return previous;
}
/// \brief Atomic function that replaces a value given a condition.
///
/// Given a pointer, a new desired value, and an expected value, replaces the value at the
/// pointer if it is the same as the expected value with the new desired value. If the original
/// value in the pointer does not equal the expected value, then the memory at the pointer
/// remains unchanged. In either case, the function returns the _old_ original value that
/// was at the pointer.
///
/// If multiple threads call `AtomicCompareAndSwap` simultaneously, the result will be consistent
/// as if one was called before the other (although it is indeterminate which will be applied
/// first).
///
template <typename T>
VTKM_EXEC_CONT inline T AtomicCompareAndSwap(
  T* pointer,
  T desired,
  T expected,
  vtkm::MemoryOrder order = vtkm::MemoryOrder::SequentiallyConsistent)
{
  // The backend returns the value observed at `pointer`, whether or not the swap happened.
  const T previous = detail::AtomicCompareAndSwapImpl(pointer, desired, expected, order);
  return previous;
}
} // namespace vtkm
#endif //vtk_m_Atomic_h

@ -55,6 +55,11 @@ struct Bitset
return ((this->Mask & (static_cast<MaskType>(1) << bitIndex)) != 0);
}
/// Returns true when this bitset and `otherBitset` hold identical masks,
/// i.e. every bit has the same state in both.
VTKM_EXEC_CONT bool operator==(const vtkm::Bitset<MaskType>& otherBitset) const
{
return this->Mask == otherBitset.Mask;
}
private:
MaskType Mask = 0;
};

@ -19,6 +19,7 @@ vtkm_install_headers(
set(headers
Algorithms.h
Assert.h
Atomic.h
BinaryPredicates.h
BinaryOperators.h
Bitset.h

@ -79,8 +79,8 @@ struct CellShapeTagVtkmToVtkc;
/// concept check to make sure that a template argument is a proper cell shape
/// tag.
///
#define VTKM_IS_CELL_SHAPE_TAG(tag) \
VTKM_STATIC_ASSERT_MSG(::vtkm::internal::CellShapeTagCheck<tag>::value, \
#define VTKM_IS_CELL_SHAPE_TAG(tag) \
VTKM_STATIC_ASSERT_MSG(::vtkm::internal::CellShapeTagCheck<tag>::value, \
"Provided type is not a valid VTK-m cell shape tag.")
/// A traits-like class to get an CellShapeId known at compile time to a tag.
@ -98,32 +98,32 @@ struct CellShapeIdToTag
// Define a tag for each cell shape as well as the support structs to go
// between tags and ids. The following macro is only valid here.
#define VTKM_DEFINE_CELL_TAG(name, idname) \
struct CellShapeTag##name \
{ \
static constexpr vtkm::UInt8 Id = vtkm::idname; \
}; \
namespace internal \
{ \
template <> \
struct CellShapeTagCheck<vtkm::CellShapeTag##name> : std::true_type \
{ \
}; \
template <> \
struct CellShapeTagVtkmToVtkc<vtkm::CellShapeTag##name> \
{ \
using Type = lcl::name; \
}; \
} \
static inline VTKM_EXEC_CONT const char* GetCellShapeName(vtkm::CellShapeTag##name) \
{ \
return #name; \
} \
template <> \
struct CellShapeIdToTag<vtkm::idname> \
{ \
using valid = std::true_type; \
using Tag = vtkm::CellShapeTag##name; \
#define VTKM_DEFINE_CELL_TAG(name, idname) \
struct CellShapeTag##name \
{ \
static constexpr vtkm::UInt8 Id = vtkm::idname; \
}; \
namespace internal \
{ \
template <> \
struct CellShapeTagCheck<vtkm::CellShapeTag##name> : std::true_type \
{ \
}; \
template <> \
struct CellShapeTagVtkmToVtkc<vtkm::CellShapeTag##name> \
{ \
using Type = lcl::name; \
}; \
} \
static inline VTKM_EXEC_CONT const char* GetCellShapeName(vtkm::CellShapeTag##name) \
{ \
return #name; \
} \
template <> \
struct CellShapeIdToTag<vtkm::idname> \
{ \
using valid = std::true_type; \
using Tag = vtkm::CellShapeTag##name; \
}
VTKM_DEFINE_CELL_TAG(Empty, CELL_SHAPE_EMPTY);
@ -189,12 +189,12 @@ inline lcl::Cell make_LclCellShapeTag(const vtkm::CellShapeTagGeneric& tag,
} // namespace internal
#define vtkmGenericCellShapeMacroCase(cellShapeId, call) \
case vtkm::cellShapeId: \
{ \
using CellShapeTag = vtkm::CellShapeIdToTag<vtkm::cellShapeId>::Tag; \
call; \
} \
#define vtkmGenericCellShapeMacroCase(cellShapeId, call) \
case vtkm::cellShapeId: \
{ \
using CellShapeTag = vtkm::CellShapeIdToTag<vtkm::cellShapeId>::Tag; \
call; \
} \
break
/// \brief A macro used in a \c switch statement to determine cell shape.
@ -227,17 +227,17 @@ inline lcl::Cell make_LclCellShapeTag(const vtkm::CellShapeTagGeneric& tag,
/// Note that \c vtkmGenericCellShapeMacro does not have a default case. You
/// should consider adding one that gives a
///
#define vtkmGenericCellShapeMacro(call) \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_EMPTY, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_VERTEX, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_LINE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLY_LINE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_TRIANGLE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLYGON, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_QUAD, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_TETRA, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_HEXAHEDRON, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_WEDGE, call); \
#define vtkmGenericCellShapeMacro(call) \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_EMPTY, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_VERTEX, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_LINE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLY_LINE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_TRIANGLE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_POLYGON, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_QUAD, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_TETRA, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_HEXAHEDRON, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_WEDGE, call); \
vtkmGenericCellShapeMacroCase(CELL_SHAPE_PYRAMID, call)
} // namespace vtkm

@ -81,23 +81,23 @@ struct CellTraits
// Define traits for every cell type.
#define VTKM_DEFINE_CELL_TRAITS(name, dimensions, numPoints) \
template <> \
struct CellTraits<vtkm::CellShapeTag##name> \
{ \
static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions; \
using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>; \
using IsSizeFixed = vtkm::CellTraitsTagSizeFixed; \
static constexpr vtkm::IdComponent NUM_POINTS = numPoints; \
#define VTKM_DEFINE_CELL_TRAITS(name, dimensions, numPoints) \
template <> \
struct CellTraits<vtkm::CellShapeTag##name> \
{ \
static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions; \
using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>; \
using IsSizeFixed = vtkm::CellTraitsTagSizeFixed; \
static constexpr vtkm::IdComponent NUM_POINTS = numPoints; \
}
#define VTKM_DEFINE_CELL_TRAITS_VARIABLE(name, dimensions) \
template <> \
struct CellTraits<vtkm::CellShapeTag##name> \
{ \
static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions; \
using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>; \
using IsSizeFixed = vtkm::CellTraitsTagSizeVariable; \
#define VTKM_DEFINE_CELL_TRAITS_VARIABLE(name, dimensions) \
template <> \
struct CellTraits<vtkm::CellShapeTag##name> \
{ \
static constexpr vtkm::IdComponent TOPOLOGICAL_DIMENSIONS = dimensions; \
using TopologicalDimensionsTag = vtkm::CellTopologicalDimensionsTag<TOPOLOGICAL_DIMENSIONS>; \
using IsSizeFixed = vtkm::CellTraitsTagSizeVariable; \
}
VTKM_DEFINE_CELL_TRAITS(Empty, 0, 0);

@ -13,9 +13,9 @@
#include <vtkm/StaticAssert.h>
#include <vtkm/Types.h>
#define VTK_M_DEPRECATED_MAKE_MESSAGE(...) \
#define VTK_M_DEPRECATED_MAKE_MESSAGE(...) \
VTKM_EXPAND(VTK_M_DEPRECATED_MAKE_MESSAGE_IMPL(__VA_ARGS__, "", vtkm::internal::NullType{}))
#define VTK_M_DEPRECATED_MAKE_MESSAGE_IMPL(version, message, ...) \
#define VTK_M_DEPRECATED_MAKE_MESSAGE_IMPL(version, message, ...) \
message " Deprecated in version " #version "."
/// \def VTKM_DEPRECATED(version, message)
@ -104,7 +104,7 @@
#if defined(VTKM_GCC) || defined(VTKM_CLANG)
#define VTKM_DEPRECATED_SUPPRESS_SUPPORTED
#define VTKM_DEPRECATED_SUPPRESS_BEGIN \
#define VTKM_DEPRECATED_SUPPRESS_BEGIN \
_Pragma("GCC diagnostic push") _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
#define VTKM_DEPRECATED_SUPPRESS_END _Pragma("GCC diagnostic pop")

@ -108,14 +108,14 @@ VTKM_EXEC_CONT inline vtkm::ErrorCode LclErrorToVtkmError(lcl::ErrorCode code) n
} // namespace vtkm
/// Evaluates `call` exactly once and stores the result in a local `status`.
/// If `status` differs from ::vtkm::ErrorCode::Success, `status` is returned
/// from the function in which the macro is used; otherwise execution continues.
/// The body is wrapped in do { ... } while (false) so that the macro expands
/// to a single statement and the invocation must be terminated with a
/// semicolon.
#define VTKM_RETURN_ON_ERROR(call) \
do \
{ \
auto status = (call); \
if (status != ::vtkm::ErrorCode::Success) \
{ \
return status; \
} \
#define VTKM_RETURN_ON_ERROR(call) \
do \
{ \
auto status = (call); \
if (status != ::vtkm::ErrorCode::Success) \
{ \
return status; \
} \
} while (false)
#endif //vtk_m_exec_ErrorCode_h

@ -16,7 +16,7 @@ namespace vtkm
template <typename CoordType, int Dim, bool IsTwoSided>
template <int Dim_, typename std::enable_if<Dim_ == 2, int>::type>
Ray<CoordType, Dim, IsTwoSided>::Ray()
VTKM_EXEC_CONT Ray<CoordType, Dim, IsTwoSided>::Ray()
: Origin{ 0.f }
, Direction{ 1.f, 0.f }
{
@ -24,50 +24,42 @@ Ray<CoordType, Dim, IsTwoSided>::Ray()
/// Default constructor, enabled only when Dim_ == 3.
/// Origin is brace-initialized from 0.f and Direction from { 1.f, 0.f, 0.f }.
template <typename CoordType, int Dim, bool IsTwoSided>
template <int Dim_, typename std::enable_if<Dim_ == 3, int>::type>
Ray<CoordType, Dim, IsTwoSided>::Ray()
VTKM_EXEC_CONT Ray<CoordType, Dim, IsTwoSided>::Ray()
: Origin{ 0.f }
, Direction{ 1.f, 0.f, 0.f }
{
}
/// Constructs a ray from a line segment: Origin is the segment's first
/// endpoint (Endpoints[0]) and Direction is vtkm::Normal applied to the
/// segment's Direction().
template <typename CoordType, int Dim, bool IsTwoSided>
Ray<CoordType, Dim, IsTwoSided>::Ray(const LineSegment<CoordType, Dim>& segment)
VTKM_EXEC_CONT Ray<CoordType, Dim, IsTwoSided>::Ray(const LineSegment<CoordType, Dim>& segment)
: Origin(segment.Endpoints[0])
, Direction(vtkm::Normal(segment.Direction()))
{
}
/// Constructs a ray whose Origin is \c point and whose Direction is
/// vtkm::Normal applied to \c direction (the caller's vector is not stored
/// as given).
template <typename CoordType, int Dim, bool IsTwoSided>
Ray<CoordType, Dim, IsTwoSided>::Ray(const Vector& point, const Vector& direction)
VTKM_EXEC_CONT Ray<CoordType, Dim, IsTwoSided>::Ray(const Vector& point, const Vector& direction)
: Origin(point)
, Direction(vtkm::Normal(direction))
{
}
/// Returns the point Origin + Direction * param.
/// No range check is applied to \c param in this function.
template <typename CoordType, int Dim, bool IsTwoSided>
typename Ray<CoordType, Dim, IsTwoSided>::Vector Ray<CoordType, Dim, IsTwoSided>::Evaluate(
CoordType param) const
VTKM_EXEC_CONT typename Ray<CoordType, Dim, IsTwoSided>::Vector
Ray<CoordType, Dim, IsTwoSided>::Evaluate(CoordType param) const
{
auto pointOnLine = this->Origin + this->Direction * param;
return pointOnLine;
}
/// Reports whether the ray is usable: returns true when the first component
/// of Direction is not infinite. Only Direction[0] is inspected.
template <typename CoordType, int Dim, bool IsTwoSided>
bool Ray<CoordType, Dim, IsTwoSided>::IsValid() const
VTKM_EXEC_CONT bool Ray<CoordType, Dim, IsTwoSided>::IsValid() const
{
// At least on Ubuntu 17.10, cuda 9.1 will fail with an internal
// compiler error when calling vtkm::IsInf() here. But the fix
// below works. The fix should be removed as soon as our dashboards
// allow it.
#if __CUDACC_VER_MAJOR__ == 9 && __CUDACC_VER_MINOR__ == 1
return !isinf(this->Direction[0]);
#else
return !vtkm::IsInf(this->Direction[0]);
#endif
}
template <typename CoordType, int Dim, bool IsTwoSided>
CoordType Ray<CoordType, Dim, IsTwoSided>::DistanceTo(const Vector& point) const
VTKM_EXEC_CONT CoordType Ray<CoordType, Dim, IsTwoSided>::DistanceTo(const Vector& point) const
{
Vector closest;
CoordType param;
@ -75,9 +67,9 @@ CoordType Ray<CoordType, Dim, IsTwoSided>::DistanceTo(const Vector& point) const
}
template <typename CoordType, int Dim, bool IsTwoSided>
CoordType Ray<CoordType, Dim, IsTwoSided>::DistanceTo(const Vector& point,
CoordType& param,
Vector& projectedPoint) const
VTKM_EXEC_CONT CoordType Ray<CoordType, Dim, IsTwoSided>::DistanceTo(const Vector& point,
CoordType& param,
Vector& projectedPoint) const
{
const auto& dir = this->Direction;
auto mag2 = vtkm::MagnitudeSquared(dir);
@ -105,9 +97,10 @@ CoordType Ray<CoordType, Dim, IsTwoSided>::DistanceTo(const Vector& point,
template <typename CoordType, int Dim, bool IsTwoSided>
template <bool OtherTwoSided, int Dim_, typename std::enable_if<Dim_ == 2, int>::type>
bool Ray<CoordType, Dim, IsTwoSided>::Intersect(const Ray<CoordType, Dim, OtherTwoSided>& other,
Vector& point,
CoordType tol)
VTKM_EXEC_CONT bool Ray<CoordType, Dim, IsTwoSided>::Intersect(
const Ray<CoordType, Dim, OtherTwoSided>& other,
Vector& point,
CoordType tol)
{
auto d1 = this->Direction;
auto d2 = other.Direction;
@ -139,33 +132,33 @@ bool Ray<CoordType, Dim, IsTwoSided>::Intersect(const Ray<CoordType, Dim, OtherT
/// Default constructor, enabled only when Dim_ == 2.
/// Endpoints is brace-initialized from { { 0.f }, { 1.f, 0.f } }.
template <typename CoordType, int Dim>
template <int Dim_, typename std::enable_if<Dim_ == 2, int>::type>
LineSegment<CoordType, Dim>::LineSegment()
VTKM_EXEC_CONT LineSegment<CoordType, Dim>::LineSegment()
: Endpoints{ { 0.f }, { 1.f, 0.f } }
{
}
/// Default constructor, enabled only when Dim_ == 3.
/// Endpoints is brace-initialized from { { 0.f }, { 1.f, 0.f, 0.f } }.
template <typename CoordType, int Dim>
template <int Dim_, typename std::enable_if<Dim_ == 3, int>::type>
LineSegment<CoordType, Dim>::LineSegment()
VTKM_EXEC_CONT LineSegment<CoordType, Dim>::LineSegment()
: Endpoints{ { 0.f }, { 1.f, 0.f, 0.f } }
{
}
/// Constructs a segment whose endpoints are \c p0 and \c p1, stored in that
/// order in Endpoints.
template <typename CoordType, int Dim>
LineSegment<CoordType, Dim>::LineSegment(const Vector& p0, const Vector& p1)
VTKM_EXEC_CONT LineSegment<CoordType, Dim>::LineSegment(const Vector& p0, const Vector& p1)
: Endpoints{ p0, p1 }
{
}
/// Returns true when the squared magnitude of Direction() is strictly less
/// than \c tol2. Note that \c tol2 is compared against a squared length.
template <typename CoordType, int Dim>
bool LineSegment<CoordType, Dim>::IsSingular(CoordType tol2) const
VTKM_EXEC_CONT bool LineSegment<CoordType, Dim>::IsSingular(CoordType tol2) const
{
return vtkm::MagnitudeSquared(this->Direction()) < tol2;
}
template <typename CoordType, int Dim>
template <int Dim_, typename std::enable_if<Dim_ == 2, int>::type>
Ray<CoordType, Dim, true> LineSegment<CoordType, Dim>::PerpendicularBisector() const
VTKM_EXEC_CONT Ray<CoordType, Dim, true> LineSegment<CoordType, Dim>::PerpendicularBisector() const
{
const Vector dir = this->Direction();
const Vector perp(-dir[1], dir[0]);
@ -175,13 +168,13 @@ Ray<CoordType, Dim, true> LineSegment<CoordType, Dim>::PerpendicularBisector() c
/// 3-dimensional overload (enabled only when Dim_ == 3).
/// Returns a Plane constructed from this segment's Center() as origin and
/// its Direction() as normal.
template <typename CoordType, int Dim>
template <int Dim_, typename std::enable_if<Dim_ == 3, int>::type>
Plane<CoordType> LineSegment<CoordType, Dim>::PerpendicularBisector() const
VTKM_EXEC_CONT Plane<CoordType> LineSegment<CoordType, Dim>::PerpendicularBisector() const
{
return Plane<CoordType>(this->Center(), this->Direction());
}
template <typename CoordType, int Dim>
typename LineSegment<CoordType, Dim>::Vector LineSegment<CoordType, Dim>::Evaluate(
VTKM_EXEC_CONT typename LineSegment<CoordType, Dim>::Vector LineSegment<CoordType, Dim>::Evaluate(
CoordType param) const
{
auto pointOnLine = this->Endpoints[0] * (1.0f - param) + this->Endpoints[1] * param;
@ -189,7 +182,7 @@ typename LineSegment<CoordType, Dim>::Vector LineSegment<CoordType, Dim>::Evalua
}
template <typename CoordType, int Dim>
CoordType LineSegment<CoordType, Dim>::DistanceTo(const Vector& point) const
VTKM_EXEC_CONT CoordType LineSegment<CoordType, Dim>::DistanceTo(const Vector& point) const
{
Vector closest;
CoordType param;
@ -197,9 +190,9 @@ CoordType LineSegment<CoordType, Dim>::DistanceTo(const Vector& point) const
}
template <typename CoordType, int Dim>
CoordType LineSegment<CoordType, Dim>::DistanceTo(const Vector& point,
CoordType& param,
Vector& projectedPoint) const
VTKM_EXEC_CONT CoordType LineSegment<CoordType, Dim>::DistanceTo(const Vector& point,
CoordType& param,
Vector& projectedPoint) const
{
auto dir = this->Endpoints[1] - this->Endpoints[0];
auto mag2 = vtkm::MagnitudeSquared(dir);
@ -224,9 +217,10 @@ CoordType LineSegment<CoordType, Dim>::DistanceTo(const Vector& point,
template <typename CoordType, int Dim>
template <int Dim_, typename std::enable_if<Dim_ == 2, int>::type>
bool LineSegment<CoordType, Dim>::IntersectInfinite(const LineSegment<CoordType, Dim>& other,
Vector& point,
CoordType tol)
VTKM_EXEC_CONT bool LineSegment<CoordType, Dim>::IntersectInfinite(
const LineSegment<CoordType, Dim>& other,
Vector& point,
CoordType tol)
{
auto d1 = this->Direction();
auto d2 = other.Direction();
@ -249,14 +243,14 @@ bool LineSegment<CoordType, Dim>::IntersectInfinite(const LineSegment<CoordType,
// Plane
/// Default constructor: Origin is initialized to { 0.f, 0.f, 0.f } and
/// Normal to { 0.f, 0.f, 1.f }.
template <typename CoordType>
Plane<CoordType>::Plane()
VTKM_EXEC_CONT Plane<CoordType>::Plane()
: Origin{ 0.f, 0.f, 0.f }
, Normal{ 0.f, 0.f, 1.f }
{
}
template <typename CoordType>
Plane<CoordType>::Plane(const Vector& origin, const Vector& normal, CoordType tol2)
VTKM_EXEC_CONT Plane<CoordType>::Plane(const Vector& origin, const Vector& normal, CoordType tol2)
: Origin(origin)
, Normal(vtkm::Normal(normal))
{
@ -268,14 +262,15 @@ Plane<CoordType>::Plane(const Vector& origin, const Vector& normal, CoordType to
}
/// Returns the dot product of (point - Origin) with Normal. The result is
/// signed: its sign follows the side of the plane, relative to Normal, on
/// which \c point lies.
template <typename CoordType>
CoordType Plane<CoordType>::DistanceTo(const Vector& point) const
VTKM_EXEC_CONT CoordType Plane<CoordType>::DistanceTo(const Vector& point) const
{
auto dist = vtkm::Dot(point - this->Origin, this->Normal);
return dist;
}
template <typename CoordType>
typename Plane<CoordType>::Vector Plane<CoordType>::ClosestPoint(const Vector& point) const
VTKM_EXEC_CONT typename Plane<CoordType>::Vector Plane<CoordType>::ClosestPoint(
const Vector& point) const
{
auto vop = vtkm::Project(point - this->Origin, this->Normal);
auto closest = point - vop;
@ -284,11 +279,11 @@ typename Plane<CoordType>::Vector Plane<CoordType>::ClosestPoint(const Vector& p
template <typename CoordType>
template <bool IsTwoSided>
bool Plane<CoordType>::Intersect(const Ray<CoordType, 3, IsTwoSided>& ray,
CoordType& parameter,
Vector& point,
bool& lineInPlane,
CoordType tol) const
VTKM_EXEC_CONT bool Plane<CoordType>::Intersect(const Ray<CoordType, 3, IsTwoSided>& ray,
CoordType& parameter,
Vector& point,
bool& lineInPlane,
CoordType tol) const
{
CoordType d0 = this->DistanceTo(ray.Origin);
CoordType dirDot = vtkm::Dot(this->Normal, ray.Direction);
@ -330,19 +325,19 @@ bool Plane<CoordType>::Intersect(const Ray<CoordType, 3, IsTwoSided>& ray,
}
/// Convenience overload for callers that do not need the intersection point.
/// Forwards to the overload that also takes a point output, passing a local
/// scratch Vector that is discarded; \c parameter and \c lineInPlane are
/// filled in by that overload and its return value is returned unchanged.
template <typename CoordType>
bool Plane<CoordType>::Intersect(const LineSegment<CoordType>& segment,
CoordType& parameter,
bool& lineInPlane) const
VTKM_EXEC_CONT bool Plane<CoordType>::Intersect(const LineSegment<CoordType>& segment,
CoordType& parameter,
bool& lineInPlane) const
{
Vector point;
return this->Intersect(segment, parameter, point, lineInPlane);
}
template <typename CoordType>
bool Plane<CoordType>::Intersect(const LineSegment<CoordType>& segment,
CoordType& parameter,
Vector& point,
bool& lineInPlane) const
VTKM_EXEC_CONT bool Plane<CoordType>::Intersect(const LineSegment<CoordType>& segment,
CoordType& parameter,
Vector& point,
bool& lineInPlane) const
{
CoordType d0 = this->DistanceTo(segment.Endpoints[0]);
CoordType d1 = this->DistanceTo(segment.Endpoints[1]);
@ -394,10 +389,10 @@ bool Plane<CoordType>::Intersect(const LineSegment<CoordType>& segment,
}
template <typename CoordType>
bool Plane<CoordType>::Intersect(const Plane<CoordType>& other,
Ray<CoordType, 3, true>& ray,
bool& coincident,
CoordType tol2) const
VTKM_EXEC_CONT bool Plane<CoordType>::Intersect(const Plane<CoordType>& other,
Ray<CoordType, 3, true>& ray,
bool& coincident,
CoordType tol2) const
{
auto dir = vtkm::Cross(this->Normal, other.Normal);
auto mag2 = vtkm::MagnitudeSquared(dir);
@ -434,27 +429,27 @@ bool Plane<CoordType>::Intersect(const Plane<CoordType>& other,
// Sphere
/// Default constructor: Center is brace-initialized from 0.f and Radius is
/// set to 1 (converted to CoordType).
template <typename CoordType, int Dim>
Sphere<CoordType, Dim>::Sphere()
VTKM_EXEC_CONT Sphere<CoordType, Dim>::Sphere()
: Center{ 0.f }
, Radius(static_cast<CoordType>(1.f))
{
}
/// Constructs a sphere centered at \c center.
/// A \c radius that is zero or negative is not stored as given: Radius is
/// set to -1 instead. Any positive \c radius is stored unchanged.
template <typename CoordType, int Dim>
Sphere<CoordType, Dim>::Sphere(const Vector& center, CoordType radius)
VTKM_EXEC_CONT Sphere<CoordType, Dim>::Sphere(const Vector& center, CoordType radius)
: Center(center)
, Radius(radius <= 0.f ? static_cast<CoordType>(-1.0f) : radius)
{
}
/// Returns true when Classify(point, tol2) yields a negative value, and false
/// for zero or positive results.
template <typename CoordType, int Dim>
bool Sphere<CoordType, Dim>::Contains(const Vector& point, CoordType tol2) const
VTKM_EXEC_CONT bool Sphere<CoordType, Dim>::Contains(const Vector& point, CoordType tol2) const
{
return this->Classify(point, tol2) < 0;
}
template <typename CoordType, int Dim>
int Sphere<CoordType, Dim>::Classify(const Vector& point, CoordType tol2) const
VTKM_EXEC_CONT int Sphere<CoordType, Dim>::Classify(const Vector& point, CoordType tol2) const
{
if (!this->IsValid())
{
@ -469,16 +464,17 @@ int Sphere<CoordType, Dim>::Classify(const Vector& point, CoordType tol2) const
// Construction techniques
/// Builds a plane from a point and a ray.
/// The plane's origin is \c point and the vector handed to the Plane
/// constructor as its normal is Cross(ray.Direction, point - ray.Origin);
/// \c tol2 is forwarded to the Plane constructor unchanged.
template <typename CoordType, bool IsTwoSided>
vtkm::Plane<CoordType> make_PlaneFromPointAndLine(const vtkm::Vec<CoordType, 3>& point,
const vtkm::Ray<CoordType, 3, IsTwoSided>& ray,
CoordType tol2)
VTKM_EXEC_CONT vtkm::Plane<CoordType> make_PlaneFromPointAndLine(
const vtkm::Vec<CoordType, 3>& point,
const vtkm::Ray<CoordType, 3, IsTwoSided>& ray,
CoordType tol2)
{
auto tmpDir = point - ray.Origin;
return vtkm::Plane<CoordType>(point, vtkm::Cross(ray.Direction, tmpDir), tol2);
}
template <typename CoordType>
vtkm::Plane<CoordType> make_PlaneFromPointAndLineSegment(
VTKM_EXEC_CONT vtkm::Plane<CoordType> make_PlaneFromPointAndLineSegment(
const vtkm::Vec<CoordType, 3>& point,
const vtkm::LineSegment3<CoordType>& segment,
CoordType tol2)
@ -488,10 +484,11 @@ vtkm::Plane<CoordType> make_PlaneFromPointAndLineSegment(
}
template <typename CoordType>
vtkm::Circle<CoordType> make_CircleFrom3Points(const typename vtkm::Vec<CoordType, 2>& p0,
const typename vtkm::Vec<CoordType, 2>& p1,
const typename vtkm::Vec<CoordType, 2>& p2,
CoordType tol)
VTKM_EXEC_CONT vtkm::Circle<CoordType> make_CircleFrom3Points(
const typename vtkm::Vec<CoordType, 2>& p0,
const typename vtkm::Vec<CoordType, 2>& p1,
const typename vtkm::Vec<CoordType, 2>& p2,
CoordType tol)
{
constexpr int Dim = 2;
using Vector = typename vtkm::Circle<CoordType>::Vector;
@ -518,11 +515,11 @@ vtkm::Circle<CoordType> make_CircleFrom3Points(const typename vtkm::Vec<CoordTyp
}
template <typename CoordType>
vtkm::Sphere<CoordType, 3> make_SphereFrom4Points(const vtkm::Vec<CoordType, 3>& a0,
const vtkm::Vec<CoordType, 3>& a1,
const vtkm::Vec<CoordType, 3>& a2,
const vtkm::Vec<CoordType, 3>& a3,
CoordType tol)
VTKM_EXEC_CONT vtkm::Sphere<CoordType, 3> make_SphereFrom4Points(const vtkm::Vec<CoordType, 3>& a0,
const vtkm::Vec<CoordType, 3>& a1,
const vtkm::Vec<CoordType, 3>& a2,
const vtkm::Vec<CoordType, 3>& a3,
CoordType tol)
{
// Choose p3 such that the min(p3 - p[012]) is larger than any other choice of p3.
// From: http://steve.hollasch.net/cgindex/geometry/sphere4pts.html,

@ -665,22 +665,18 @@ private:
} // namespace vtkm
#ifdef VTKM_CUDA
// Cuda seems to have a bug where it expects the template class VirtualObjectTransfer
// to be instantiated in a consistent order among all the translation units of an
// executable. Failing to do so results in random crashes and incorrect results.
// We work around this issue by explicitly instantiating VirtualObjectTransfer for
// all the implicit functions here.
#include <vtkm/cont/cuda/internal/VirtualObjectTransferCuda.h>
#ifdef VTKM_CUDA
#include <vtkm/cont/internal/VirtualObjectTransferInstantiate.h>
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Box);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Cylinder);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Frustum);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Plane);
VTKM_EXPLICITLY_INSTANTIATE_TRANSFER(vtkm::Sphere);
#endif
#endif //vtk_m_ImplicitFunction_h

@ -57,8 +57,8 @@ using IsList = typename vtkm::internal::IsListImpl<T>::type;
/// actually a device adapter tag. (You can get weird errors elsewhere in the
/// code when a mistake is made.)
///
#define VTKM_IS_LIST(type) \
VTKM_STATIC_ASSERT_MSG((::vtkm::internal::IsList<type>::value), \
#define VTKM_IS_LIST(type) \
VTKM_STATIC_ASSERT_MSG((::vtkm::internal::IsList<type>::value), \
"Provided type is not a valid VTK-m list type.")
namespace detail
@ -226,8 +226,7 @@ template <vtkm::IdComponent NumSearched,
typename... Ts>
struct FindFirstOfType<NumSearched, Target, T0, T1, T2, T3, T4, T5, Ts...>
: FindFirstOfSplit4<(std::is_same<Target, T0>::value || std::is_same<Target, T1>::value ||
std::is_same<Target, T2>::value ||
std::is_same<Target, T3>::value),
std::is_same<Target, T2>::value || std::is_same<Target, T3>::value),
NumSearched,
Target,
T0,
@ -257,8 +256,7 @@ template <vtkm::IdComponent NumSearched,
typename... Ts>
struct FindFirstOfSplit8<true, NumSearched, Target, T0, T1, T2, T3, T4, T5, T6, T7, Ts...>
: FindFirstOfSplit4<(std::is_same<Target, T0>::value || std::is_same<Target, T1>::value ||
std::is_same<Target, T2>::value ||
std::is_same<Target, T3>::value),
std::is_same<Target, T2>::value || std::is_same<Target, T3>::value),
NumSearched,
Target,
T0,
@ -305,12 +303,9 @@ template <vtkm::IdComponent NumSearched,
typename... Ts>
struct FindFirstOfType<NumSearched, Target, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, Ts...>
: FindFirstOfSplit8<(std::is_same<Target, T0>::value || std::is_same<Target, T1>::value ||
std::is_same<Target, T2>::value ||
std::is_same<Target, T3>::value ||
std::is_same<Target, T4>::value ||
std::is_same<Target, T5>::value ||
std::is_same<Target, T6>::value ||
std::is_same<Target, T7>::value),
std::is_same<Target, T2>::value || std::is_same<Target, T3>::value ||
std::is_same<Target, T4>::value || std::is_same<Target, T5>::value ||
std::is_same<Target, T6>::value || std::is_same<Target, T7>::value),
NumSearched,
Target,
T0,

@ -103,8 +103,8 @@ struct VTKM_DEPRECATED(1.6, "VTKM_IS_LIST_TAG replaced with VTKM_IS_LIST.") List
/// actually a device adapter tag. (You can get weird errors elsewhere in the
/// code when a mistake is made.)
///
#define VTKM_IS_LIST_TAG(tag) \
VTKM_STATIC_ASSERT_MSG((::vtkm::detail::ListTagAssert<tag>::value), \
#define VTKM_IS_LIST_TAG(tag) \
VTKM_STATIC_ASSERT_MSG((::vtkm::detail::ListTagAssert<tag>::value), \
"Provided type is not a valid VTK-m list tag.")
namespace internal

@ -17,8 +17,10 @@
#include <vtkm/Types.h>
#include <vtkm/VecTraits.h>
#include <limits> // must be found with or without CUDA.
#ifndef VTKM_CUDA
#include <cmath>
#include <cstring>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
@ -2584,6 +2586,105 @@ inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
#endif
}
/// \brief Distance between two Float64 values measured on their bit patterns.
///
/// For two finite values of the same sign the result is the absolute
/// difference of their bit patterns reinterpreted as unsigned 64-bit integers.
/// For values of opposite sign it is the sum of each value's distance to zero.
/// Positive and negative zero are treated as the same value.
///
/// \param x first value
/// \param y second value
/// \return the distance described above, or 0xFFFFFFFFFFFFFFFF when either
///         argument is not finite.
///
/// See: https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/ for why this works.
inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistance(vtkm::Float64 x, vtkm::Float64 y)
{
  static_assert(sizeof(vtkm::Float64) == sizeof(vtkm::UInt64), "vtkm::Float64 is incorrect size.");
  static_assert(std::numeric_limits<vtkm::Float64>::has_denorm == std::denorm_present, "FloatDistance presumes the floating-point type has subnormal numbers.");

  if (!vtkm::IsFinite(x) || !vtkm::IsFinite(y))
  {
    return 0xFFFFFFFFFFFFFFFFL;
  }

  // Signed zero is the sworn enemy of this process: fold -0.0 onto +0.0 so
  // that zero always counts as non-negative below.
  if (x == 0)
  {
    x = vtkm::Abs(x);
  }
  if (y == 0)
  {
    y = vtkm::Abs(y);
  }

  // Record the signs, then work on magnitudes only. This replaces the
  // self-recursive calls on negated arguments with straight-line code, which
  // avoids recursion in a function that may also be compiled for a device.
  const bool xIsNegative = x < 0;
  const bool yIsNegative = y < 0;
  if (xIsNegative)
  {
    x = -x;
  }
  if (yIsNegative)
  {
    y = -y;
  }

  // Note that:
  // int64_t xi = *reinterpret_cast<int64_t*>(&x);
  // int64_t yi = *reinterpret_cast<int64_t*>(&y);
  // also works, but generates warnings.
  // Good option to have if we get compile errors off memcpy or don't want to #include <cstring> though.
  // At least on gcc, both versions generate the same assembly.
  vtkm::UInt64 xBits;
  vtkm::UInt64 yBits;
  memcpy(&xBits, &x, sizeof(vtkm::UInt64));
  memcpy(&yBits, &y, sizeof(vtkm::UInt64));

  if (xIsNegative != yIsNegative)
  {
    // Opposite signs: go through zero. The bit pattern of +0.0 is 0, so each
    // magnitude's distance to zero is its own bit pattern. Both patterns have
    // the sign bit clear, so the sum cannot overflow.
    return xBits + yBits;
  }

  // Same sign: absolute difference of the magnitudes' bit patterns.
  if (yBits > xBits)
  {
    return yBits - xBits;
  }
  return xBits - yBits;
}
/// \brief Distance between two Float32 values measured on their bit patterns.
///
/// For two finite values of the same sign the result is the absolute
/// difference of their bit patterns reinterpreted as unsigned 32-bit integers
/// (widened to 64 bits). For values of opposite sign it is the sum of each
/// value's distance to zero. Positive and negative zero are treated as the
/// same value.
///
/// \param x first value
/// \param y second value
/// \return the distance described above, or 0xFFFFFFFFFFFFFFFF when either
///         argument is not finite.
inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistance(vtkm::Float32 x, vtkm::Float32 y)
{
  // The bits are copied into a vtkm::UInt32 below, so that is the type whose
  // size must match.
  static_assert(sizeof(vtkm::Float32) == sizeof(vtkm::UInt32), "vtkm::Float32 is incorrect size.");
  static_assert(std::numeric_limits<vtkm::Float32>::has_denorm == std::denorm_present, "FloatDistance presumes the floating-point type has subnormal numbers.");

  if (!vtkm::IsFinite(x) || !vtkm::IsFinite(y))
  {
    return 0xFFFFFFFFFFFFFFFFL;
  }

  // Fold -0.0f onto +0.0f so that zero always counts as non-negative below.
  if (x == 0)
  {
    x = vtkm::Abs(x);
  }
  if (y == 0)
  {
    y = vtkm::Abs(y);
  }

  // Record the signs, then work on magnitudes only. This replaces the
  // self-recursive calls on negated arguments with straight-line code, which
  // avoids recursion in a function that may also be compiled for a device.
  const bool xIsNegative = x < 0;
  const bool yIsNegative = y < 0;
  if (xIsNegative)
  {
    x = -x;
  }
  if (yIsNegative)
  {
    y = -y;
  }

  vtkm::UInt32 xBits32;
  vtkm::UInt32 yBits32;
  memcpy(&xBits32, &x, sizeof(vtkm::UInt32));
  memcpy(&yBits32, &y, sizeof(vtkm::UInt32));
  // Widen before doing arithmetic so the result is computed in 64 bits.
  const vtkm::UInt64 xBits = xBits32;
  const vtkm::UInt64 yBits = yBits32;

  if (xIsNegative != yIsNegative)
  {
    // Opposite signs: go through zero. The bit pattern of +0.0f is 0, so each
    // magnitude's distance to zero is its own bit pattern.
    return xBits + yBits;
  }

  // Same sign: absolute difference of the magnitudes' bit patterns.
  if (yBits > xBits)
  {
    return yBits - xBits;
  }
  return xBits - yBits;
}
/// Bitwise operations
///

@ -29,8 +29,10 @@ $# Ignore the following comment. It is meant for the generated file.
#include <vtkm/Types.h>
#include <vtkm/VecTraits.h>
#include <limits> // must be found with or without CUDA.
#ifndef VTKM_CUDA
#include <cmath>
#include <cstring>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
@ -1186,6 +1188,105 @@ inline VTKM_EXEC_CONT vtkm::Float64 Ldexp(vtkm::Float64 x, vtkm::Int32 exponent)
#endif
}
/// \brief Distance between two Float64 values measured on their bit patterns.
///
/// For two finite values of the same sign the result is the absolute
/// difference of their bit patterns reinterpreted as unsigned 64-bit integers.
/// For values of opposite sign it is the sum of each value's distance to zero.
/// Positive and negative zero are treated as the same value.
///
/// \param x first value
/// \param y second value
/// \return the distance described above, or 0xFFFFFFFFFFFFFFFF when either
///         argument is not finite.
///
/// See: https://randomascii.wordpress.com/2012/01/23/stupid-float-tricks-2/ for why this works.
inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistance(vtkm::Float64 x, vtkm::Float64 y)
{
  static_assert(sizeof(vtkm::Float64) == sizeof(vtkm::UInt64), "vtkm::Float64 is incorrect size.");
  static_assert(std::numeric_limits<vtkm::Float64>::has_denorm == std::denorm_present, "FloatDistance presumes the floating-point type has subnormal numbers.");

  if (!vtkm::IsFinite(x) || !vtkm::IsFinite(y))
  {
    return 0xFFFFFFFFFFFFFFFFL;
  }

  // Signed zero is the sworn enemy of this process: fold -0.0 onto +0.0 so
  // that zero always counts as non-negative below.
  if (x == 0)
  {
    x = vtkm::Abs(x);
  }
  if (y == 0)
  {
    y = vtkm::Abs(y);
  }

  // Record the signs, then work on magnitudes only. This replaces the
  // self-recursive calls on negated arguments with straight-line code, which
  // avoids recursion in a function that may also be compiled for a device.
  const bool xIsNegative = x < 0;
  const bool yIsNegative = y < 0;
  if (xIsNegative)
  {
    x = -x;
  }
  if (yIsNegative)
  {
    y = -y;
  }

  // Note that:
  // int64_t xi = *reinterpret_cast<int64_t*>(&x);
  // int64_t yi = *reinterpret_cast<int64_t*>(&y);
  // also works, but generates warnings.
  // Good option to have if we get compile errors off memcpy or don't want to #include <cstring> though.
  // At least on gcc, both versions generate the same assembly.
  vtkm::UInt64 xBits;
  vtkm::UInt64 yBits;
  memcpy(&xBits, &x, sizeof(vtkm::UInt64));
  memcpy(&yBits, &y, sizeof(vtkm::UInt64));

  if (xIsNegative != yIsNegative)
  {
    // Opposite signs: go through zero. The bit pattern of +0.0 is 0, so each
    // magnitude's distance to zero is its own bit pattern. Both patterns have
    // the sign bit clear, so the sum cannot overflow.
    return xBits + yBits;
  }

  // Same sign: absolute difference of the magnitudes' bit patterns.
  if (yBits > xBits)
  {
    return yBits - xBits;
  }
  return xBits - yBits;
}
/// \brief Distance between two Float32 values measured on their bit patterns.
///
/// For two finite values of the same sign the result is the absolute
/// difference of their bit patterns reinterpreted as unsigned 32-bit integers
/// (widened to 64 bits). For values of opposite sign it is the sum of each
/// value's distance to zero. Positive and negative zero are treated as the
/// same value.
///
/// \param x first value
/// \param y second value
/// \return the distance described above, or 0xFFFFFFFFFFFFFFFF when either
///         argument is not finite.
inline VTKM_EXEC_CONT vtkm::UInt64 FloatDistance(vtkm::Float32 x, vtkm::Float32 y)
{
  // The bits are copied into a vtkm::UInt32 below, so that is the type whose
  // size must match.
  static_assert(sizeof(vtkm::Float32) == sizeof(vtkm::UInt32), "vtkm::Float32 is incorrect size.");
  static_assert(std::numeric_limits<vtkm::Float32>::has_denorm == std::denorm_present, "FloatDistance presumes the floating-point type has subnormal numbers.");

  if (!vtkm::IsFinite(x) || !vtkm::IsFinite(y))
  {
    return 0xFFFFFFFFFFFFFFFFL;
  }

  // Fold -0.0f onto +0.0f so that zero always counts as non-negative below.
  if (x == 0)
  {
    x = vtkm::Abs(x);
  }
  if (y == 0)
  {
    y = vtkm::Abs(y);
  }

  // Record the signs, then work on magnitudes only. This replaces the
  // self-recursive calls on negated arguments with straight-line code, which
  // avoids recursion in a function that may also be compiled for a device.
  const bool xIsNegative = x < 0;
  const bool yIsNegative = y < 0;
  if (xIsNegative)
  {
    x = -x;
  }
  if (yIsNegative)
  {
    y = -y;
  }

  vtkm::UInt32 xBits32;
  vtkm::UInt32 yBits32;
  memcpy(&xBits32, &x, sizeof(vtkm::UInt32));
  memcpy(&yBits32, &y, sizeof(vtkm::UInt32));
  // Widen before doing arithmetic so the result is computed in 64 bits.
  const vtkm::UInt64 xBits = xBits32;
  const vtkm::UInt64 yBits = yBits32;

  if (xIsNegative != yIsNegative)
  {
    // Opposite signs: go through zero. The bit pattern of +0.0f is 0, so each
    // magnitude's distance to zero is its own bit pattern.
    return xBits + yBits;
  }

  // Same sign: absolute difference of the magnitudes' bit patterns.
  if (yBits > xBits)
  {
    return yBits - xBits;
  }
  return xBits - yBits;
}
/// Bitwise operations
///

Some files were not shown because too many files have changed in this diff Show More