Add initial version of an OpenMP backend.

This commit is contained in:
Allison Vacanti 2018-03-01 16:20:31 -05:00
parent 7b5ad3e80c
commit 183bcf109a
50 changed files with 3929 additions and 3 deletions

551
CMake/FindOpenMP.cmake Normal file

@ -0,0 +1,551 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##
##=============================================================================
#.rst:
# FindOpenMP
# ----------
#
# Finds OpenMP support
#
# This module can be used to detect OpenMP support in a compiler. If
# the compiler supports OpenMP, the flags required to compile with
# OpenMP support are returned in variables for the different languages.
# The variables may be empty if the compiler does not need a special
# flag to support OpenMP.
#
# Variables
# ^^^^^^^^^
#
# The module exposes the components ``C``, ``CXX``, and ``Fortran``.
# Each of these controls the various languages to search OpenMP support for.
#
# Depending on the enabled components the following variables will be set:
#
# ``OpenMP_FOUND``
# Variable indicating that OpenMP flags for all requested languages have been found.
# If no components are specified, this is true if OpenMP settings for all enabled languages
# were detected.
# ``OpenMP_VERSION``
# Minimal version of the OpenMP standard detected among the requested languages,
# or all enabled languages if no components were specified.
#
# This module will set the following variables per language in your
# project, where ``<lang>`` is one of C, CXX, or Fortran:
#
# ``OpenMP_<lang>_FOUND``
# Variable indicating if OpenMP support for ``<lang>`` was detected.
# ``OpenMP_<lang>_FLAGS``
# OpenMP compiler flags for ``<lang>``, separated by spaces.
#
# For linking with OpenMP code written in ``<lang>``, the following
# variables are provided:
#
# ``OpenMP_<lang>_LIB_NAMES``
# :ref:`;-list <CMake Language Lists>` of libraries for OpenMP programs for ``<lang>``.
# ``OpenMP_<libname>_LIBRARY``
# Location of the individual libraries needed for OpenMP support in ``<lang>``.
# ``OpenMP_<lang>_LIBRARIES``
# A list of libraries needed to link with OpenMP code written in ``<lang>``.
#
# Additionally, the module provides :prop_tgt:`IMPORTED` targets:
#
# ``OpenMP::OpenMP_<lang>``
# Target for using OpenMP from ``<lang>``.
#
# Specifically for Fortran, the module sets the following variables:
#
# ``OpenMP_Fortran_HAVE_OMPLIB_HEADER``
# Boolean indicating if OpenMP is accessible through ``omp_lib.h``.
# ``OpenMP_Fortran_HAVE_OMPLIB_MODULE``
# Boolean indicating if OpenMP is accessible through the ``omp_lib`` Fortran module.
#
# The module will also try to provide the OpenMP version variables:
#
# ``OpenMP_<lang>_SPEC_DATE``
# Date of the OpenMP specification implemented by the ``<lang>`` compiler.
# ``OpenMP_<lang>_VERSION_MAJOR``
# Major version of OpenMP implemented by the ``<lang>`` compiler.
# ``OpenMP_<lang>_VERSION_MINOR``
# Minor version of OpenMP implemented by the ``<lang>`` compiler.
# ``OpenMP_<lang>_VERSION``
# OpenMP version implemented by the ``<lang>`` compiler.
#
# The specification date is formatted as given in the OpenMP standard:
# ``yyyymm`` where ``yyyy`` and ``mm`` represents the year and month of
# the OpenMP specification implemented by the ``<lang>`` compiler.
cmake_policy(PUSH)
cmake_policy(SET CMP0012 NEW) # if() recognizes numbers and booleans
cmake_policy(SET CMP0054 NEW) # if() quoted variables not dereferenced
cmake_policy(SET CMP0057 NEW) # if IN_LIST
function(_OPENMP_FLAG_CANDIDATES LANG)
if(NOT OpenMP_${LANG}_FLAG)
unset(OpenMP_FLAG_CANDIDATES)
set(OMP_FLAG_GNU "-fopenmp")
set(OMP_FLAG_Clang "-fopenmp=libomp" "-fopenmp=libiomp5" "-fopenmp")
set(OMP_FLAG_AppleClang "-Xclang -fopenmp")
set(OMP_FLAG_HP "+Oopenmp")
if(WIN32)
set(OMP_FLAG_Intel "-Qopenmp")
elseif(CMAKE_${LANG}_COMPILER_ID STREQUAL "Intel" AND
"${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS "15.0.0.20140528")
set(OMP_FLAG_Intel "-openmp")
else()
set(OMP_FLAG_Intel "-qopenmp")
endif()
set(OMP_FLAG_MIPSpro "-mp")
set(OMP_FLAG_MSVC "-openmp")
set(OMP_FLAG_PathScale "-openmp")
set(OMP_FLAG_NAG "-openmp")
set(OMP_FLAG_Absoft "-openmp")
set(OMP_FLAG_PGI "-mp")
set(OMP_FLAG_Flang "-fopenmp")
set(OMP_FLAG_SunPro "-xopenmp")
set(OMP_FLAG_XL "-qsmp=omp")
# Cray compiler activate OpenMP with -h omp, which is enabled by default.
set(OMP_FLAG_Cray " " "-h omp")
# If we know the correct flags, use those
if(DEFINED OMP_FLAG_${CMAKE_${LANG}_COMPILER_ID})
set(OpenMP_FLAG_CANDIDATES "${OMP_FLAG_${CMAKE_${LANG}_COMPILER_ID}}")
# Fall back to reasonable default tries otherwise
else()
set(OpenMP_FLAG_CANDIDATES "-openmp" "-fopenmp" "-mp" " ")
endif()
set(OpenMP_${LANG}_FLAG_CANDIDATES "${OpenMP_FLAG_CANDIDATES}" PARENT_SCOPE)
else()
set(OpenMP_${LANG}_FLAG_CANDIDATES "${OpenMP_${LANG}_FLAG}" PARENT_SCOPE)
endif()
endfunction()
# sample openmp source code to test
set(OpenMP_C_CXX_TEST_SOURCE
"
#include <omp.h>
int main() {
#ifdef _OPENMP
int n = omp_get_max_threads();
return 0;
#else
breaks_on_purpose
#endif
}
")
# in Fortran, an implementation may provide an omp_lib.h header
# or omp_lib module, or both (OpenMP standard, section 3.1)
# Furthmore !$ is the Fortran equivalent of #ifdef _OPENMP (OpenMP standard, 2.2.2)
# Without the conditional compilation, some compilers (e.g. PGI) might compile OpenMP code
# while not actually enabling OpenMP, building code sequentially
set(OpenMP_Fortran_TEST_SOURCE
"
program test
@OpenMP_Fortran_INCLUDE_LINE@
!$ integer :: n
n = omp_get_num_threads()
end program test
"
)
function(_OPENMP_WRITE_SOURCE_FILE LANG SRC_FILE_CONTENT_VAR SRC_FILE_NAME SRC_FILE_FULLPATH)
set(WORK_DIR ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenMP)
if("${LANG}" STREQUAL "C")
set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.c")
file(WRITE "${SRC_FILE}" "${OpenMP_C_CXX_${SRC_FILE_CONTENT_VAR}}")
elseif("${LANG}" STREQUAL "CXX")
set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.cpp")
file(WRITE "${SRC_FILE}" "${OpenMP_C_CXX_${SRC_FILE_CONTENT_VAR}}")
elseif("${LANG}" STREQUAL "Fortran")
set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.f90")
file(WRITE "${SRC_FILE}_in" "${OpenMP_Fortran_${SRC_FILE_CONTENT_VAR}}")
configure_file("${SRC_FILE}_in" "${SRC_FILE}" @ONLY)
endif()
set(${SRC_FILE_FULLPATH} "${SRC_FILE}" PARENT_SCOPE)
endfunction()
include(CMakeParseImplicitLinkInfo)
function(_OPENMP_GET_FLAGS LANG FLAG_MODE OPENMP_FLAG_VAR OPENMP_LIB_NAMES_VAR)
_OPENMP_FLAG_CANDIDATES("${LANG}")
_OPENMP_WRITE_SOURCE_FILE("${LANG}" "TEST_SOURCE" OpenMPTryFlag _OPENMP_TEST_SRC)
unset(OpenMP_VERBOSE_COMPILE_OPTIONS)
separate_arguments(OpenMP_VERBOSE_OPTIONS NATIVE_COMMAND "${CMAKE_${LANG}_VERBOSE_FLAG}")
foreach(_VERBOSE_OPTION IN LISTS OpenMP_VERBOSE_OPTIONS)
if(NOT _VERBOSE_OPTION MATCHES "^-Wl,")
list(APPEND OpenMP_VERBOSE_COMPILE_OPTIONS ${_VERBOSE_OPTION})
endif()
endforeach()
foreach(OPENMP_FLAG IN LISTS OpenMP_${LANG}_FLAG_CANDIDATES)
set(OPENMP_FLAGS_TEST "${OPENMP_FLAG}")
if(OpenMP_VERBOSE_COMPILE_OPTIONS)
string(APPEND OPENMP_FLAGS_TEST " ${OpenMP_VERBOSE_COMPILE_OPTIONS}")
endif()
string(REGEX REPLACE "[-/=+]" "" OPENMP_PLAIN_FLAG "${OPENMP_FLAG}")
try_compile( OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG} ${CMAKE_BINARY_DIR} ${_OPENMP_TEST_SRC}
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OPENMP_FLAGS_TEST}"
LINK_LIBRARIES ${CMAKE_${LANG}_VERBOSE_FLAG}
OUTPUT_VARIABLE OpenMP_TRY_COMPILE_OUTPUT
)
if(OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG})
set("${OPENMP_FLAG_VAR}" "${OPENMP_FLAG}" PARENT_SCOPE)
if(CMAKE_${LANG}_VERBOSE_FLAG)
unset(OpenMP_${LANG}_IMPLICIT_LIBRARIES)
unset(OpenMP_${LANG}_IMPLICIT_LINK_DIRS)
unset(OpenMP_${LANG}_IMPLICIT_FWK_DIRS)
unset(OpenMP_${LANG}_LOG_VAR)
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
"Detecting ${LANG} OpenMP compiler ABI info compiled with the following output:\n${OpenMP_TRY_COMPILE_OUTPUT}\n\n")
cmake_parse_implicit_link_info("${OpenMP_TRY_COMPILE_OUTPUT}"
OpenMP_${LANG}_IMPLICIT_LIBRARIES
OpenMP_${LANG}_IMPLICIT_LINK_DIRS
OpenMP_${LANG}_IMPLICIT_FWK_DIRS
OpenMP_${LANG}_LOG_VAR
"${CMAKE_${LANG}_IMPLICIT_OBJECT_REGEX}"
)
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
"Parsed ${LANG} OpenMP implicit link information from above output:\n${OpenMP_${LANG}_LOG_VAR}\n\n")
unset(_OPENMP_LIB_NAMES)
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_IMPLICIT_LIBRARIES)
get_filename_component(_OPENMP_IMPLICIT_LIB_DIR "${_OPENMP_IMPLICIT_LIB}" DIRECTORY)
get_filename_component(_OPENMP_IMPLICIT_LIB_NAME "${_OPENMP_IMPLICIT_LIB}" NAME)
get_filename_component(_OPENMP_IMPLICIT_LIB_PLAIN "${_OPENMP_IMPLICIT_LIB}" NAME_WE)
string(REGEX REPLACE "([][+.*?()^$])" "\\\\\\1" _OPENMP_IMPLICIT_LIB_PLAIN_ESC "${_OPENMP_IMPLICIT_LIB_PLAIN}")
string(REGEX REPLACE "([][+.*?()^$])" "\\\\\\1" _OPENMP_IMPLICIT_LIB_PATH_ESC "${_OPENMP_IMPLICIT_LIB}")
if(NOT ( "${_OPENMP_IMPLICIT_LIB}" IN_LIST CMAKE_${LANG}_IMPLICIT_LINK_LIBRARIES
OR "${CMAKE_${LANG}_STANDARD_LIBRARIES}" MATCHES "(^| )(-Wl,)?(-l)?(${_OPENMP_IMPLICIT_LIB_PLAIN_ESC}|${_OPENMP_IMPLICIT_LIB_PATH_ESC})( |$)"
OR "${CMAKE_${LANG}_LINK_EXECUTABLE}" MATCHES "(^| )(-Wl,)?(-l)?(${_OPENMP_IMPLICIT_LIB_PLAIN_ESC}|${_OPENMP_IMPLICIT_LIB_PATH_ESC})( |$)" ) )
if(_OPENMP_IMPLICIT_LIB_DIR)
set(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY "${_OPENMP_IMPLICIT_LIB}" CACHE FILEPATH
"Path to the ${_OPENMP_IMPLICIT_LIB_PLAIN} library for OpenMP")
else()
find_library(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY
NAMES "${_OPENMP_IMPLICIT_LIB_NAME}"
DOC "Path to the ${_OPENMP_IMPLICIT_LIB_PLAIN} library for OpenMP"
HINTS ${OpenMP_${LANG}_IMPLICIT_LINK_DIRS}
CMAKE_FIND_ROOT_PATH_BOTH
NO_DEFAULT_PATH
)
endif()
mark_as_advanced(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY)
list(APPEND _OPENMP_LIB_NAMES ${_OPENMP_IMPLICIT_LIB_PLAIN})
endif()
endforeach()
set("${OPENMP_LIB_NAMES_VAR}" "${_OPENMP_LIB_NAMES}" PARENT_SCOPE)
else()
# We do not know how to extract implicit OpenMP libraries for this compiler.
# Assume that it handles them automatically, e.g. the Intel Compiler on
# Windows should put the dependency in its object files.
set("${OPENMP_LIB_NAMES_VAR}" "" PARENT_SCOPE)
endif()
break()
elseif(CMAKE_${LANG}_COMPILER_ID STREQUAL "AppleClang"
AND CMAKE_${LANG}_COMPILER_VERSION VERSION_GREATER_EQUAL "7.0")
# Check for separate OpenMP library on AppleClang 7+
find_library(OpenMP_libomp_LIBRARY
NAMES omp gomp iomp5
HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES}
)
mark_as_advanced(OpenMP_libomp_LIBRARY)
if(OpenMP_libomp_LIBRARY)
try_compile( OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG} ${CMAKE_BINARY_DIR} ${_OPENMP_TEST_SRC}
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OPENMP_FLAGS_TEST}"
LINK_LIBRARIES ${CMAKE_${LANG}_VERBOSE_FLAG} ${OpenMP_libomp_LIBRARY}
OUTPUT_VARIABLE OpenMP_TRY_COMPILE_OUTPUT
)
if(OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG})
set("${OPENMP_FLAG_VAR}" "${OPENMP_FLAG}" PARENT_SCOPE)
set("${OPENMP_LIB_NAMES_VAR}" "libomp" PARENT_SCOPE)
break()
endif()
endif()
endif()
set("${OPENMP_LIB_NAMES_VAR}" "NOTFOUND" PARENT_SCOPE)
set("${OPENMP_FLAG_VAR}" "NOTFOUND" PARENT_SCOPE)
endforeach()
unset(OpenMP_VERBOSE_COMPILE_OPTIONS)
endfunction()
set(OpenMP_C_CXX_CHECK_VERSION_SOURCE
"
#include <stdio.h>
#include <omp.h>
const char ompver_str[] = { 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'M',
'P', '-', 'd', 'a', 't', 'e', '[',
('0' + ((_OPENMP/100000)%10)),
('0' + ((_OPENMP/10000)%10)),
('0' + ((_OPENMP/1000)%10)),
('0' + ((_OPENMP/100)%10)),
('0' + ((_OPENMP/10)%10)),
('0' + ((_OPENMP/1)%10)),
']', '\\0' };
int main()
{
puts(ompver_str);
return 0;
}
")
set(OpenMP_Fortran_CHECK_VERSION_SOURCE
"
program omp_ver
@OpenMP_Fortran_INCLUDE_LINE@
integer, parameter :: zero = ichar('0')
integer, parameter :: ompv = openmp_version
character, dimension(24), parameter :: ompver_str =&
(/ 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'M', 'P', '-',&
'd', 'a', 't', 'e', '[',&
char(zero + mod(ompv/100000, 10)),&
char(zero + mod(ompv/10000, 10)),&
char(zero + mod(ompv/1000, 10)),&
char(zero + mod(ompv/100, 10)),&
char(zero + mod(ompv/10, 10)),&
char(zero + mod(ompv/1, 10)), ']' /)
print *, ompver_str
end program omp_ver
")
function(_OPENMP_GET_SPEC_DATE LANG SPEC_DATE)
_OPENMP_WRITE_SOURCE_FILE("${LANG}" "CHECK_VERSION_SOURCE" OpenMPCheckVersion _OPENMP_TEST_SRC)
set(BIN_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenMP/ompver_${LANG}.bin")
string(REGEX REPLACE "[-/=+]" "" OPENMP_PLAIN_FLAG "${OPENMP_FLAG}")
try_compile(OpenMP_SPECTEST_${LANG}_${OPENMP_PLAIN_FLAG} "${CMAKE_BINARY_DIR}" "${_OPENMP_TEST_SRC}"
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OpenMP_${LANG}_FLAGS}"
COPY_FILE ${BIN_FILE})
if(${OpenMP_SPECTEST_${LANG}_${OPENMP_PLAIN_FLAG}})
file(STRINGS ${BIN_FILE} specstr LIMIT_COUNT 1 REGEX "INFO:OpenMP-date")
set(regex_spec_date ".*INFO:OpenMP-date\\[0*([^]]*)\\].*")
if("${specstr}" MATCHES "${regex_spec_date}")
set(${SPEC_DATE} "${CMAKE_MATCH_1}" PARENT_SCOPE)
endif()
endif()
endfunction()
macro(_OPENMP_SET_VERSION_BY_SPEC_DATE LANG)
set(OpenMP_SPEC_DATE_MAP
# Preview versions
"201611=5.0" # OpenMP 5.0 preview 1
# Combined versions, 2.5 onwards
"201511=4.5"
"201307=4.0"
"201107=3.1"
"200805=3.0"
"200505=2.5"
# C/C++ version 2.0
"200203=2.0"
# Fortran version 2.0
"200011=2.0"
# Fortran version 1.1
"199911=1.1"
# C/C++ version 1.0 (there's no 1.1 for C/C++)
"199810=1.0"
# Fortran version 1.0
"199710=1.0"
)
if(OpenMP_${LANG}_SPEC_DATE)
string(REGEX MATCHALL "${OpenMP_${LANG}_SPEC_DATE}=([0-9]+)\\.([0-9]+)" _version_match "${OpenMP_SPEC_DATE_MAP}")
else()
set(_version_match "")
endif()
if(NOT _version_match STREQUAL "")
set(OpenMP_${LANG}_VERSION_MAJOR ${CMAKE_MATCH_1})
set(OpenMP_${LANG}_VERSION_MINOR ${CMAKE_MATCH_2})
set(OpenMP_${LANG}_VERSION "${OpenMP_${LANG}_VERSION_MAJOR}.${OpenMP_${LANG}_VERSION_MINOR}")
else()
unset(OpenMP_${LANG}_VERSION_MAJOR)
unset(OpenMP_${LANG}_VERSION_MINOR)
unset(OpenMP_${LANG}_VERSION)
endif()
unset(_version_match)
unset(OpenMP_SPEC_DATE_MAP)
endmacro()
foreach(LANG IN ITEMS C CXX)
if(CMAKE_${LANG}_COMPILER_LOADED)
if(NOT DEFINED OpenMP_${LANG}_FLAGS OR "${OpenMP_${LANG}_FLAGS}" STREQUAL "NOTFOUND"
OR NOT DEFINED OpenMP_${LANG}_LIB_NAMES OR "${OpenMP_${LANG}_LIB_NAMES}" STREQUAL "NOTFOUND")
_OPENMP_GET_FLAGS("${LANG}" "${LANG}" OpenMP_${LANG}_FLAGS_WORK OpenMP_${LANG}_LIB_NAMES_WORK)
endif()
set(OpenMP_${LANG}_FLAGS "${OpenMP_${LANG}_FLAGS_WORK}"
CACHE STRING "${LANG} compiler flags for OpenMP parallelization")
set(OpenMP_${LANG}_LIB_NAMES "${OpenMP_${LANG}_LIB_NAMES_WORK}"
CACHE STRING "${LANG} compiler libraries for OpenMP parallelization")
mark_as_advanced(OpenMP_${LANG}_FLAGS OpenMP_${LANG}_LIB_NAMES)
endif()
endforeach()
if(CMAKE_Fortran_COMPILER_LOADED)
if(NOT DEFINED OpenMP_Fortran_FLAGS OR "${OpenMP_Fortran_FLAGS}" STREQUAL "NOTFOUND"
OR NOT DEFINED OpenMP_Fortran_LIB_NAMES OR "${OpenMP_Fortran_LIB_NAMES}" STREQUAL "NOTFOUND"
OR NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_MODULE)
set(OpenMP_Fortran_INCLUDE_LINE "use omp_lib\n implicit none")
_OPENMP_GET_FLAGS("Fortran" "FortranHeader" OpenMP_Fortran_FLAGS_WORK OpenMP_Fortran_LIB_NAMES_WORK)
if(OpenMP_Fortran_FLAGS_WORK)
set(OpenMP_Fortran_HAVE_OMPLIB_MODULE TRUE CACHE BOOL INTERNAL "")
endif()
set(OpenMP_Fortran_FLAGS "${OpenMP_Fortran_FLAGS_WORK}"
CACHE STRING "Fortran compiler flags for OpenMP parallelization")
set(OpenMP_Fortran_LIB_NAMES "${OpenMP_Fortran_LIB_NAMES_WORK}"
CACHE STRING "Fortran compiler libraries for OpenMP parallelization")
mark_as_advanced(OpenMP_Fortran_FLAGS OpenMP_Fortran_LIB_NAMES)
endif()
if(NOT DEFINED OpenMP_Fortran_FLAGS OR "${OpenMP_Fortran_FLAGS}" STREQUAL "NOTFOUND"
OR NOT DEFINED OpenMP_Fortran_LIB_NAMES OR "${OpenMP_Fortran_LIB_NAMES}" STREQUAL "NOTFOUND"
OR NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_HEADER)
set(OpenMP_Fortran_INCLUDE_LINE "implicit none\n include 'omp_lib.h'")
_OPENMP_GET_FLAGS("Fortran" "FortranModule" OpenMP_Fortran_FLAGS_WORK OpenMP_Fortran_LIB_NAMES_WORK)
if(OpenMP_Fortran_FLAGS_WORK)
set(OpenMP_Fortran_HAVE_OMPLIB_HEADER TRUE CACHE BOOL INTERNAL "")
endif()
set(OpenMP_Fortran_FLAGS "${OpenMP_Fortran_FLAGS_WORK}"
CACHE STRING "Fortran compiler flags for OpenMP parallelization")
set(OpenMP_Fortran_LIB_NAMES "${OpenMP_Fortran_LIB_NAMES}"
CACHE STRING "Fortran compiler libraries for OpenMP parallelization")
endif()
if(OpenMP_Fortran_HAVE_OMPLIB_MODULE)
set(OpenMP_Fortran_INCLUDE_LINE "use omp_lib\n implicit none")
else()
set(OpenMP_Fortran_INCLUDE_LINE "implicit none\n include 'omp_lib.h'")
endif()
endif()
if(NOT OpenMP_FIND_COMPONENTS)
set(OpenMP_FINDLIST C CXX Fortran)
else()
set(OpenMP_FINDLIST ${OpenMP_FIND_COMPONENTS})
endif()
unset(_OpenMP_MIN_VERSION)
include(FindPackageHandleStandardArgs)
foreach(LANG IN LISTS OpenMP_FINDLIST)
if(CMAKE_${LANG}_COMPILER_LOADED)
if (NOT OpenMP_${LANG}_SPEC_DATE AND OpenMP_${LANG}_FLAGS)
_OPENMP_GET_SPEC_DATE("${LANG}" OpenMP_${LANG}_SPEC_DATE_INTERNAL)
set(OpenMP_${LANG}_SPEC_DATE "${OpenMP_${LANG}_SPEC_DATE_INTERNAL}" CACHE
INTERNAL "${LANG} compiler's OpenMP specification date")
_OPENMP_SET_VERSION_BY_SPEC_DATE("${LANG}")
endif()
set(OpenMP_${LANG}_FIND_QUIETLY ${OpenMP_FIND_QUIETLY})
set(OpenMP_${LANG}_FIND_REQUIRED ${OpenMP_FIND_REQUIRED})
set(OpenMP_${LANG}_FIND_VERSION ${OpenMP_FIND_VERSION})
set(OpenMP_${LANG}_FIND_VERSION_EXACT ${OpenMP_FIND_VERSION_EXACT})
set(_OPENMP_${LANG}_REQUIRED_VARS OpenMP_${LANG}_FLAGS)
if("${OpenMP_${LANG}_LIB_NAMES}" STREQUAL "NOTFOUND")
set(_OPENMP_${LANG}_REQUIRED_LIB_VARS OpenMP_${LANG}_LIB_NAMES)
else()
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_LIB_NAMES)
list(APPEND _OPENMP_${LANG}_REQUIRED_LIB_VARS OpenMP_${_OPENMP_IMPLICIT_LIB}_LIBRARY)
endforeach()
endif()
find_package_handle_standard_args(OpenMP_${LANG}
REQUIRED_VARS OpenMP_${LANG}_FLAGS ${_OPENMP_${LANG}_REQUIRED_LIB_VARS}
VERSION_VAR OpenMP_${LANG}_VERSION
)
if(OpenMP_${LANG}_FOUND)
if(DEFINED OpenMP_${LANG}_VERSION)
if(NOT _OpenMP_MIN_VERSION OR _OpenMP_MIN_VERSION VERSION_GREATER OpenMP_${LANG}_VERSION)
set(_OpenMP_MIN_VERSION OpenMP_${LANG}_VERSION)
endif()
endif()
set(OpenMP_${LANG}_LIBRARIES "")
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_LIB_NAMES)
list(APPEND OpenMP_${LANG}_LIBRARIES "${OpenMP_${_OPENMP_IMPLICIT_LIB}_LIBRARY}")
endforeach()
if(NOT TARGET OpenMP::OpenMP_${LANG})
add_library(OpenMP::OpenMP_${LANG} INTERFACE IMPORTED)
endif()
if(OpenMP_${LANG}_FLAGS)
separate_arguments(_OpenMP_${LANG}_OPTIONS NATIVE_COMMAND "${OpenMP_${LANG}_FLAGS}")
set_property(TARGET OpenMP::OpenMP_${LANG} PROPERTY
INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:${LANG}>:${_OpenMP_${LANG}_OPTIONS}>")
unset(_OpenMP_${LANG}_OPTIONS)
endif()
if(OpenMP_${LANG}_LIBRARIES)
set_property(TARGET OpenMP::OpenMP_${LANG} PROPERTY
INTERFACE_LINK_LIBRARIES "${OpenMP_${LANG}_LIBRARIES}")
endif()
endif()
endif()
endforeach()
unset(_OpenMP_REQ_VARS)
foreach(LANG IN ITEMS C CXX Fortran)
if((NOT OpenMP_FIND_COMPONENTS AND CMAKE_${LANG}_COMPILER_LOADED) OR LANG IN_LIST OpenMP_FIND_COMPONENTS)
list(APPEND _OpenMP_REQ_VARS "OpenMP_${LANG}_FOUND")
endif()
endforeach()
find_package_handle_standard_args(OpenMP
REQUIRED_VARS ${_OpenMP_REQ_VARS}
VERSION_VAR ${_OpenMP_MIN_VERSION}
HANDLE_COMPONENTS)
set(OPENMP_FOUND ${OpenMP_FOUND})
if(CMAKE_Fortran_COMPILER_LOADED AND OpenMP_Fortran_FOUND)
if(NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_MODULE)
set(OpenMP_Fortran_HAVE_OMPLIB_MODULE FALSE CACHE BOOL INTERNAL "")
endif()
if(NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_HEADER)
set(OpenMP_Fortran_HAVE_OMPLIB_HEADER FALSE CACHE BOOL INTERNAL "")
endif()
endif()
if(NOT ( CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED ))
message(SEND_ERROR "FindOpenMP requires the C, CXX or Fortran languages to be enabled")
endif()
unset(OpenMP_C_CXX_TEST_SOURCE)
unset(OpenMP_Fortran_TEST_SOURCE)
unset(OpenMP_C_CXX_CHECK_VERSION_SOURCE)
unset(OpenMP_Fortran_CHECK_VERSION_SOURCE)
unset(OpenMP_Fortran_INCLUDE_LINE)
cmake_policy(POP)

@ -27,6 +27,9 @@
# vtkm::tbb Target that contains tbb related link information
# implicitly linked to by `vtkm_cont` if tbb is enabled
#
# vtkm::openmp Target that contains openmp related link information
# implicitly linked to by `vtkm_cont` if openmp is enabled
#
# vtkm::cuda Target that contains cuda related link information
# implicitly linked to by `vtkm_cont` if cuda is enabled
#
@ -41,6 +44,7 @@
# VTKm_BUILD_SHARED_LIBS Will be enabled if VTK-m was built shared/dynamic
# VTKm_ENABLE_CUDA Will be enabled if VTK-m was built with CUDA support
# VTKm_ENABLE_TBB Will be enabled if VTK-m was built with TBB support
# VTKm_ENABLE_OPENMP Will be enabled if VTK-m was built with OpenMP support
# VTKm_ENABLE_MPI Will be enabled if VTK-m was built with MPI support
# VTKm_ENABLE_RENDERING Will be enabled if VTK-m was built with rendering support
# VTKm_ENABLE_GL_CONTEXT Will be enabled if VTK-m rendering was built with a GL context
@ -63,6 +67,7 @@ set(VTKm_VERSION "@VTKm_VERSION@")
set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
set(VTKm_ENABLE_OSMESA_CONTEXT "@VTKm_ENABLE_OSMESA_CONTEXT@")

@ -47,6 +47,19 @@ if(VTKm_ENABLE_TBB AND NOT TARGET vtkm::tbb)
endif()
endif()
if(VTKm_ENABLE_OPENMP AND NOT TARGET vtkm::openmp)
find_package(OpenMP 4.5 REQUIRED COMPONENTS CXX QUIET)
add_library(vtkm::openmp INTERFACE IMPORTED GLOBAL)
if(OpenMP_CXX_FLAGS)
set_target_properties(vtkm::openmp PROPERTIES
INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:${OpenMP_CXX_FLAGS}>")
endif()
if(OpenMP_CXX_LIBRARIES)
set_target_properties(vtkm::openmp PROPERTIES
INTERFACE_LINK_LIBRARIES "${OpenMP_CXX_LIBRARIES}")
endif()
endif()
if(VTKm_ENABLE_CUDA AND NOT TARGET vtkm::cuda)
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)

@ -86,6 +86,7 @@ endmacro ()
# Configurable Options
vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
vtkm_option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
@ -266,6 +267,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
FILES
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
)

@ -73,6 +73,8 @@ Optional dependencies are:
+ [Cuda Toolkit 7+](https://developer.nvidia.com/cuda-toolkit)
+ TBB Device Adapter
+ [TBB](https://www.threadingbuildingblocks.org/)
+ OpenMP Device Adapter
+ Requires a compiler that supports OpenMP >= 4.5.
+ OpenGL Rendering
+ The rendering module contains multiple rendering implementations
including standalone rendering code. The rendering module also

@ -46,7 +46,9 @@
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
#include <tbb/task_scheduler_init.h>
#endif // TBB
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
#include <omp.h>
#endif
// This benchmark has a number of commandline options to customize its behavior.
// See The BenchDevAlgoConfig documentations for details.
@ -1193,6 +1195,8 @@ int main(int argc, char* argv[])
{
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
int numThreads = tbb::task_scheduler_init::automatic;
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
int numThreads = omp_get_max_threads();
#endif // TBB
vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config;
@ -1323,8 +1327,12 @@ int main(int argc, char* argv[])
std::istringstream parse(argv[i]);
parse >> numThreads;
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
std::istringstream parse(argv[i]);
parse >> numThreads;
std::cout << "Selected " << numThreads << " OpenMP threads." << std::endl;
#else
std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl;
std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl;
#endif // TBB
}
else
@ -1337,6 +1345,8 @@ int main(int argc, char* argv[])
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
// Must not be destroyed as long as benchmarks are running:
tbb::task_scheduler_init init(numThreads);
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
omp_set_num_threads(numThreads);
#endif // TBB
if (config.BenchmarkFlags == 0)

@ -30,6 +30,12 @@ function(add_benchmark name files)
target_compile_definitions(${name}_TBB PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_TBB")
endif()
if (TARGET vtkm::openmp)
add_executable(${name}_OPENMP ${files})
list(APPEND benchmarks ${name}_OPENMP)
target_compile_definitions(${name}_OPENMP PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_OPENMP")
endif()
if (TARGET vtkm::cuda)
get_filename_component(fname "${name}" NAME_WE)
get_filename_component(fullpath "${name}.cxx" ABSOLUTE)
@ -70,6 +76,9 @@ if(TARGET vtkm_rendering)
if(TARGET BenchmarkRayTracing_TBB)
target_link_libraries(BenchmarkRayTracing_TBB PRIVATE vtkm_rendering)
endif()
if(TARGET BenchmarkRayTracing_OPENMP)
target_link_libraries(BenchmarkRayTracing_OPENMP PRIVATE vtkm_rendering)
endif()
if(TARGET BenchmarkRayTracing_CUDA)
target_link_libraries(BenchmarkRayTracing_CUDA PRIVATE vtkm_rendering)
endif()

@ -0,0 +1,5 @@
# OpenMP Device Adapter
A device adapter that leverages OpenMP 4.5 for threading is now available. The
new adapter is enabled using the CMake option `VTKm_ENABLE_OPENMP` and its
performance is comparable to the TBB device adapter.

@ -151,6 +151,7 @@ add_subdirectory(arg)
add_subdirectory(diy)
add_subdirectory(serial)
add_subdirectory(tbb)
add_subdirectory(openmp)
add_subdirectory(cuda)
set(backends )
@ -160,6 +161,9 @@ endif()
if(TARGET vtkm::cuda)
list(APPEND backends vtkm::cuda)
endif()
if(TARGET vtkm::openmp)
list(APPEND backends vtkm::openmp)
endif()
target_link_libraries(vtkm_cont PUBLIC vtkm_compiler_flags ${backends})
if(TARGET vtkm_diy)
# This will become a required dependency eventually.

@ -27,6 +27,7 @@
#include <vtkm/ListTag.h>
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
@ -37,6 +38,7 @@ namespace cont
struct DeviceAdapterListTagCommon : vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagCuda,
vtkm::cont::DeviceAdapterTagTBB,
vtkm::cont::DeviceAdapterTagOpenMP,
vtkm::cont::DeviceAdapterTagSerial>
{
};

@ -36,7 +36,9 @@
// Unfortunately, VTKM_ENABLE_TBB does not guarantee that TBB is (or isn't)
// available, but there is no way to check for sure in a header library.
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_TBB
#else // !VTKM_CUDA && !VTKM_ENABLE_TBB
#elif defined(VTKM_ENABLE_OPENMP)
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_OPENMP
#else // !VTKM_CUDA && !VTKM_ENABLE_TBB && !VTKM_ENABLE_OPENMP
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL
#endif // !VTKM_CUDA && !VTKM_ENABLE_TBB
#endif // VTKM_DEVICE_ADAPTER
@ -68,6 +70,14 @@
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagTBB
// OpenMP:
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagOpenMP
// Error:
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_ERROR

@ -32,6 +32,7 @@
#define VTKM_DEVICE_ADAPTER_SERIAL 1
#define VTKM_DEVICE_ADAPTER_CUDA 2
#define VTKM_DEVICE_ADAPTER_TBB 3
#define VTKM_DEVICE_ADAPTER_OPENMP 4
namespace vtkm
{

@ -0,0 +1,32 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
set(headers
DeviceAdapterOpenMP.h
)
add_subdirectory(internal)
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_OPENMP})
#-----------------------------------------------------------------------------
if (TARGET vtkm::openmp)
add_subdirectory(testing)
endif()

@ -0,0 +1,33 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_DeviceAdapterOpenMP_h
#define vtk_m_cont_openmp_DeviceAdapterOpenMP_h
#include <vtkm/cont/openmp/internal/DeviceAdapterRuntimeDetectorOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#ifdef VTKM_ENABLE_OPENMP
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>
#endif
#endif //vtk_m_cont_openmp_DeviceAdapterOpenMP_h

@ -0,0 +1,42 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
ExecutionArrayInterfaceBasic<DeviceAdapterTagOpenMP>::ExecutionArrayInterfaceBasic(
StorageBasicBase& storage)
: Superclass(storage)
{
}
} // end namespace internal
VTKM_INSTANTIATE_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagOpenMP)
}
} // end vtkm::cont

@ -0,0 +1,109 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
#define vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/internal/ArrayExportMacros.h>
#include <vtkm/cont/internal/ArrayManagerExecution.h>
#include <vtkm/cont/internal/ArrayManagerExecutionShareWithControl.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <typename T, class StorageTag>
class ArrayManagerExecution<T, StorageTag, vtkm::cont::DeviceAdapterTagOpenMP>
: public vtkm::cont::internal::ArrayManagerExecutionShareWithControl<T, StorageTag>
{
public:
using Superclass = vtkm::cont::internal::ArrayManagerExecutionShareWithControl<T, StorageTag>;
using ValueType = typename Superclass::ValueType;
using PortalType = typename Superclass::PortalType;
using PortalConstType = typename Superclass::PortalConstType;
using StorageType = typename Superclass::StorageType;
VTKM_CONT
ArrayManagerExecution(StorageType* storage)
: Superclass(storage)
{
}
VTKM_CONT
PortalConstType PrepareForInput(bool updateData)
{
return this->Superclass::PrepareForInput(updateData);
}
VTKM_CONT
PortalType PrepareForInPlace(bool updateData)
{
return this->Superclass::PrepareForInPlace(updateData);
}
VTKM_CONT
PortalType PrepareForOutput(vtkm::Id numberOfValues)
{
return this->Superclass::PrepareForOutput(numberOfValues);
}
};
template <typename T>
struct ExecutionPortalFactoryBasic<T, DeviceAdapterTagOpenMP>
: public ExecutionPortalFactoryBasicShareWithControl<T>
{
using Superclass = ExecutionPortalFactoryBasicShareWithControl<T>;
using typename Superclass::ValueType;
using typename Superclass::PortalType;
using typename Superclass::PortalConstType;
using Superclass::CreatePortal;
using Superclass::CreatePortalConst;
};
template <>
struct VTKM_CONT_EXPORT ExecutionArrayInterfaceBasic<DeviceAdapterTagOpenMP>
: public ExecutionArrayInterfaceBasicShareWithControl
{
using Superclass = ExecutionArrayInterfaceBasicShareWithControl;
VTKM_CONT
ExecutionArrayInterfaceBasic(StorageBasicBase& storage);
VTKM_CONT
DeviceAdapterId GetDeviceId() const final { return VTKM_DEVICE_ADAPTER_OPENMP; }
};
} // namespace internal
#ifndef vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
VTKM_EXPORT_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagOpenMP)
#endif // !vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
}
} // namespace vtkm::cont
#endif // vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h

@ -0,0 +1,49 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
set(headers
ArrayManagerExecutionOpenMP.h
DeviceAdapterAlgorithmOpenMP.h
DeviceAdapterRuntimeDetectorOpenMP.h
DeviceAdapterTagOpenMP.h
FunctorsOpenMP.h
ParallelQuickSortOpenMP.h
ParallelRadixSortOpenMP.h
ParallelScanOpenMP.h
ParallelSortOpenMP.h
VirtualObjectTransferOpenMP.h
)
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_OPENMP})
target_sources(vtkm_cont PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterRuntimeDetectorOpenMP.cxx
)
#-----------------------------------------------------------------------------
if (NOT VTKm_ENABLE_OPENMP)
return()
endif()
target_sources(vtkm_cont PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/ArrayManagerExecutionOpenMP.cxx
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterAlgorithmOpenMP.cxx
${CMAKE_CURRENT_SOURCE_DIR}/ParallelRadixSortOpenMP.cxx
)

@ -0,0 +1,140 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
#include <vtkm/cont/ErrorExecution.h>
#include <omp.h>
namespace vtkm
{
namespace cont
{
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>::ScheduleTask(
vtkm::exec::openmp::internal::TaskTiling1D& functor,
vtkm::Id size)
{
static constexpr vtkm::Id MESSAGE_SIZE = 1024;
char errorString[MESSAGE_SIZE];
errorString[0] = '\0';
vtkm::exec::internal::ErrorMessageBuffer errorMessage(errorString, MESSAGE_SIZE);
functor.SetErrorMessageBuffer(errorMessage);
static constexpr vtkm::Id CHUNK_SIZE = 1024;
VTKM_OPENMP_DIRECTIVE(parallel for
schedule(guided))
for (vtkm::Id i = 0; i < size; i += CHUNK_SIZE)
{
const vtkm::Id end = std::min(i + CHUNK_SIZE, size);
functor(i, end);
}
if (errorMessage.IsErrorRaised())
{
throw vtkm::cont::ErrorExecution(errorString);
}
}
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>::ScheduleTask(
vtkm::exec::openmp::internal::TaskTiling3D& functor,
vtkm::Id3 size)
{
static constexpr vtkm::Id MESSAGE_SIZE = 1024;
char errorString[MESSAGE_SIZE];
errorString[0] = '\0';
vtkm::exec::internal::ErrorMessageBuffer errorMessage(errorString, MESSAGE_SIZE);
functor.SetErrorMessageBuffer(errorMessage);
vtkm::Id3 chunkDims;
if (size[0] > 512)
{
chunkDims = { 1024, 4, 1 };
}
else if (size[0] > 256)
{
chunkDims = { 512, 4, 2 };
}
else if (size[0] > 128)
{
chunkDims = { 256, 4, 4 };
}
else if (size[0] > 64)
{
chunkDims = { 128, 8, 4 };
}
else if (size[0] > 32)
{
chunkDims = { 64, 8, 8 };
}
else if (size[0] > 16)
{
chunkDims = { 32, 16, 8 };
}
else
{
chunkDims = { 16, 16, 16 };
}
const vtkm::Id3 numChunks{ openmp::CeilDivide(size[0], chunkDims[0]),
openmp::CeilDivide(size[1], chunkDims[1]),
openmp::CeilDivide(size[2], chunkDims[2]) };
const vtkm::Id chunkCount = numChunks[0] * numChunks[1] * numChunks[2];
// Lambda to convert chunkIdx into a start/end {i, j, k}:
auto computeIJK = [&](const vtkm::Id& chunkIdx, vtkm::Id3& start, vtkm::Id3& end) {
start[0] = chunkIdx % numChunks[0];
start[1] = (chunkIdx / numChunks[0]) % numChunks[1];
start[2] = (chunkIdx / (numChunks[0] * numChunks[1]));
start *= chunkDims; // c-wise mult
end[0] = std::min(start[0] + chunkDims[0], size[0]);
end[1] = std::min(start[1] + chunkDims[1], size[1]);
end[2] = std::min(start[2] + chunkDims[2], size[2]);
};
// Iterate through each chunk, converting the chunkIdx into an ijk range:
VTKM_OPENMP_DIRECTIVE(parallel for
schedule(guided))
for (vtkm::Id chunkIdx = 0; chunkIdx < chunkCount; ++chunkIdx)
{
vtkm::Id3 startIJK;
vtkm::Id3 endIJK;
computeIJK(chunkIdx, startIJK, endIJK);
for (vtkm::Id k = startIJK[2]; k < endIJK[2]; ++k)
{
for (vtkm::Id j = startIJK[1]; j < endIJK[1]; ++j)
{
functor(startIJK[0], endIJK[0], j, k);
}
}
}
if (errorMessage.IsErrorRaised())
{
throw vtkm::cont::ErrorExecution(errorString);
}
}
}
} // end namespace vtkm::cont

@ -0,0 +1,385 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
#define vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/Error.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
#include <vtkm/cont/openmp/internal/ParallelScanOpenMP.h>
#include <vtkm/cont/openmp/internal/ParallelSortOpenMP.h>
#include <vtkm/exec/openmp/internal/TaskTilingOpenMP.h>
// For serial fallback:
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
#include <omp.h>
#include <algorithm>
#include <type_traits>
namespace vtkm
{
namespace cont
{
template <>
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>
: vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>,
vtkm::cont::DeviceAdapterTagOpenMP>
{
using DevTag = DeviceAdapterTagOpenMP;
using SerialAlgo = DeviceAdapterAlgorithm<DeviceAdapterTagSerial>;
public:
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::cont::ArrayHandle<U, COut>& output)
{
using namespace vtkm::cont::openmp;
const vtkm::Id inSize = input.GetNumberOfValues();
if (inSize == 0)
{
output.Allocate(0);
return;
}
auto inputPortal = input.PrepareForInput(DevTag());
auto outputPortal = output.PrepareForOutput(inSize, DevTag());
CopyHelper(inputPortal, outputPortal, 0, 0, inSize);
}
template <typename T, typename U, class CIn, class CStencil, class COut>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
vtkm::cont::ArrayHandle<T, COut>& output)
{
::vtkm::NotZeroInitialized unary_predicate;
CopyIf(input, stencil, output, unary_predicate);
}
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
vtkm::cont::ArrayHandle<T, COut>& output,
UnaryPredicate unary_predicate)
{
using namespace vtkm::cont::openmp;
vtkm::Id inSize = input.GetNumberOfValues();
if (inSize == 0)
{
output.Allocate(0);
return;
}
auto inputPortal = input.PrepareForInput(DevTag());
auto stencilPortal = stencil.PrepareForInput(DevTag());
auto outputPortal = output.PrepareForOutput(inSize, DevTag());
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inputPortal);
auto stencilIter = vtkm::cont::ArrayPortalToIteratorBegin(stencilPortal);
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outputPortal);
CopyIfHelper helper;
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
{
VTKM_OPENMP_DIRECTIVE(single)
{
// Calls omp_get_num_threads, thus must be used inside a parallel section.
helper.Initialize(inSize, sizeof(T));
}
VTKM_OPENMP_DIRECTIVE(for schedule(static))
for (vtkm::Id i = 0; i < helper.NumChunks; ++i)
{
helper.CopyIf(inIter, stencilIter, outIter, unary_predicate, i);
}
}
vtkm::Id numValues = helper.Reduce(outIter);
output.Shrink(numValues);
}
template <typename T, typename U, class CIn, class COut>
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::Id inputStartIndex,
vtkm::Id numberOfValuesToCopy,
vtkm::cont::ArrayHandle<U, COut>& output,
vtkm::Id outputIndex = 0)
{
using namespace vtkm::cont::openmp;
const vtkm::Id inSize = input.GetNumberOfValues();
// Check if the ranges overlap and fail if they do.
if (input == output &&
((outputIndex >= inputStartIndex && outputIndex < inputStartIndex + numberOfValuesToCopy) ||
(inputStartIndex >= outputIndex && inputStartIndex < outputIndex + numberOfValuesToCopy)))
{
return false;
}
if (inputStartIndex < 0 || numberOfValuesToCopy < 0 || outputIndex < 0 ||
inputStartIndex >= inSize)
{ //invalid parameters
return false;
}
//determine if the numberOfElementsToCopy needs to be reduced
if (inSize < (inputStartIndex + numberOfValuesToCopy))
{ //adjust the size
numberOfValuesToCopy = (inSize - inputStartIndex);
}
const vtkm::Id outSize = output.GetNumberOfValues();
const vtkm::Id copyOutEnd = outputIndex + numberOfValuesToCopy;
if (outSize < copyOutEnd)
{ //output is not large enough
if (outSize == 0)
{ //since output has nothing, just need to allocate to correct length
output.Allocate(copyOutEnd);
}
else
{ //we currently have data in this array, so preserve it in the new
//resized array
vtkm::cont::ArrayHandle<U, COut> temp;
temp.Allocate(copyOutEnd);
CopySubRange(output, 0, outSize, temp);
output = temp;
}
}
auto inputPortal = input.PrepareForInput(DevTag());
auto outputPortal = output.PrepareForInPlace(DevTag());
CopyHelper(inputPortal, outputPortal, inputStartIndex, outputIndex, numberOfValuesToCopy);
return true;
}
template <typename T, typename U, class CIn>
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input, U initialValue)
{
return Reduce(input, initialValue, vtkm::Add());
}
template <typename T, typename U, class CIn, class BinaryFunctor>
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input,
U initialValue,
BinaryFunctor binary_functor)
{
using namespace vtkm::cont::openmp;
auto portal = input.PrepareForInput(DevTag());
const OpenMPReductionSupported<typename std::decay<U>::type> fastPath;
return ReduceHelper::Execute(portal, initialValue, binary_functor, fastPath);
}
template <typename T,
typename U,
class CKeyIn,
class CValIn,
class CKeyOut,
class CValOut,
class BinaryFunctor>
VTKM_CONT static void ReduceByKey(const vtkm::cont::ArrayHandle<T, CKeyIn>& keys,
const vtkm::cont::ArrayHandle<U, CValIn>& values,
vtkm::cont::ArrayHandle<T, CKeyOut>& keys_output,
vtkm::cont::ArrayHandle<U, CValOut>& values_output,
BinaryFunctor func)
{
openmp::ReduceByKeyHelper(keys, values, keys_output, values_output, func);
}
template <typename T, class CIn, class COut>
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::cont::ArrayHandle<T, COut>& output)
{
return ScanInclusive(input, output, vtkm::Add());
}
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::cont::ArrayHandle<T, COut>& output,
BinaryFunctor binaryFunctor)
{
if (input.GetNumberOfValues() * sizeof(T) <= openmp::PAGE_SIZE)
{
return SerialAlgo::ScanInclusive(input, output, binaryFunctor);
}
using InPortalT = decltype(input.PrepareForInput(DevTag()));
using OutPortalT = decltype(output.PrepareForOutput(0, DevTag()));
using Impl = openmp::ScanInclusiveHelper<InPortalT, OutPortalT, BinaryFunctor>;
vtkm::Id numVals = input.GetNumberOfValues();
Impl impl(
input.PrepareForInput(DevTag()), output.PrepareForOutput(numVals, DevTag()), binaryFunctor);
return impl.Execute(vtkm::Id2(0, numVals));
}
template <typename T, class CIn, class COut>
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::cont::ArrayHandle<T, COut>& output)
{
return ScanExclusive(input, output, vtkm::Add(), vtkm::TypeTraits<T>::ZeroInitialization());
}
template <typename T, class CIn, class COut, class BinaryFunctor>
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
vtkm::cont::ArrayHandle<T, COut>& output,
BinaryFunctor binaryFunctor,
const T& initialValue)
{
if (input.GetNumberOfValues() * sizeof(T) <= openmp::PAGE_SIZE)
{
return SerialAlgo::ScanExclusive(input, output, binaryFunctor, initialValue);
}
using InPortalT = decltype(input.PrepareForInput(DevTag()));
using OutPortalT = decltype(output.PrepareForOutput(0, DevTag()));
using Impl = openmp::ScanExclusiveHelper<InPortalT, OutPortalT, BinaryFunctor>;
vtkm::Id numVals = input.GetNumberOfValues();
Impl impl(input.PrepareForInput(DevTag()),
output.PrepareForOutput(numVals, DevTag()),
binaryFunctor,
initialValue);
return impl.Execute(vtkm::Id2(0, numVals));
}
/// \brief Unstable ascending sort of input array.
///
/// Sorts the contents of \c values so that they in ascending value. Doesn't
/// guarantee stability
///
template <typename T, class Storage>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
{
Sort(values, vtkm::SortLess());
}
template <typename T, class Storage, class BinaryCompare>
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values,
BinaryCompare binary_compare)
{
openmp::sort::parallel_sort(values, binary_compare);
}
template <typename T, typename U, class StorageT, class StorageU>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<U, StorageU>& values)
{
SortByKey(keys, values, std::less<T>());
}
template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<U, StorageU>& values,
BinaryCompare binary_compare)
{
openmp::sort::parallel_sort_bykey(keys, values, binary_compare);
}
template <typename T, class Storage>
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values)
{
Unique(values, std::equal_to<T>());
}
template <typename T, class Storage, class BinaryCompare>
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values,
BinaryCompare binary_compare)
{
auto portal = values.PrepareForInPlace(DevTag());
auto iter = vtkm::cont::ArrayPortalToIteratorBegin(portal);
using IterT = typename std::decay<decltype(iter)>::type;
using Uniqifier = openmp::UniqueHelper<IterT, BinaryCompare>;
Uniqifier uniquifier(iter, portal.GetNumberOfValues(), binary_compare);
vtkm::Id outSize = uniquifier.Execute();
values.Shrink(outSize);
}
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling1D& functor,
vtkm::Id size);
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling3D& functor,
vtkm::Id3 size);
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
{
vtkm::exec::openmp::internal::TaskTiling1D kernel(functor);
ScheduleTask(kernel, numInstances);
}
template <class FunctorType>
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
{
vtkm::exec::openmp::internal::TaskTiling3D kernel(functor);
ScheduleTask(kernel, rangeMax);
}
VTKM_CONT static void Synchronize()
{
// Nothing to do. This device schedules all of its operations using a
// split/join paradigm. This means that the if the control thread is
// calling this method, then nothing should be running in the execution
// environment.
}
};
template <>
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagOpenMP>
{
public:
template <typename WorkletType, typename InvocationType>
static vtkm::exec::serial::internal::TaskTiling1D MakeTask(const WorkletType& worklet,
const InvocationType& invocation,
vtkm::Id,
vtkm::Id globalIndexOffset = 0)
{
return vtkm::exec::openmp::internal::TaskTiling1D(worklet, invocation, globalIndexOffset);
}
template <typename WorkletType, typename InvocationType>
static vtkm::exec::serial::internal::TaskTiling3D MakeTask(const WorkletType& worklet,
const InvocationType& invocation,
vtkm::Id3,
vtkm::Id globalIndexOffset = 0)
{
return vtkm::exec::openmp::internal::TaskTiling3D(worklet, invocation, globalIndexOffset);
}
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h

@ -0,0 +1,32 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/openmp/internal/DeviceAdapterRuntimeDetectorOpenMP.h>
namespace vtkm
{
namespace cont
{
VTKM_CONT bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagOpenMP>::Exists() const
{
using DeviceAdapterTraits = vtkm::cont::DeviceAdapterTraits<vtkm::cont::DeviceAdapterTagOpenMP>;
return DeviceAdapterTraits::Valid;
}
}
}

@ -0,0 +1,47 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterRuntimeDetector_h
#define vtk_m_cont_openmp_internal_DeviceAdapterRuntimeDetector_h
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/vtkm_cont_export.h>
namespace vtkm
{
namespace cont
{
template <class DeviceAdapterTag>
class DeviceAdapterRuntimeDetector;
/// Determine if this machine supports Serial backend
///
template <>
class VTKM_CONT_EXPORT DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagOpenMP>
{
public:
/// Returns true if the given device adapter is supported on the current
/// machine.
VTKM_CONT bool Exists() const;
};
}
}
#endif

@ -0,0 +1,32 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
#define vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
#include <vtkm/cont/internal/DeviceAdapterTag.h>
#ifdef VTKM_ENABLE_OPENMP
VTKM_VALID_DEVICE_ADAPTER(OpenMP, VTKM_DEVICE_ADAPTER_OPENMP)
#else
VTKM_INVALID_DEVICE_ADAPTER(OpenMP, VTKM_DEVICE_ADAPTER_OPENMP)
#endif
#endif // vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h

@ -0,0 +1,674 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_FunctorsOpenMP_h
#define vtk_m_cont_openmp_internal_FunctorsOpenMP_h
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/BinaryOperators.h>
#include <vtkm/BinaryPredicates.h>
#include <vtkm/Pair.h>
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ErrorExecution.h>
#include <omp.h>
#include <algorithm>
#include <type_traits>
#include <vector>
// Wrap all '#pragma omp ...' calls in this macro so we can disable them in
// non-omp builds and avoid a multitude of 'ignoring pragma..." warnings.
#ifdef _OPENMP
#define _VTKM_OPENMP_DIRECTIVE_IMPL(fullDir) _Pragma(#fullDir)
#define VTKM_OPENMP_DIRECTIVE(dir) _VTKM_OPENMP_DIRECTIVE_IMPL(omp dir)
#else // _OPENMP
#define VTKM_OPENMP_DIRECTIVE(directive)
#endif // _OPENMP
// When defined, supported type / operator combinations will use the OpenMP
// reduction(...) clause. Otherwise, all reductions use the general
// implementation with a manual reduction once the threads complete.
// I don't know how, but the benchmarks currently perform better without the
// specializations.
//#define VTKM_OPENMP_USE_NATIVE_REDUCTION
namespace vtkm
{
namespace cont
{
namespace openmp
{
constexpr static vtkm::Id CACHE_LINE_SIZE = 64;
constexpr static vtkm::Id PAGE_SIZE = 4096;
// Returns ceil(num/den) for integral types
template <typename T>
static constexpr T CeilDivide(const T& numerator, const T& denominator)
{
return (numerator + denominator - 1) / denominator;
}
// Computes the number of values per chunk. Note that numChunks + chunkSize may
// exceed numVals, so be sure to check upper limits.
static void ComputeChunkSize(const vtkm::Id numVals,
const vtkm::Id numThreads,
const vtkm::Id chunksPerThread,
const vtkm::Id bytesPerValue,
vtkm::Id& numChunks,
vtkm::Id& valuesPerChunk)
{
// try to evenly distribute pages across chunks:
const vtkm::Id bytesIn = numVals * bytesPerValue;
const vtkm::Id pagesIn = CeilDivide(bytesIn, PAGE_SIZE);
// If we don't have enough pages to honor chunksPerThread, ignore it:
numChunks = (pagesIn > numThreads * chunksPerThread) ? numThreads * chunksPerThread : numThreads;
const vtkm::Id pagesPerChunk = CeilDivide(pagesIn, numChunks);
valuesPerChunk = CeilDivide(pagesPerChunk * PAGE_SIZE, bytesPerValue);
}
template <typename T, typename U>
VTKM_EXEC_CONT static void DoCopy(T src, U dst, vtkm::Id numVals, std::true_type)
{
if (numVals)
{
std::copy(src, src + numVals, dst);
}
}
// Don't use std::copy when type conversion is required because MSVC.
template <typename InIterT, typename OutIterT>
VTKM_EXEC_CONT static void DoCopy(InIterT inIter,
OutIterT outIter,
vtkm::Id numVals,
std::false_type)
{
using ValueType = typename std::iterator_traits<OutIterT>::value_type;
for (vtkm::Id i = 0; i < numVals; ++i)
{
*(outIter++) = static_cast<ValueType>(*(inIter++));
}
}
template <typename InIterT, typename OutIterT>
VTKM_EXEC_CONT static void DoCopy(InIterT inIter, OutIterT outIter, vtkm::Id numVals)
{
using InValueType = typename std::iterator_traits<InIterT>::value_type;
using OutValueType = typename std::iterator_traits<OutIterT>::value_type;
DoCopy(inIter, outIter, numVals, std::is_same<InValueType, OutValueType>());
}
template <typename InPortalT, typename OutPortalT>
VTKM_EXEC_CONT static void CopyHelper(InPortalT inPortal,
OutPortalT outPortal,
vtkm::Id inStart,
vtkm::Id outStart,
vtkm::Id numVals)
{
using InValueT = typename InPortalT::ValueType;
using OutValueT = typename OutPortalT::ValueType;
constexpr auto isSame = std::is_same<InValueT, OutValueT>();
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inPortal) + inStart;
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outPortal) + outStart;
vtkm::Id valuesPerChunk;
VTKM_OPENMP_DIRECTIVE(parallel default(none) shared(inIter, outIter, valuesPerChunk, numVals))
{
VTKM_OPENMP_DIRECTIVE(single)
{
// Evenly distribute full pages to all threads. We manually chunk the
// data here so that we can exploit std::copy's memmove optimizations.
vtkm::Id numChunks;
ComputeChunkSize(
numVals, omp_get_num_threads(), 8, sizeof(InValueT), numChunks, valuesPerChunk);
}
VTKM_OPENMP_DIRECTIVE(for schedule(static))
for (vtkm::Id i = 0; i < numVals; i += valuesPerChunk)
{
vtkm::Id chunkSize = std::min(numVals - i, valuesPerChunk);
DoCopy(inIter + i, outIter + i, chunkSize, isSame);
}
}
}
struct CopyIfHelper
{
vtkm::Id NumValues;
vtkm::Id NumThreads;
vtkm::Id ValueSize;
vtkm::Id NumChunks;
vtkm::Id ChunkSize;
std::vector<vtkm::Id> EndIds;
CopyIfHelper() = default;
void Initialize(vtkm::Id numValues, vtkm::Id valueSize)
{
this->NumValues = numValues;
this->NumThreads = omp_get_num_threads();
this->ValueSize = valueSize;
// Evenly distribute pages across the threads. We manually chunk the
// data here so that we can exploit std::copy's memmove optimizations.
ComputeChunkSize(
this->NumValues, this->NumThreads, 8, valueSize, this->NumChunks, this->ChunkSize);
this->EndIds.resize(this->NumChunks);
}
template <typename InIterT, typename StencilIterT, typename OutIterT, typename PredicateT>
void CopyIf(InIterT inIter,
StencilIterT stencilIter,
OutIterT outIter,
PredicateT pred,
vtkm::Id chunk)
{
vtkm::Id startPos = std::min(chunk * this->ChunkSize, this->NumValues);
vtkm::Id endPos = std::min((chunk + 1) * this->ChunkSize, this->NumValues);
vtkm::Id outPos = startPos;
for (vtkm::Id inPos = startPos; inPos < endPos; ++inPos)
{
if (pred(stencilIter[inPos]))
{
outIter[outPos++] = inIter[inPos];
}
}
this->EndIds[chunk] = outPos;
}
template <typename OutIterT>
vtkm::Id Reduce(OutIterT data)
{
vtkm::Id endPos = this->EndIds.front();
for (vtkm::Id i = 1; i < this->NumChunks; ++i)
{
vtkm::Id chunkStart = std::min(i * this->ChunkSize, this->NumValues);
vtkm::Id chunkEnd = this->EndIds[i];
vtkm::Id numValuesToCopy = chunkEnd - chunkStart;
if (numValuesToCopy > 0 && chunkStart != endPos)
{
std::copy(data + chunkStart, data + chunkEnd, data + endPos);
}
endPos += numValuesToCopy;
}
return endPos;
}
};
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION
// OpenMP only declares reduction operations for primitive types. This utility
// detects if a type T is supported.
template <typename T>
struct OpenMPReductionSupported : std::false_type
{
};
template <>
struct OpenMPReductionSupported<Int8> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<UInt8> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<Int16> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<UInt16> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<Int32> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<UInt32> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<Int64> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<UInt64> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<Float32> : std::true_type
{
};
template <>
struct OpenMPReductionSupported<Float64> : std::true_type
{
};
#else
template <typename T>
using OpenMPReductionSupported = std::false_type;
#endif // VTKM_OPENMP_USE_NATIVE_REDUCTION
struct ReduceHelper
{
// Generic implementation:
template <typename PortalT, typename ReturnType, typename Functor>
static ReturnType Execute(PortalT portal, ReturnType init, Functor functorIn, std::false_type)
{
internal::WrappedBinaryOperator<ReturnType, Functor> f(functorIn);
const vtkm::Id numVals = portal.GetNumberOfValues();
auto data = vtkm::cont::ArrayPortalToIteratorBegin(portal);
bool doParallel = false;
std::vector<ReturnType> threadData;
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(f)
shared(data, threadData, doParallel))
{
int tid = omp_get_thread_num();
int numThreads = omp_get_num_threads();
VTKM_OPENMP_DIRECTIVE(single)
{
if (numVals >= numThreads * 2)
{
doParallel = true;
threadData.resize(numThreads);
}
}
if (doParallel)
{
// Use the first (numThreads*2) values for initializing:
ReturnType accum;
accum = f(data[2 * tid], data[2 * tid + 1]);
// Assign each thread chunks of the remaining values for local reduction
VTKM_OPENMP_DIRECTIVE(for schedule(static))
for (vtkm::Id i = numThreads * 2; i < numVals; i++)
{
accum = f(accum, data[i]);
}
threadData[tid] = accum;
}
} // end parallel
if (doParallel)
{
// do the final reduction serially:
for (size_t i = 0; i < threadData.size(); ++i)
{
init = f(init, threadData[i]);
}
}
else
{
// Not enough threads. Do the entire reduction in serial:
for (vtkm::Id i = 0; i < numVals; ++i)
{
init = f(init, data[i]);
}
}
return init;
}
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION
// Specialize for vtkm functors with OpenMP special cases:
#define VTKM_OPENMP_SPECIALIZE_REDUCE1(FunctorType, PragmaString) \
template <typename PortalT, typename ReturnType> \
static ReturnType Execute( \
PortalT portal, ReturnType value, FunctorType functorIn, std::true_type) \
{ \
const vtkm::Id numValues = portal.GetNumberOfValues(); \
internal::WrappedBinaryOperator<ReturnType, FunctorType> f(functorIn); \
_Pragma(#PragmaString) for (vtkm::Id i = 0; i < numValues; ++i) \
{ \
value = f(value, portal.Get(i)); \
} \
return value; \
}
// Constructing the pragma string inside the _Pragma call doesn't work so
// we jump through a hoop:
#define VTKM_OPENMP_SPECIALIZE_REDUCE(FunctorType, Operator) \
VTKM_OPENMP_SPECIALIZE_REDUCE1(FunctorType, "omp parallel for reduction(" #Operator ":value)")
// + (Add, Sum)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Add, +)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Sum, +)
// * (Multiply, Product)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Multiply, *)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Product, *)
// - (Subtract)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Subtract, -)
// & (BitwiseAnd)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseAnd, &)
// | (BitwiseOr)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseOr, |)
// ^ (BitwiseXor)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseXor, ^)
// && (LogicalAnd)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::LogicalAnd, &&)
// || (LogicalOr)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::LogicalOr, ||)
// min (Minimum)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Minimum, min)
// max (Maximum)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Maximum, max)
#undef VTKM_OPENMP_SPECIALIZE_REDUCE
#undef VTKM_OPENMP_SPECIALIZE_REDUCE1
#endif // VTKM_OPENMP_USE_NATIVE_REDUCTION
};
template <typename KeysInArray,
typename ValuesInArray,
typename KeysOutArray,
typename ValuesOutArray,
typename BinaryFunctor>
void ReduceByKeyHelper(KeysInArray keysInArray,
ValuesInArray valuesInArray,
KeysOutArray keysOutArray,
ValuesOutArray valuesOutArray,
BinaryFunctor functor)
{
using KeyType = typename KeysInArray::ValueType;
using ValueType = typename ValuesInArray::ValueType;
const vtkm::Id numValues = keysInArray.GetNumberOfValues();
auto keysInPortal = keysInArray.PrepareForInput(DeviceAdapterTagOpenMP());
auto valuesInPortal = valuesInArray.PrepareForInput(DeviceAdapterTagOpenMP());
auto keysIn = vtkm::cont::ArrayPortalToIteratorBegin(keysInPortal);
auto valuesIn = vtkm::cont::ArrayPortalToIteratorBegin(valuesInPortal);
auto keysOutPortal = keysOutArray.PrepareForOutput(numValues, DeviceAdapterTagOpenMP());
auto valuesOutPortal = valuesOutArray.PrepareForOutput(numValues, DeviceAdapterTagOpenMP());
auto keysOut = vtkm::cont::ArrayPortalToIteratorBegin(keysOutPortal);
auto valuesOut = vtkm::cont::ArrayPortalToIteratorBegin(valuesOutPortal);
internal::WrappedBinaryOperator<ValueType, BinaryFunctor> f(functor);
vtkm::Id outIdx = 0;
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(keysIn, valuesIn, keysOut, valuesOut, f)
shared(outIdx))
{
int tid = omp_get_thread_num();
int numThreads = omp_get_num_threads();
// Determine bounds for this thread's scan operation:
vtkm::Id chunkSize = (numValues + numThreads - 1) / numThreads;
vtkm::Id scanIdx = std::min(tid * chunkSize, numValues);
vtkm::Id scanEnd = std::min(scanIdx + chunkSize, numValues);
auto threadKeysBegin = keysOut + scanIdx;
auto threadValuesBegin = valuesOut + scanIdx;
auto threadKey = threadKeysBegin;
auto threadValue = threadValuesBegin;
// Reduce each thread's partition:
KeyType rangeKey;
ValueType rangeValue;
for (;;)
{
if (scanIdx < scanEnd)
{
rangeKey = keysIn[scanIdx];
rangeValue = valuesIn[scanIdx];
++scanIdx;
// Locate end of current range:
while (scanIdx < scanEnd && static_cast<KeyType>(keysIn[scanIdx]) == rangeKey)
{
rangeValue = f(rangeValue, valuesIn[scanIdx]);
++scanIdx;
}
*threadKey = rangeKey;
*threadValue = rangeValue;
++threadKey;
++threadValue;
}
else
{
break;
}
}
if (tid == 0)
{
outIdx = static_cast<vtkm::Id>(threadKey - threadKeysBegin);
}
// Combine the reduction results. Skip tid == 0, since it's already in
// the correct location:
for (int i = 1; i < numThreads; ++i)
{
// This barrier ensures that:
// 1) Threads remain synchronized through this final reduction loop.
// 2) The outIdx variable is initialized by thread 0.
// 3) All threads have reduced their partitions.
VTKM_OPENMP_DIRECTIVE(barrier)
if (tid == i)
{
// Check if the previous thread's last key matches our first:
if (outIdx > 0 && threadKeysBegin < threadKey && keysOut[outIdx - 1] == *threadKeysBegin)
{
valuesOut[outIdx - 1] = f(valuesOut[outIdx - 1], *threadValuesBegin);
++threadKeysBegin;
++threadValuesBegin;
}
// Copy reduced partition to final location (if needed)
if (threadKeysBegin < threadKey && threadKeysBegin != keysOut + outIdx)
{
std::copy(threadKeysBegin, threadKey, keysOut + outIdx);
std::copy(threadValuesBegin, threadValue, valuesOut + outIdx);
}
outIdx += static_cast<vtkm::Id>(threadKey - threadKeysBegin);
} // end tid == i
} // end combine reduction
} // end parallel
keysOutArray.Shrink(outIdx);
valuesOutArray.Shrink(outIdx);
}
template <typename IterT, typename RawPredicateT>
struct UniqueHelper
{
using ValueType = typename std::iterator_traits<IterT>::value_type;
using PredicateT = internal::WrappedBinaryOperator<bool, RawPredicateT>;
struct Node
{
vtkm::Id2 InputRange{ -1, -1 };
vtkm::Id2 OutputRange{ -1, -1 };
// Pad the node out to the size of a cache line to prevent false sharing:
static constexpr size_t DataSize = 2 * sizeof(vtkm::Id2);
static constexpr size_t NumCacheLines = CeilDivide<size_t>(DataSize, CACHE_LINE_SIZE);
static constexpr size_t PaddingSize = NumCacheLines * CACHE_LINE_SIZE - DataSize;
unsigned char Padding[PaddingSize];
};
IterT Data;
vtkm::Id NumValues;
PredicateT Predicate;
vtkm::Id LeafSize;
std::vector<Node> Nodes;
size_t NextNode;
UniqueHelper(IterT iter, vtkm::Id numValues, RawPredicateT pred)
: Data(iter)
, NumValues(numValues)
, Predicate(pred)
, LeafSize(0)
, NextNode(0)
{
}
vtkm::Id Execute()
{
vtkm::Id outSize = 0;
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
{
VTKM_OPENMP_DIRECTIVE(single)
{
this->Prepare();
// Kick off task-based divide-and-conquer uniquification:
Node* rootNode = this->AllocNode();
rootNode->InputRange = vtkm::Id2(0, this->NumValues);
this->Uniquify(rootNode);
outSize = rootNode->OutputRange[1] - rootNode->OutputRange[0];
}
}
return outSize;
}
private:
void Prepare()
{
// Figure out how many values each thread should handle:
int numThreads = omp_get_num_threads();
vtkm::Id chunksPerThread = 8;
vtkm::Id numChunks;
ComputeChunkSize(
this->NumValues, numThreads, chunksPerThread, sizeof(ValueType), numChunks, this->LeafSize);
// Compute an upper-bound of the number of nodes in the tree:
size_t numNodes = numChunks;
while (numChunks > 1)
{
numChunks = (numChunks + 1) / 2;
numNodes += numChunks;
}
this->Nodes.resize(numNodes);
this->NextNode = 0;
}
Node* AllocNode()
{
size_t nodeIdx;
// GCC emits a false positive "value computed but not used" for this block:
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-value"
VTKM_OPENMP_DIRECTIVE(atomic capture)
{
nodeIdx = this->NextNode;
++this->NextNode;
}
#pragma GCC diagnostic pop
VTKM_ASSERT(nodeIdx < this->Nodes.size());
return &this->Nodes[nodeIdx];
}
bool IsLeaf(const vtkm::Id2& range) { return (range[1] - range[0]) <= this->LeafSize; }
// Not an strict midpoint, but ensures that the first range will always be
// a multiple of the leaf size.
vtkm::Id ComputeMidpoint(const vtkm::Id2& range)
{
const vtkm::Id n = range[1] - range[0];
const vtkm::Id np = this->LeafSize;
return CeilDivide(n / 2, np) * np + range[0];
}
void Uniquify(Node* node)
{
if (!this->IsLeaf(node->InputRange))
{
vtkm::Id midpoint = this->ComputeMidpoint(node->InputRange);
Node* right = this->AllocNode();
Node* left = this->AllocNode();
right->InputRange = vtkm::Id2(midpoint, node->InputRange[1]);
// Intel compilers seem to have trouble following the 'this' pointer
// when launching tasks, resulting in a corrupt task environment.
// Explicitly copying the pointer into a local variable seems to fix this.
auto explicitThis = this;
VTKM_OPENMP_DIRECTIVE(taskgroup)
{
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->Uniquify(right); }
left->InputRange = vtkm::Id2(node->InputRange[0], midpoint);
this->Uniquify(left);
} // end taskgroup. Both sides of the tree will be completed here.
// Combine the ranges in the left side:
if (this->Predicate(this->Data[left->OutputRange[1] - 1], this->Data[right->OutputRange[0]]))
{
++right->OutputRange[0];
}
vtkm::Id numVals = right->OutputRange[1] - right->OutputRange[0];
DoCopy(this->Data + right->OutputRange[0], this->Data + left->OutputRange[1], numVals);
node->OutputRange[0] = left->OutputRange[0];
node->OutputRange[1] = left->OutputRange[1] + numVals;
}
else
{
auto start = this->Data + node->InputRange[0];
auto end = this->Data + node->InputRange[1];
end = std::unique(start, end, this->Predicate);
node->OutputRange[0] = node->InputRange[0];
node->OutputRange[1] = node->InputRange[0] + static_cast<vtkm::Id>(end - start);
}
}
};
}
}
} // end namespace vtkm::cont::openmp
#endif // vtk_m_cont_openmp_internal_FunctorsOpenMP_h

@ -0,0 +1,271 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayHandle.h>
#include <omp.h>
#include <iterator>
namespace vtkm
{
namespace cont
{
namespace openmp
{
namespace sort
{
namespace quick
{
template <typename IterType, typename RawBinaryCompare>
struct QuickSorter
{
using BinaryCompare = vtkm::cont::internal::WrappedBinaryOperator<bool, RawBinaryCompare>;
using ValueType = typename std::iterator_traits<IterType>::value_type;
IterType Data;
BinaryCompare Compare;
vtkm::Id SerialSize;
QuickSorter(IterType iter, RawBinaryCompare comp)
: Data(iter)
, Compare(comp)
, SerialSize(0)
{
}
void Execute(const vtkm::Id2 range)
{
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
{
VTKM_OPENMP_DIRECTIVE(single)
{
this->Prepare(range);
this->Sort(range);
}
}
}
private:
void Prepare(const vtkm::Id2 /*range*/)
{
// Rough benchmarking on an 4-core+4HT processor shows that this sort is
// most efficient (within 5% of TBB sort) when we switch to a serial
// implementation once a partition is less than 32K keys
this->SerialSize = 32768;
}
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Pair<vtkm::Id, ValueType>& v1,
const vtkm::Pair<vtkm::Id, ValueType>& v2,
const vtkm::Pair<vtkm::Id, ValueType>& v3) const
{
if (this->Compare(v1.second, v2.second))
{ // v1 < v2
if (this->Compare(v1.second, v3.second))
{ // v1 < v3
if (this->Compare(v2.second, v3.second))
{ // v1 < v2 < v3
return v2;
}
else // v3 < v2
{ // v1 < v3 < v2
return v3;
}
}
else // v3 < v1
{ // v3 < v1 < v2
return v1;
}
}
else
{ // v2 < v1
if (this->Compare(v2.second, v3.second))
{ // v2 < v3
if (this->Compare(v1.second, v3.second))
{ // v2 < v1 < v3
return v1;
}
else
{ // v2 < v3 < v1
return v3;
}
}
else
{ // v3 < v2 < v1
return v2;
}
}
}
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Id ids[3]) const
{
return this->MedianOf3(vtkm::Pair<vtkm::Id, ValueType>(ids[0], this->Data[ids[0]]),
vtkm::Pair<vtkm::Id, ValueType>(ids[1], this->Data[ids[1]]),
vtkm::Pair<vtkm::Id, ValueType>(ids[2], this->Data[ids[2]]));
}
vtkm::Pair<vtkm::Id, ValueType> PseudoMedianOf9(const vtkm::Id ids[9]) const
{
return this->MedianOf3(
this->MedianOf3(ids), this->MedianOf3(ids + 3), this->MedianOf3(ids + 6));
}
// Approximate the median of the range and return its index.
vtkm::Pair<vtkm::Id, ValueType> SelectPivot(const vtkm::Id2 range) const
{
const vtkm::Id numVals = range[1] - range[0];
assert(numVals >= 9);
// Pseudorandomize the pivot locations to avoid issues with periodic data
// (evenly sampling inputs with periodic values tends to cause the same
// value to be obtained for all samples)
const vtkm::Id seed = range[0] * 3 / 2 + range[1] * 11 / 3 + numVals * 10 / 7;
const vtkm::Id delta = (numVals / 9) * 4 / 3;
vtkm::Id sampleLocations[9] = {
range[0] + ((seed + 0 * delta) % numVals), range[0] + ((seed + 1 * delta) % numVals),
range[0] + ((seed + 2 * delta) % numVals), range[0] + ((seed + 3 * delta) % numVals),
range[0] + ((seed + 4 * delta) % numVals), range[0] + ((seed + 5 * delta) % numVals),
range[0] + ((seed + 6 * delta) % numVals), range[0] + ((seed + 7 * delta) % numVals),
range[0] + ((seed + 8 * delta) % numVals)
};
return this->PseudoMedianOf9(sampleLocations);
}
// Select a pivot and partition data with it, returning the final location of
// the pivot element(s). We use Bentley-McIlroy three-way partitioning to
// improve handling of duplicate keys, so the pivot "location" is actually
// a range of identical keys, hence the vtkm::Id2 return type, which mark
// the [begin, end) of the pivot range.
vtkm::Id2 PartitionData(const vtkm::Id2 range)
{
using namespace std; // For ADL swap
const vtkm::Pair<vtkm::Id, ValueType> pivotData = this->SelectPivot(range);
const vtkm::Id& origPivotIdx = pivotData.first;
const ValueType& pivotVal = pivotData.second;
// Move the pivot to the end of the block while we partition the rest:
swap(this->Data[origPivotIdx], this->Data[range[1] - 1]);
// Indices of the last partitioned keys:
vtkm::Id2 dataCursors(range[0] - 1, range[1] - 1);
// Indices of the start/end of the keys equal to the pivot:
vtkm::Id2 pivotCursors(dataCursors);
for (;;)
{
// Advance the data cursors past all keys that are already partitioned:
while (this->Compare(this->Data[++dataCursors[0]], pivotVal))
;
while (this->Compare(pivotVal, this->Data[--dataCursors[1]]) && dataCursors[1] > range[0])
;
// Range is partitioned the cursors have crossed:
if (dataCursors[0] >= dataCursors[1])
{
break;
}
// Both dataCursors are pointing at incorrectly partitioned keys. Swap
// them to place them in the proper partitions:
swap(this->Data[dataCursors[0]], this->Data[dataCursors[1]]);
// If the elements we just swapped are actually equivalent to the pivot
// value, move them to the pivot storage locations:
if (!this->Compare(this->Data[dataCursors[0]], pivotVal))
{
++pivotCursors[0];
swap(this->Data[pivotCursors[0]], this->Data[dataCursors[0]]);
}
if (!this->Compare(pivotVal, this->Data[dataCursors[1]]))
{
--pivotCursors[1];
swap(this->Data[pivotCursors[1]], this->Data[dataCursors[1]]);
}
}
// Data is now partitioned as:
// | Equal | Less | Greater | Equal |
// Move the equal keys to the middle for the final partitioning:
// | Less | Equal | Greater |
// First the original pivot value at the end:
swap(this->Data[range[1] - 1], this->Data[dataCursors[0]]);
// Update the cursors to either side of the pivot:
dataCursors = vtkm::Id2(dataCursors[0] - 1, dataCursors[0] + 1);
for (vtkm::Id i = range[0]; i < pivotCursors[0]; ++i, --dataCursors[0])
{
swap(this->Data[i], this->Data[dataCursors[0]]);
}
for (vtkm::Id i = range[1] - 2; i > pivotCursors[1]; --i, ++dataCursors[1])
{
swap(this->Data[i], this->Data[dataCursors[1]]);
}
// Adjust the cursor so we can use them to construct the regions for the
// recursive call:
++dataCursors[0];
return dataCursors;
}
void Sort(const vtkm::Id2 range)
{
const vtkm::Id numVals = range[1] - range[0];
if (numVals <= this->SerialSize)
{
std::sort(this->Data + range[0], this->Data + range[1], this->Compare);
return;
}
const vtkm::Id2 pivots = this->PartitionData(range);
const vtkm::Id2 lhRange = vtkm::Id2(range[0], pivots[0]);
const vtkm::Id2 rhRange = vtkm::Id2(pivots[1], range[1]);
// Intel compilers seem to have trouble following the 'this' pointer
// when launching tasks, resulting in a corrupt task environment.
// Explicitly copying the pointer into a local variable seems to fix this.
auto explicitThis = this;
VTKM_OPENMP_DIRECTIVE(task default(none) firstprivate(rhRange, explicitThis))
{
explicitThis->Sort(rhRange);
}
this->Sort(lhRange);
}
};
}
} // end namespace sort::quick
}
}
} // end namespace vtkm::cont::openmp

@ -0,0 +1,88 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/internal/ParallelRadixSort.h>
#include <omp.h>
namespace vtkm
{
namespace cont
{
namespace openmp
{
namespace sort
{
namespace radix
{
struct RadixThreaderOpenMP
{
size_t GetAvailableCores() const
{
size_t result;
if (omp_in_parallel())
{
result = static_cast<size_t>(omp_get_num_threads());
}
else
{
#pragma omp parallel
{
result = static_cast<size_t>(omp_get_num_threads());
}
}
return result;
}
template <typename TaskType>
void RunParentTask(TaskType task)
{
assert(!omp_in_parallel());
#pragma omp parallel default(none) shared(task)
{
#pragma omp single
{
task();
}
} // Implied barrier ensures that child tasks will finish.
}
template <typename TaskType, typename ThreadData>
void RunChildTasks(ThreadData, TaskType left, TaskType right)
{
assert(omp_in_parallel());
#pragma omp task default(none) firstprivate(right)
{
right();
}
// Execute the left task in the existing thread.
left();
}
};
VTKM_INSTANTIATE_RADIX_SORT_FOR_THREADER(RadixThreaderOpenMP)
}
} // end namespace sort::radix
}
}
} // end namespace vtkm::cont::openmp

@ -0,0 +1,44 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
#define vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
#include <vtkm/cont/internal/ParallelRadixSortInterface.h>
namespace vtkm
{
namespace cont
{
namespace openmp
{
namespace sort
{
namespace radix
{
VTKM_DECLARE_RADIX_SORT()
}
}
}
}
} // end namespace vtkm::cont::openmp::sort::radix
#endif // vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h

@ -0,0 +1,489 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
#include <vtkm/cont/internal/FunctorsGeneral.h>
#include <vtkm/Types.h>
#include <vtkm/cont/ArrayHandle.h>
#include <omp.h>
namespace vtkm
{
namespace cont
{
namespace openmp
{
namespace scan
{
enum class ChildType
{
Left,
Right
};
// Generic implementation of modified Ladner & Fischer 1977 "adder" algorithm
// used for backbone of exclusive/inclusive scans. Language in comments is
// specific to computing a sum, but the implementation should be generic enough
// for any scan operation.
//
// The basic idea is that a tree structure is used to partition the input into
// sets of LeafSize. Each leaf of the tree is processed in two stages: First,
// the sum of each leaf is computed, and this information is pushed up the tree
// to compute the sum of each node's child leaves. Then the partial sum at the
// start of each node is computed and pushed down the tree (the "carry"
// values). In the second pass through each leaf's data, these partial sums are
// used to compute the final output from the carry value and the input data.
//
// The passes will likely overlap due to the "leftEdge" optimizations, which
// allow each leaf to start the second pass as soon as the first pass of all
// previous leaves is completed. Additionally, the first leaf in the data will
// combine both passes into one, computing the final output data while
// generating its sum for the communication stage.
template <typename ScanBody>
struct Adder : public ScanBody
{
template <typename NodeImpl>
struct NodeWrapper : public NodeImpl
{
// Range of IDs this node represents
vtkm::Id2 Range{ -1, -1 };
// Connections:
NodeWrapper* Parent{ nullptr };
NodeWrapper* Left{ nullptr };
NodeWrapper* Right{ nullptr };
// Special flag to mark nodes on the far left edge of the tree. This allows
// various optimization that start the second pass sooner on some ranges.
bool LeftEdge{ false };
// Pad the node out to the size of a cache line to prevent false sharing:
static constexpr size_t DataSize =
sizeof(NodeImpl) + sizeof(vtkm::Id2) + 3 * sizeof(NodeWrapper*) + sizeof(bool);
static constexpr size_t NumCacheLines = CeilDivide<size_t>(DataSize, CACHE_LINE_SIZE);
static constexpr size_t PaddingSize = NumCacheLines * CACHE_LINE_SIZE - DataSize;
unsigned char Padding[PaddingSize];
};
using Node = NodeWrapper<typename ScanBody::Node>;
using ValueType = typename ScanBody::ValueType;
vtkm::Id LeafSize;
std::vector<Node> Nodes;
size_t NextNode;
// Use ScanBody's ctor:
using ScanBody::ScanBody;
// Returns the total array sum:
ValueType Execute(const vtkm::Id2& range)
{
Node* rootNode = nullptr;
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
{
VTKM_OPENMP_DIRECTIVE(single)
{
// Allocate nodes, prep metadata:
this->Prepare(range);
// Compute the partition and node sums:
rootNode = this->AllocNode();
rootNode->Range = range;
rootNode->LeftEdge = true;
ScanBody::InitializeRootNode(rootNode);
this->Scan(rootNode);
} // end single
} // end parallel
return rootNode ? ScanBody::GetFinalResult(rootNode)
: vtkm::TypeTraits<ValueType>::ZeroInitialization();
}
private:
// Returns the next available node in a thread-safe manner.
Node* AllocNode()
{
size_t nodeIdx;
// GCC emits a false positive "value computed but not used" for this block:
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-value"
VTKM_OPENMP_DIRECTIVE(atomic capture)
{
nodeIdx = this->NextNode;
++this->NextNode;
}
#pragma GCC diagnostic pop
VTKM_ASSERT(nodeIdx < this->Nodes.size());
return &this->Nodes[nodeIdx];
}
// Does the range represent a leave node?
bool IsLeaf(const vtkm::Id2& range) const { return (range[1] - range[0]) <= this->LeafSize; }
// Use to split ranges. Ensures that the first range is always a multiple of
// LeafSize, when possible.
vtkm::Id ComputeMidpoint(const vtkm::Id2& range) const
{
const vtkm::Id n = range[1] - range[0];
const vtkm::Id np = this->LeafSize;
return (((n / 2) + (np - 1)) / np) * np + range[0];
}
void Prepare(const vtkm::Id2& range)
{
// Figure out how many values each thread should handle:
vtkm::Id numVals = range[1] - range[0];
int numThreads = omp_get_num_threads();
vtkm::Id chunksPerThread = 8;
vtkm::Id numChunks;
ComputeChunkSize(
numVals, numThreads, chunksPerThread, sizeof(ValueType), numChunks, this->LeafSize);
// Compute an upper-bound of the number of nodes in the tree:
size_t numNodes = numChunks;
while (numChunks > 1)
{
numChunks = (numChunks + 1) / 2;
numNodes += numChunks;
}
this->Nodes.resize(numNodes);
this->NextNode = 0;
}
// Build the tree and compute the sums:
void Scan(Node* node)
{
if (!this->IsLeaf(node->Range))
{ // split range:
vtkm::Id midpoint = this->ComputeMidpoint(node->Range);
Node* right = this->AllocNode();
right->Parent = node;
node->Right = right;
right->Range = vtkm::Id2(midpoint, node->Range[1]);
ScanBody::InitializeChildNode(right, node, ChildType::Right, false);
// Intel compilers seem to have trouble following the 'this' pointer
// when launching tasks, resulting in a corrupt task environment.
// Explicitly copying the pointer into a local variable seems to fix this.
auto explicitThis = this;
VTKM_OPENMP_DIRECTIVE(taskgroup)
{
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->Scan(right); } // end right task
Node* left = this->AllocNode();
left->Parent = node;
node->Left = left;
left->Range = vtkm::Id2(node->Range[0], midpoint);
left->LeftEdge = node->LeftEdge;
ScanBody::InitializeChildNode(left, node, ChildType::Left, left->LeftEdge);
this->Scan(left);
} // end task group. Both l/r sums will be finished here.
ScanBody::CombineSummaries(node, node->Left, node->Right);
if (node->LeftEdge)
{
this->UpdateOutput(node);
}
}
else
{ // Compute sums:
ScanBody::ComputeSummary(node, node->Range, node->LeftEdge);
}
}
void UpdateOutput(Node* node)
{
if (node->Left != nullptr)
{
assert(node->Right != nullptr);
ScanBody::PropagateSummaries(node, node->Left, node->Right, node->LeftEdge);
// if this node is on the left edge, we know that the left child's
// output is already updated, so only descend to the right:
if (node->LeftEdge)
{
this->UpdateOutput(node->Right);
}
else // Otherwise descent into both:
{
// Intel compilers seem to have trouble following the 'this' pointer
// when launching tasks, resulting in a corrupt task environment.
// Explicitly copying the pointer into a local variable seems to fix
// this.
auto explicitThis = this;
// no taskgroup/sync needed other than the final barrier of the parallel
// section.
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->UpdateOutput(node->Right); } // end task
this->UpdateOutput(node->Left);
}
}
else
{
ScanBody::UpdateOutput(node, node->Range, node->LeftEdge);
}
}
};
template <typename InPortalT, typename OutPortalT, typename RawFunctorT>
struct ScanExclusiveBody
{
using ValueType = typename InPortalT::ValueType;
using FunctorType = internal::WrappedBinaryOperator<ValueType, RawFunctorT>;
InPortalT InPortal;
OutPortalT OutPortal;
FunctorType Functor;
ValueType InitialValue;
struct Node
{
// Sum of all values in range
ValueType Sum{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
// The sum of all elements prior to this node's range
ValueType Carry{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
};
ScanExclusiveBody(const InPortalT& inPortal,
const OutPortalT& outPortal,
const RawFunctorT& functor,
const ValueType& init)
: InPortal(inPortal)
, OutPortal(outPortal)
, Functor(functor)
, InitialValue(init)
{
}
// Initialize the root of the node tree
void InitializeRootNode(Node* /*root*/) {}
void InitializeChildNode(Node* /*node*/,
const Node* /*parent*/,
ChildType /*type*/,
bool /*leftEdge*/)
{
}
void ComputeSummary(Node* node, const vtkm::Id2& range, bool leftEdge)
{
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
node->Sum = input[range[0]];
// If this block is on the left edge, we can update the output while we
// compute the sum:
if (leftEdge)
{
// Set leftEdge arg to false to force the update:
node->Sum = UpdateOutputImpl(node, range, false, true);
}
else // Otherwise, only compute the sum and update the output in pass 2.
{
for (vtkm::Id i = range[0] + 1; i < range[1]; ++i)
{
node->Sum = this->Functor(node->Sum, input[i]);
}
}
}
void CombineSummaries(Node* parent, const Node* left, const Node* right)
{
parent->Sum = this->Functor(left->Sum, right->Sum);
}
void PropagateSummaries(const Node* parent, Node* left, Node* right, bool leftEdge)
{
left->Carry = parent->Carry;
right->Carry = leftEdge ? left->Sum : this->Functor(parent->Carry, left->Sum);
}
void UpdateOutput(const Node* node, const vtkm::Id2& range, bool leftEdge)
{
this->UpdateOutputImpl(node, range, leftEdge, false);
}
ValueType UpdateOutputImpl(const Node* node, const vtkm::Id2& range, bool skip, bool useInit)
{
if (skip)
{
// Do nothing; this was already done in ComputeSummary.
return vtkm::TypeTraits<ValueType>::ZeroInitialization();
}
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
auto output = vtkm::cont::ArrayPortalToIteratorBegin(this->OutPortal);
// Be careful with the order input/output are modified. They might be
// pointing at the same data:
ValueType carry = useInit ? this->InitialValue : node->Carry;
vtkm::Id end = range[1];
for (vtkm::Id i = range[0]; i < end; ++i)
{
output[i] = this->Functor(carry, input[i]);
using std::swap; // Enable ADL
swap(output[i], carry);
}
return carry;
}
// Compute the final sum from the node's metadata:
ValueType GetFinalResult(const Node* node) const { return this->Functor(node->Sum, node->Carry); }
};
template <typename InPortalT, typename OutPortalT, typename RawFunctorT>
struct ScanInclusiveBody
{
using ValueType = typename InPortalT::ValueType;
using FunctorType = internal::WrappedBinaryOperator<ValueType, RawFunctorT>;
InPortalT InPortal;
OutPortalT OutPortal;
FunctorType Functor;
struct Node
{
// Sum of all values in range
ValueType Sum{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
// The sum of all elements prior to this node's range
ValueType Carry{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
};
ScanInclusiveBody(const InPortalT& inPortal,
const OutPortalT& outPortal,
const RawFunctorT& functor)
: InPortal(inPortal)
, OutPortal(outPortal)
, Functor(functor)
{
}
// Initialize the root of the node tree
void InitializeRootNode(Node*)
{
// no-op
}
void InitializeChildNode(Node*, const Node*, ChildType, bool)
{
// no-op
}
void ComputeSummary(Node* node, const vtkm::Id2& range, bool leftEdge)
{
// If this block is on the left edge, we can update the output while we
// compute the sum:
if (leftEdge)
{
node->Sum = UpdateOutputImpl(node, range, false, false);
}
else // Otherwise, only compute the sum and update the output in pass 2.
{
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
node->Sum = input[range[0]];
for (vtkm::Id i = range[0] + 1; i < range[1]; ++i)
{
node->Sum = this->Functor(node->Sum, input[i]);
}
}
}
void CombineSummaries(Node* parent, const Node* left, const Node* right)
{
parent->Sum = this->Functor(left->Sum, right->Sum);
}
void PropagateSummaries(const Node* parent, Node* left, Node* right, bool leftEdge)
{
left->Carry = parent->Carry;
right->Carry = leftEdge ? left->Sum : this->Functor(parent->Carry, left->Sum);
}
void UpdateOutput(const Node* node, const vtkm::Id2& range, bool leftEdge)
{
UpdateOutputImpl(node, range, leftEdge, true);
}
ValueType UpdateOutputImpl(const Node* node, const vtkm::Id2& range, bool skip, bool useCarry)
{
if (skip)
{
// Do nothing; this was already done in ComputeSummary.
return vtkm::TypeTraits<ValueType>::ZeroInitialization();
}
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
auto output = vtkm::cont::ArrayPortalToIteratorBegin(this->OutPortal);
vtkm::Id start = range[0];
vtkm::Id end = range[1];
ValueType carry = node->Carry;
// Initialize with the first value if this is the first range:
if (!useCarry && start < end)
{
carry = input[start];
output[start] = carry;
++start;
}
for (vtkm::Id i = start; i < end; ++i)
{
output[i] = this->Functor(carry, input[i]);
carry = output[i];
}
return output[end - 1];
}
// Compute the final sum from the node's metadata:
ValueType GetFinalResult(const Node* node) const { return node->Sum; }
};
} // end namespace scan
template <typename InPortalT, typename OutPortalT, typename FunctorT>
using ScanExclusiveHelper = scan::Adder<scan::ScanExclusiveBody<InPortalT, OutPortalT, FunctorT>>;
template <typename InPortalT, typename OutPortalT, typename FunctorT>
using ScanInclusiveHelper = scan::Adder<scan::ScanInclusiveBody<InPortalT, OutPortalT, FunctorT>>;
}
}
} // end namespace vtkm::cont::openmp

@ -0,0 +1,251 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
#include <vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h>
#include <vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h>
#include <vtkm/BinaryPredicates.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/ArrayHandleIndex.h>
#include <vtkm/cont/ArrayHandleZip.h>
#include <omp.h>
namespace vtkm
{
namespace cont
{
namespace openmp
{
namespace sort
{
// Forward declare entry points (See stack overflow discussion 7255281 --
// templated overloads of template functions are not specialization, and will
// be resolved during the first phase of two part lookup).
template <typename T, typename Container, class BinaryCompare>
void parallel_sort(vtkm::cont::ArrayHandle<T, Container>&, BinaryCompare);
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>&,
vtkm::cont::ArrayHandle<U, StorageU>&,
BinaryCompare);
// Quicksort values:
template <typename HandleType, class BinaryCompare>
void parallel_sort(HandleType& values,
BinaryCompare binary_compare,
vtkm::cont::internal::radix::PSortTag)
{
auto portal = values.PrepareForInPlace(DeviceAdapterTagOpenMP());
auto iter = vtkm::cont::ArrayPortalToIteratorBegin(portal);
vtkm::Id2 range(0, values.GetNumberOfValues());
using IterType = typename std::decay<decltype(iter)>::type;
using Sorter = quick::QuickSorter<IterType, BinaryCompare>;
Sorter sorter(iter, binary_compare);
sorter.Execute(range);
}
// Radix sort values:
template <typename T, typename StorageT, class BinaryCompare>
void parallel_sort(vtkm::cont::ArrayHandle<T, StorageT>& values,
BinaryCompare binary_compare,
vtkm::cont::internal::radix::RadixSortTag)
{
auto c = vtkm::cont::internal::radix::get_std_compare(binary_compare, T{});
radix::parallel_radix_sort(
values.GetStorage().GetArray(), static_cast<std::size_t>(values.GetNumberOfValues()), c);
}
// Value sort -- static switch between quicksort & radix sort
template <typename T, typename Container, class BinaryCompare>
void parallel_sort(vtkm::cont::ArrayHandle<T, Container>& values, BinaryCompare binary_compare)
{
using namespace vtkm::cont::internal::radix;
using SortAlgorithmTag = typename sort_tag_type<T, Container, BinaryCompare>::type;
parallel_sort(values, binary_compare, SortAlgorithmTag{});
}
// Quicksort by key:
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<U, StorageU>& values,
BinaryCompare binary_compare,
vtkm::cont::internal::radix::PSortTag)
{
using KeyType = vtkm::cont::ArrayHandle<T, StorageT>;
constexpr bool larger_than_64bits = sizeof(U) > sizeof(vtkm::Int64);
if (larger_than_64bits)
{
/// More efficient sort:
/// Move value indexes when sorting and reorder the value array at last
using ValueType = vtkm::cont::ArrayHandle<U, StorageU>;
using IndexType = vtkm::cont::ArrayHandle<vtkm::Id>;
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, IndexType>;
IndexType indexArray;
ValueType valuesScattered;
const vtkm::Id size = values.GetNumberOfValues();
// Generate an in-memory index array:
{
auto handle = ArrayHandleIndex(keys.GetNumberOfValues());
auto inputPortal = handle.PrepareForInput(DeviceAdapterTagOpenMP());
auto outputPortal =
indexArray.PrepareForOutput(keys.GetNumberOfValues(), DeviceAdapterTagOpenMP());
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, keys.GetNumberOfValues());
}
// Sort the keys and indicies:
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, indexArray);
parallel_sort(zipHandle,
vtkm::cont::internal::KeyCompare<T, vtkm::Id, BinaryCompare>(binary_compare),
vtkm::cont::internal::radix::PSortTag());
// Permute the values to their sorted locations:
{
auto valuesInPortal = values.PrepareForInput(DeviceAdapterTagOpenMP());
auto indexPortal = indexArray.PrepareForInput(DeviceAdapterTagOpenMP());
auto valuesOutPortal = valuesScattered.PrepareForOutput(size, DeviceAdapterTagOpenMP());
VTKM_OPENMP_DIRECTIVE(parallel for
default(none)
firstprivate(valuesInPortal, indexPortal, valuesOutPortal)
schedule(static))
for (vtkm::Id i = 0; i < size; ++i)
{
valuesOutPortal.Set(i, valuesInPortal.Get(indexPortal.Get(i)));
}
}
// Copy the values back into the input array:
{
auto inputPortal = valuesScattered.PrepareForInput(DeviceAdapterTagOpenMP());
auto outputPortal =
values.PrepareForOutput(valuesScattered.GetNumberOfValues(), DeviceAdapterTagOpenMP());
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, size);
}
}
else
{
using ValueType = vtkm::cont::ArrayHandle<U, StorageU>;
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, ValueType>;
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, values);
parallel_sort(zipHandle,
vtkm::cont::internal::KeyCompare<T, U, BinaryCompare>(binary_compare),
vtkm::cont::internal::radix::PSortTag{});
}
}
// Radix sort by key:
template <typename T, typename StorageT, typename StorageU, class BinaryCompare>
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<vtkm::Id, StorageU>& values,
BinaryCompare binary_compare,
vtkm::cont::internal::radix::RadixSortTag)
{
using namespace vtkm::cont::internal::radix;
auto c = get_std_compare(binary_compare, T{});
radix::parallel_radix_sort_key_values(keys.GetStorage().GetArray(),
values.GetStorage().GetArray(),
static_cast<std::size_t>(keys.GetNumberOfValues()),
c);
}
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<U, StorageU>& values,
BinaryCompare binary_compare,
vtkm::cont::internal::radix::RadixSortTag)
{
using KeyType = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>;
using ValueType = vtkm::cont::ArrayHandle<U, vtkm::cont::StorageTagBasic>;
using IndexType = vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>;
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, IndexType>;
IndexType indexArray;
ValueType valuesScattered;
const vtkm::Id size = values.GetNumberOfValues();
{
auto handle = ArrayHandleIndex(keys.GetNumberOfValues());
auto inputPortal = handle.PrepareForInput(DeviceAdapterTagOpenMP());
auto outputPortal =
indexArray.PrepareForOutput(keys.GetNumberOfValues(), DeviceAdapterTagOpenMP());
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, keys.GetNumberOfValues());
}
const vtkm::Id valuesBytes = static_cast<vtkm::Id>(sizeof(T)) * keys.GetNumberOfValues();
if (valuesBytes > static_cast<vtkm::Id>(vtkm::cont::internal::radix::MIN_BYTES_FOR_PARALLEL))
{
parallel_sort_bykey(keys, indexArray, binary_compare);
}
else
{
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, indexArray);
parallel_sort(zipHandle,
vtkm::cont::internal::KeyCompare<T, vtkm::Id, BinaryCompare>(binary_compare),
vtkm::cont::internal::radix::PSortTag());
}
// Permute the values to their sorted locations:
{
auto valuesInPortal = values.PrepareForInput(DeviceAdapterTagOpenMP());
auto indexPortal = indexArray.PrepareForInput(DeviceAdapterTagOpenMP());
auto valuesOutPortal = valuesScattered.PrepareForOutput(size, DeviceAdapterTagOpenMP());
VTKM_OPENMP_DIRECTIVE(parallel for
default(none)
firstprivate(valuesInPortal, indexPortal, valuesOutPortal)
schedule(static))
for (vtkm::Id i = 0; i < size; ++i)
{
valuesOutPortal.Set(i, valuesInPortal.Get(indexPortal.Get(i)));
}
}
{
auto inputPortal = valuesScattered.PrepareForInput(DeviceAdapterTagOpenMP());
auto outputPortal =
values.PrepareForOutput(valuesScattered.GetNumberOfValues(), DeviceAdapterTagOpenMP());
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, valuesScattered.GetNumberOfValues());
}
}
// Sort by key -- static switch between radix and quick sort:
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
vtkm::cont::ArrayHandle<U, StorageU>& values,
BinaryCompare binary_compare)
{
using namespace vtkm::cont::internal::radix;
using SortAlgorithmTag =
typename sortbykey_tag_type<T, U, StorageT, StorageU, BinaryCompare>::type;
parallel_sort_bykey(keys, values, binary_compare, SortAlgorithmTag{});
}
}
}
}
} // end namespace vtkm::cont::openmp::sort

@ -0,0 +1,50 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
#define vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
#include <vtkm/cont/internal/VirtualObjectTransfer.h>
#include <vtkm/cont/internal/VirtualObjectTransferShareWithControl.h>
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
namespace vtkm
{
namespace cont
{
namespace internal
{
template <typename VirtualDerivedType>
struct VirtualObjectTransfer<VirtualDerivedType, vtkm::cont::DeviceAdapterTagOpenMP>
: VirtualObjectTransferShareWithControl<VirtualDerivedType>
{
VTKM_CONT VirtualObjectTransfer(const VirtualDerivedType* virtualObject)
: VirtualObjectTransferShareWithControl<VirtualDerivedType>(virtualObject)
{
}
};
}
}
} // vtkm::cont::internal
#endif // vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h

@ -0,0 +1,33 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
set(unit_tests
UnitTestOpenMPArrayHandle.cxx
UnitTestOpenMPArrayHandleFancy.cxx
UnitTestOpenMPCellLocatorTwoLevelUniformGrid.cxx
UnitTestOpenMPComputeRange.cxx
UnitTestOpenMPDataSetExplicit.cxx
UnitTestOpenMPDataSetSingleType.cxx
UnitTestOpenMPDeviceAdapter.cxx
UnitTestOpenMPImplicitFunction.cxx
UnitTestOpenMPPointLocatorUniformGrid.cxx
UnitTestOpenMPVirtualObjectHandle.cxx
)
vtkm_unit_tests(OpenMP SOURCES ${unit_tests})

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingArrayHandles.h>
int UnitTestOpenMPArrayHandle(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingArrayHandles<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
}

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingFancyArrayHandles.h>
int UnitTestOpenMPArrayHandleFancy(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingFancyArrayHandles<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
}

@ -0,0 +1,32 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingCellLocatorTwoLevelUniformGrid.h>
int UnitTestOpenMPCellLocatorTwoLevelUniformGrid(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::Testing::Run(
TestingCellLocatorTwoLevelUniformGrid<vtkm::cont::DeviceAdapterTagOpenMP>);
}

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingComputeRange.h>
int UnitTestOpenMPComputeRange(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingComputeRange<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
}

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingDataSetExplicit.h>
int UnitTestOpenMPDataSetExplicit(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingDataSetExplicit<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
}

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingDataSetSingleType.h>
int UnitTestOpenMPDataSetSingleType(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingDataSetSingleType<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
}

@ -0,0 +1,32 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/RuntimeDeviceTracker.h>
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingDeviceAdapter.h>
int UnitTestOpenMPDeviceAdapter(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::TestingDeviceAdapter<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
}

@ -0,0 +1,43 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingImplicitFunction.h>
namespace
{
void TestImplicitFunctions()
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
vtkm::cont::testing::TestingImplicitFunction testing;
testing.Run(vtkm::cont::DeviceAdapterTagOpenMP());
}
} // anonymous namespace
int UnitTestOpenMPImplicitFunction(int, char* [])
{
return vtkm::cont::testing::Testing::Run(TestImplicitFunctions);
}

@ -0,0 +1,33 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingPointLocatorUniformGrid.h>
int UnitTestOpenMPPointLocatorUniformGrid(int, char* [])
{
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
return vtkm::cont::testing::Testing::Run(
TestingPointLocatorUniformGrid<vtkm::cont::DeviceAdapterTagOpenMP>());
}

@ -0,0 +1,49 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/cont/testing/TestingVirtualObjectHandle.h>
namespace
{
void TestVirtualObjectHandle()
{
using DeviceAdapterList = vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagOpenMP>;
using DeviceAdapterList2 =
vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagSerial, vtkm::cont::DeviceAdapterTagOpenMP>;
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
vtkm::cont::testing::TestingVirtualObjectHandle<DeviceAdapterList>::Run();
tracker.Reset();
vtkm::cont::testing::TestingVirtualObjectHandle<DeviceAdapterList2>::Run();
}
} // anonymous namespace
int UnitTestOpenMPVirtualObjectHandle(int, char* [])
{
return vtkm::cont::testing::Testing::Run(TestVirtualObjectHandle);
}

@ -52,6 +52,7 @@ vtkm_declare_headers(${header_impls} TESTABLE OFF)
#-----------------------------------------------------------------------------
add_subdirectory(serial)
add_subdirectory(tbb)
add_subdirectory(openmp)
add_subdirectory(cuda)
#-----------------------------------------------------------------------------

@ -0,0 +1,22 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
#-----------------------------------------------------------------------------
add_subdirectory(internal)

@ -0,0 +1,30 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
set(headers
TaskTilingOpenMP.h
)
vtkm_declare_headers(${headers})
#-----------------------------------------------------------------------------
if (VTKm_ENABLE_OPENMP)
add_subdirectory(testing)
endif()

@ -0,0 +1,41 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
#define vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
#include <vtkm/exec/serial/internal/TaskTiling.h>
namespace vtkm
{
namespace exec
{
namespace openmp
{
namespace internal
{
using TaskTiling1D = vtkm::exec::serial::internal::TaskTiling1D;
using TaskTiling3D = vtkm::exec::serial::internal::TaskTiling3D;
}
}
}
} // namespace vtkm::exec::tbb::internal
#endif //vtk_m_exec_tbb_internal_TaskTiling_h

@ -0,0 +1,27 @@
##=============================================================================
##
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
##
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
## Copyright 2018 UT-Battelle, LLC.
## Copyright 2018 Los Alamos National Security.
##
## Under the terms of Contract DE-NA0003525 with NTESS,
## the U.S. Government retains certain rights in this software.
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##
##=============================================================================
set(unit_tests
UnitTestTaskTilingOpenMP.cxx
)
vtkm_unit_tests(SOURCES ${unit_tests})

@ -0,0 +1,30 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
// Copyright 2018 UT-Battelle, LLC.
// Copyright 2018 Los Alamos National Security.
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
#include <vtkm/exec/internal/testing/TestingTaskTiling.h>
int UnitTestTaskTilingOpenMP(int, char* [])
{
return vtkm::cont::testing::Testing::Run(
vtkm::exec::internal::testing::TestTaskTiling<vtkm::cont::DeviceAdapterTagOpenMP>);
}

@ -29,6 +29,7 @@ set(VTKM_USE_64BIT_IDS ${VTKm_USE_64BIT_IDS})
set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA})
set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB})
set(VTKM_ENABLE_OPENMP ${VTKm_ENABLE_OPENMP})
set(VTKM_ENABLE_MPI ${VTKm_ENABLE_MPI})
if(VTKM_ENABLE_CUDA)

@ -229,6 +229,10 @@
#ifndef VTKM_ENABLE_TBB
#cmakedefine VTKM_ENABLE_TBB
#endif
//Mark if we are building with OpenMP enabled
#ifndef VTKM_ENABLE_OPENMP
#cmakedefine VTKM_ENABLE_OPENMP
#endif
//Mark if we are building with MPI enabled.
#cmakedefine VTKM_ENABLE_MPI

@ -905,6 +905,10 @@ template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagTBB>(
vtkm::cont::DeviceAdapterTagTBB);
#endif
#ifdef VTKM_ENABLE_OPENMP
template VTKM_CONT_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagOpenMP>(
vtkm::cont::DeviceAdapterTagOpenMP);
#endif
#ifdef VTKM_ENABLE_CUDA
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagCuda>(
vtkm::cont::DeviceAdapterTagCuda);

@ -87,6 +87,13 @@ inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagTBB>(vtkm::cont::
return "tbb";
}
template <>
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagOpenMP>(
vtkm::cont::DeviceAdapterTagOpenMP)
{
return "openmp";
}
template <>
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagCuda>(
vtkm::cont::DeviceAdapterTagCuda)