Add initial version of an OpenMP backend.
This commit is contained in:
parent
7b5ad3e80c
commit
183bcf109a
551
CMake/FindOpenMP.cmake
Normal file
551
CMake/FindOpenMP.cmake
Normal file
@ -0,0 +1,551 @@
|
|||||||
|
##=============================================================================
|
||||||
|
##
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
##
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##
|
||||||
|
##=============================================================================
|
||||||
|
|
||||||
|
#.rst:
|
||||||
|
# FindOpenMP
|
||||||
|
# ----------
|
||||||
|
#
|
||||||
|
# Finds OpenMP support
|
||||||
|
#
|
||||||
|
# This module can be used to detect OpenMP support in a compiler. If
|
||||||
|
# the compiler supports OpenMP, the flags required to compile with
|
||||||
|
# OpenMP support are returned in variables for the different languages.
|
||||||
|
# The variables may be empty if the compiler does not need a special
|
||||||
|
# flag to support OpenMP.
|
||||||
|
#
|
||||||
|
# Variables
|
||||||
|
# ^^^^^^^^^
|
||||||
|
#
|
||||||
|
# The module exposes the components ``C``, ``CXX``, and ``Fortran``.
|
||||||
|
# Each of these controls the various languages to search OpenMP support for.
|
||||||
|
#
|
||||||
|
# Depending on the enabled components the following variables will be set:
|
||||||
|
#
|
||||||
|
# ``OpenMP_FOUND``
|
||||||
|
# Variable indicating that OpenMP flags for all requested languages have been found.
|
||||||
|
# If no components are specified, this is true if OpenMP settings for all enabled languages
|
||||||
|
# were detected.
|
||||||
|
# ``OpenMP_VERSION``
|
||||||
|
# Minimal version of the OpenMP standard detected among the requested languages,
|
||||||
|
# or all enabled languages if no components were specified.
|
||||||
|
#
|
||||||
|
# This module will set the following variables per language in your
|
||||||
|
# project, where ``<lang>`` is one of C, CXX, or Fortran:
|
||||||
|
#
|
||||||
|
# ``OpenMP_<lang>_FOUND``
|
||||||
|
# Variable indicating if OpenMP support for ``<lang>`` was detected.
|
||||||
|
# ``OpenMP_<lang>_FLAGS``
|
||||||
|
# OpenMP compiler flags for ``<lang>``, separated by spaces.
|
||||||
|
#
|
||||||
|
# For linking with OpenMP code written in ``<lang>``, the following
|
||||||
|
# variables are provided:
|
||||||
|
#
|
||||||
|
# ``OpenMP_<lang>_LIB_NAMES``
|
||||||
|
# :ref:`;-list <CMake Language Lists>` of libraries for OpenMP programs for ``<lang>``.
|
||||||
|
# ``OpenMP_<libname>_LIBRARY``
|
||||||
|
# Location of the individual libraries needed for OpenMP support in ``<lang>``.
|
||||||
|
# ``OpenMP_<lang>_LIBRARIES``
|
||||||
|
# A list of libraries needed to link with OpenMP code written in ``<lang>``.
|
||||||
|
#
|
||||||
|
# Additionally, the module provides :prop_tgt:`IMPORTED` targets:
|
||||||
|
#
|
||||||
|
# ``OpenMP::OpenMP_<lang>``
|
||||||
|
# Target for using OpenMP from ``<lang>``.
|
||||||
|
#
|
||||||
|
# Specifically for Fortran, the module sets the following variables:
|
||||||
|
#
|
||||||
|
# ``OpenMP_Fortran_HAVE_OMPLIB_HEADER``
|
||||||
|
# Boolean indicating if OpenMP is accessible through ``omp_lib.h``.
|
||||||
|
# ``OpenMP_Fortran_HAVE_OMPLIB_MODULE``
|
||||||
|
# Boolean indicating if OpenMP is accessible through the ``omp_lib`` Fortran module.
|
||||||
|
#
|
||||||
|
# The module will also try to provide the OpenMP version variables:
|
||||||
|
#
|
||||||
|
# ``OpenMP_<lang>_SPEC_DATE``
|
||||||
|
# Date of the OpenMP specification implemented by the ``<lang>`` compiler.
|
||||||
|
# ``OpenMP_<lang>_VERSION_MAJOR``
|
||||||
|
# Major version of OpenMP implemented by the ``<lang>`` compiler.
|
||||||
|
# ``OpenMP_<lang>_VERSION_MINOR``
|
||||||
|
# Minor version of OpenMP implemented by the ``<lang>`` compiler.
|
||||||
|
# ``OpenMP_<lang>_VERSION``
|
||||||
|
# OpenMP version implemented by the ``<lang>`` compiler.
|
||||||
|
#
|
||||||
|
# The specification date is formatted as given in the OpenMP standard:
|
||||||
|
# ``yyyymm`` where ``yyyy`` and ``mm`` represent the year and month of
|
||||||
|
# the OpenMP specification implemented by the ``<lang>`` compiler.
|
||||||
|
|
||||||
|
cmake_policy(PUSH)
|
||||||
|
cmake_policy(SET CMP0012 NEW) # if() recognizes numbers and booleans
|
||||||
|
cmake_policy(SET CMP0054 NEW) # if() quoted variables not dereferenced
|
||||||
|
cmake_policy(SET CMP0057 NEW) # if IN_LIST
|
||||||
|
|
||||||
|
# Compute the list of OpenMP compiler flags worth probing for language LANG.
#
# Output: OpenMP_<LANG>_FLAG_CANDIDATES is set in the caller's scope.
# If OpenMP_<LANG>_FLAG evaluates to true it is passed through as the only
# candidate; otherwise the candidates come from a per-compiler table keyed by
# CMAKE_<LANG>_COMPILER_ID, with a generic fallback list for unknown compilers.
function(_OPENMP_FLAG_CANDIDATES LANG)
  # An explicitly provided flag takes precedence over the built-in table.
  if(OpenMP_${LANG}_FLAG)
    set(OpenMP_${LANG}_FLAG_CANDIDATES "${OpenMP_${LANG}_FLAG}" PARENT_SCOPE)
    return()
  endif()

  # Known flags, one variable per compiler id.
  set(OMP_FLAG_Absoft "-openmp")
  set(OMP_FLAG_AppleClang "-Xclang -fopenmp")
  set(OMP_FLAG_Clang "-fopenmp=libomp" "-fopenmp=libiomp5" "-fopenmp")
  # The Cray compiler activates OpenMP with -h omp, which is enabled by
  # default, so the "no flag" candidate (" ") is tried first.
  set(OMP_FLAG_Cray " " "-h omp")
  set(OMP_FLAG_Flang "-fopenmp")
  set(OMP_FLAG_GNU "-fopenmp")
  set(OMP_FLAG_HP "+Oopenmp")
  set(OMP_FLAG_MIPSpro "-mp")
  set(OMP_FLAG_MSVC "-openmp")
  set(OMP_FLAG_NAG "-openmp")
  set(OMP_FLAG_PathScale "-openmp")
  set(OMP_FLAG_PGI "-mp")
  set(OMP_FLAG_SunPro "-xopenmp")
  set(OMP_FLAG_XL "-qsmp=omp")

  # Intel spells the flag differently on Windows and before 15.0.0.20140528.
  set(OMP_FLAG_Intel "-qopenmp")
  if(WIN32)
    set(OMP_FLAG_Intel "-Qopenmp")
  elseif(CMAKE_${LANG}_COMPILER_ID STREQUAL "Intel" AND
         "${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS "15.0.0.20140528")
    set(OMP_FLAG_Intel "-openmp")
  endif()

  # Use the table entry when there is one, else fall back to common spellings.
  set(_omp_compiler_id "${CMAKE_${LANG}_COMPILER_ID}")
  if(DEFINED OMP_FLAG_${_omp_compiler_id})
    set(_omp_candidates "${OMP_FLAG_${_omp_compiler_id}}")
  else()
    set(_omp_candidates "-openmp" "-fopenmp" "-mp" " ")
  endif()

  set(OpenMP_${LANG}_FLAG_CANDIDATES "${_omp_candidates}" PARENT_SCOPE)
endfunction()
|
||||||
|
|
||||||
|
# sample openmp source code to test
|
||||||
|
set(OpenMP_C_CXX_TEST_SOURCE
|
||||||
|
"
|
||||||
|
#include <omp.h>
|
||||||
|
int main() {
|
||||||
|
#ifdef _OPENMP
|
||||||
|
int n = omp_get_max_threads();
|
||||||
|
return 0;
|
||||||
|
#else
|
||||||
|
breaks_on_purpose
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
")
|
||||||
|
|
||||||
|
# in Fortran, an implementation may provide an omp_lib.h header
|
||||||
|
# or omp_lib module, or both (OpenMP standard, section 3.1)
|
||||||
|
# Furthermore, !$ is the Fortran equivalent of #ifdef _OPENMP (OpenMP standard, 2.2.2)
|
||||||
|
# Without the conditional compilation, some compilers (e.g. PGI) might compile OpenMP code
|
||||||
|
# while not actually enabling OpenMP, building code sequentially
|
||||||
|
set(OpenMP_Fortran_TEST_SOURCE
|
||||||
|
"
|
||||||
|
program test
|
||||||
|
@OpenMP_Fortran_INCLUDE_LINE@
|
||||||
|
!$ integer :: n
|
||||||
|
n = omp_get_num_threads()
|
||||||
|
end program test
|
||||||
|
"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Write one of the OpenMP probe programs to the FindOpenMP scratch directory.
#
#   LANG                  C, CXX or Fortran; selects extension and source text.
#   SRC_FILE_CONTENT_VAR  suffix of the variable holding the source, i.e.
#                         OpenMP_C_CXX_<suffix> or OpenMP_Fortran_<suffix>.
#   SRC_FILE_NAME         file name without extension.
#   SRC_FILE_FULLPATH     name of the caller's variable receiving the path.
#
# Fortran sources are written to "<file>_in" first and passed through
# configure_file(@ONLY) so @OpenMP_Fortran_INCLUDE_LINE@ gets substituted.
function(_OPENMP_WRITE_SOURCE_FILE LANG SRC_FILE_CONTENT_VAR SRC_FILE_NAME SRC_FILE_FULLPATH)
  set(_omp_src_stem "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenMP/${SRC_FILE_NAME}")

  if("${LANG}" STREQUAL "Fortran")
    set(SRC_FILE "${_omp_src_stem}.f90")
    file(WRITE "${SRC_FILE}_in" "${OpenMP_Fortran_${SRC_FILE_CONTENT_VAR}}")
    configure_file("${SRC_FILE}_in" "${SRC_FILE}" @ONLY)
  elseif("${LANG}" STREQUAL "C" OR "${LANG}" STREQUAL "CXX")
    # C and C++ share the same program text and differ only in extension.
    if("${LANG}" STREQUAL "C")
      set(SRC_FILE "${_omp_src_stem}.c")
    else()
      set(SRC_FILE "${_omp_src_stem}.cpp")
    endif()
    file(WRITE "${SRC_FILE}" "${OpenMP_C_CXX_${SRC_FILE_CONTENT_VAR}}")
  endif()

  set(${SRC_FILE_FULLPATH} "${SRC_FILE}" PARENT_SCOPE)
endfunction()
|
||||||
|
|
||||||
|
include(CMakeParseImplicitLinkInfo)
|
||||||
|
|
||||||
|
# Probe the <LANG> compiler for a working OpenMP flag and the libraries it
# links implicitly.
#
#   LANG                  language to probe (used to select CMAKE_<LANG>_* variables).
#   FLAG_MODE             tag that only appears in the name of the try_compile
#                         result variable (OpenMP_COMPILE_RESULT_<FLAG_MODE>_<flag>).
#   OPENMP_FLAG_VAR       name of the caller's variable receiving the accepted flag.
#   OPENMP_LIB_NAMES_VAR  name of the caller's variable receiving the library names.
#
# Each candidate from _OPENMP_FLAG_CANDIDATES is tried in order by compiling the
# TEST_SOURCE program. The first candidate that compiles ends the loop. If a
# candidate fails on AppleClang >= 7.0, the compile is retried once with a
# separately located omp/gomp/iomp5 library. Both output variables are set to
# "NOTFOUND" after every candidate that does not succeed.
function(_OPENMP_GET_FLAGS LANG FLAG_MODE OPENMP_FLAG_VAR OPENMP_LIB_NAMES_VAR)
|
||||||
|
_OPENMP_FLAG_CANDIDATES("${LANG}")
|
||||||
|
_OPENMP_WRITE_SOURCE_FILE("${LANG}" "TEST_SOURCE" OpenMPTryFlag _OPENMP_TEST_SRC)
|
||||||
|
|
||||||
|
# Keep only the parts of CMAKE_<LANG>_VERBOSE_FLAG that do not start with
# "-Wl,"; these are appended to the compile flags of the probe below.
unset(OpenMP_VERBOSE_COMPILE_OPTIONS)
|
||||||
|
separate_arguments(OpenMP_VERBOSE_OPTIONS NATIVE_COMMAND "${CMAKE_${LANG}_VERBOSE_FLAG}")
|
||||||
|
foreach(_VERBOSE_OPTION IN LISTS OpenMP_VERBOSE_OPTIONS)
|
||||||
|
if(NOT _VERBOSE_OPTION MATCHES "^-Wl,")
|
||||||
|
list(APPEND OpenMP_VERBOSE_COMPILE_OPTIONS ${_VERBOSE_OPTION})
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
foreach(OPENMP_FLAG IN LISTS OpenMP_${LANG}_FLAG_CANDIDATES)
|
||||||
|
set(OPENMP_FLAGS_TEST "${OPENMP_FLAG}")
|
||||||
|
if(OpenMP_VERBOSE_COMPILE_OPTIONS)
|
||||||
|
string(APPEND OPENMP_FLAGS_TEST " ${OpenMP_VERBOSE_COMPILE_OPTIONS}")
|
||||||
|
endif()
|
||||||
|
# Strip '-', '/', '=' and '+' from the flag to build the result variable name.
string(REGEX REPLACE "[-/=+]" "" OPENMP_PLAIN_FLAG "${OPENMP_FLAG}")
|
||||||
|
try_compile( OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG} ${CMAKE_BINARY_DIR} ${_OPENMP_TEST_SRC}
|
||||||
|
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OPENMP_FLAGS_TEST}"
|
||||||
|
LINK_LIBRARIES ${CMAKE_${LANG}_VERBOSE_FLAG}
|
||||||
|
OUTPUT_VARIABLE OpenMP_TRY_COMPILE_OUTPUT
|
||||||
|
)
|
||||||
|
|
||||||
|
if(OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG})
|
||||||
|
set("${OPENMP_FLAG_VAR}" "${OPENMP_FLAG}" PARENT_SCOPE)
|
||||||
|
|
||||||
|
# With a verbose flag available, the try_compile output is parsed for the
# libraries the compiler added to the link line.
if(CMAKE_${LANG}_VERBOSE_FLAG)
|
||||||
|
unset(OpenMP_${LANG}_IMPLICIT_LIBRARIES)
|
||||||
|
unset(OpenMP_${LANG}_IMPLICIT_LINK_DIRS)
|
||||||
|
unset(OpenMP_${LANG}_IMPLICIT_FWK_DIRS)
|
||||||
|
unset(OpenMP_${LANG}_LOG_VAR)
|
||||||
|
|
||||||
|
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
|
||||||
|
"Detecting ${LANG} OpenMP compiler ABI info compiled with the following output:\n${OpenMP_TRY_COMPILE_OUTPUT}\n\n")
|
||||||
|
|
||||||
|
cmake_parse_implicit_link_info("${OpenMP_TRY_COMPILE_OUTPUT}"
|
||||||
|
OpenMP_${LANG}_IMPLICIT_LIBRARIES
|
||||||
|
OpenMP_${LANG}_IMPLICIT_LINK_DIRS
|
||||||
|
OpenMP_${LANG}_IMPLICIT_FWK_DIRS
|
||||||
|
OpenMP_${LANG}_LOG_VAR
|
||||||
|
"${CMAKE_${LANG}_IMPLICIT_OBJECT_REGEX}"
|
||||||
|
)
|
||||||
|
|
||||||
|
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
|
||||||
|
"Parsed ${LANG} OpenMP implicit link information from above output:\n${OpenMP_${LANG}_LOG_VAR}\n\n")
|
||||||
|
|
||||||
|
unset(_OPENMP_LIB_NAMES)
|
||||||
|
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_IMPLICIT_LIBRARIES)
|
||||||
|
get_filename_component(_OPENMP_IMPLICIT_LIB_DIR "${_OPENMP_IMPLICIT_LIB}" DIRECTORY)
|
||||||
|
get_filename_component(_OPENMP_IMPLICIT_LIB_NAME "${_OPENMP_IMPLICIT_LIB}" NAME)
|
||||||
|
get_filename_component(_OPENMP_IMPLICIT_LIB_PLAIN "${_OPENMP_IMPLICIT_LIB}" NAME_WE)
|
||||||
|
# Escape regex metacharacters so the name and path can be matched literally below.
string(REGEX REPLACE "([][+.*?()^$])" "\\\\\\1" _OPENMP_IMPLICIT_LIB_PLAIN_ESC "${_OPENMP_IMPLICIT_LIB_PLAIN}")
|
||||||
|
string(REGEX REPLACE "([][+.*?()^$])" "\\\\\\1" _OPENMP_IMPLICIT_LIB_PATH_ESC "${_OPENMP_IMPLICIT_LIB}")
|
||||||
|
# Skip libraries already listed in CMAKE_<LANG>_IMPLICIT_LINK_LIBRARIES,
# CMAKE_<LANG>_STANDARD_LIBRARIES or CMAKE_<LANG>_LINK_EXECUTABLE.
if(NOT ( "${_OPENMP_IMPLICIT_LIB}" IN_LIST CMAKE_${LANG}_IMPLICIT_LINK_LIBRARIES
|
||||||
|
OR "${CMAKE_${LANG}_STANDARD_LIBRARIES}" MATCHES "(^| )(-Wl,)?(-l)?(${_OPENMP_IMPLICIT_LIB_PLAIN_ESC}|${_OPENMP_IMPLICIT_LIB_PATH_ESC})( |$)"
|
||||||
|
OR "${CMAKE_${LANG}_LINK_EXECUTABLE}" MATCHES "(^| )(-Wl,)?(-l)?(${_OPENMP_IMPLICIT_LIB_PLAIN_ESC}|${_OPENMP_IMPLICIT_LIB_PATH_ESC})( |$)" ) )
|
||||||
|
# An entry with a directory part is cached as given; a bare name is
# searched for in the parsed implicit link directories only.
if(_OPENMP_IMPLICIT_LIB_DIR)
|
||||||
|
set(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY "${_OPENMP_IMPLICIT_LIB}" CACHE FILEPATH
|
||||||
|
"Path to the ${_OPENMP_IMPLICIT_LIB_PLAIN} library for OpenMP")
|
||||||
|
else()
|
||||||
|
find_library(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY
|
||||||
|
NAMES "${_OPENMP_IMPLICIT_LIB_NAME}"
|
||||||
|
DOC "Path to the ${_OPENMP_IMPLICIT_LIB_PLAIN} library for OpenMP"
|
||||||
|
HINTS ${OpenMP_${LANG}_IMPLICIT_LINK_DIRS}
|
||||||
|
CMAKE_FIND_ROOT_PATH_BOTH
|
||||||
|
NO_DEFAULT_PATH
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
mark_as_advanced(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY)
|
||||||
|
list(APPEND _OPENMP_LIB_NAMES ${_OPENMP_IMPLICIT_LIB_PLAIN})
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
set("${OPENMP_LIB_NAMES_VAR}" "${_OPENMP_LIB_NAMES}" PARENT_SCOPE)
|
||||||
|
else()
|
||||||
|
# We do not know how to extract implicit OpenMP libraries for this compiler.
|
||||||
|
# Assume that it handles them automatically, e.g. the Intel Compiler on
|
||||||
|
# Windows should put the dependency in its object files.
|
||||||
|
set("${OPENMP_LIB_NAMES_VAR}" "" PARENT_SCOPE)
|
||||||
|
endif()
|
||||||
|
break()
|
||||||
|
elseif(CMAKE_${LANG}_COMPILER_ID STREQUAL "AppleClang"
|
||||||
|
AND CMAKE_${LANG}_COMPILER_VERSION VERSION_GREATER_EQUAL "7.0")
|
||||||
|
|
||||||
|
# Check for separate OpenMP library on AppleClang 7+
|
||||||
|
find_library(OpenMP_libomp_LIBRARY
|
||||||
|
NAMES omp gomp iomp5
|
||||||
|
HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES}
|
||||||
|
)
|
||||||
|
mark_as_advanced(OpenMP_libomp_LIBRARY)
|
||||||
|
|
||||||
|
if(OpenMP_libomp_LIBRARY)
|
||||||
|
try_compile( OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG} ${CMAKE_BINARY_DIR} ${_OPENMP_TEST_SRC}
|
||||||
|
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OPENMP_FLAGS_TEST}"
|
||||||
|
LINK_LIBRARIES ${CMAKE_${LANG}_VERBOSE_FLAG} ${OpenMP_libomp_LIBRARY}
|
||||||
|
OUTPUT_VARIABLE OpenMP_TRY_COMPILE_OUTPUT
|
||||||
|
)
|
||||||
|
if(OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG})
|
||||||
|
set("${OPENMP_FLAG_VAR}" "${OPENMP_FLAG}" PARENT_SCOPE)
|
||||||
|
set("${OPENMP_LIB_NAMES_VAR}" "libomp" PARENT_SCOPE)
|
||||||
|
break()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
# Only reached when this candidate did not succeed (success paths break above).
set("${OPENMP_LIB_NAMES_VAR}" "NOTFOUND" PARENT_SCOPE)
|
||||||
|
set("${OPENMP_FLAG_VAR}" "NOTFOUND" PARENT_SCOPE)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
unset(OpenMP_VERBOSE_COMPILE_OPTIONS)
|
||||||
|
endfunction()
|
||||||
|
|
||||||
|
set(OpenMP_C_CXX_CHECK_VERSION_SOURCE
|
||||||
|
"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <omp.h>
|
||||||
|
const char ompver_str[] = { 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'M',
|
||||||
|
'P', '-', 'd', 'a', 't', 'e', '[',
|
||||||
|
('0' + ((_OPENMP/100000)%10)),
|
||||||
|
('0' + ((_OPENMP/10000)%10)),
|
||||||
|
('0' + ((_OPENMP/1000)%10)),
|
||||||
|
('0' + ((_OPENMP/100)%10)),
|
||||||
|
('0' + ((_OPENMP/10)%10)),
|
||||||
|
('0' + ((_OPENMP/1)%10)),
|
||||||
|
']', '\\0' };
|
||||||
|
int main()
|
||||||
|
{
|
||||||
|
puts(ompver_str);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
")
|
||||||
|
|
||||||
|
set(OpenMP_Fortran_CHECK_VERSION_SOURCE
|
||||||
|
"
|
||||||
|
program omp_ver
|
||||||
|
@OpenMP_Fortran_INCLUDE_LINE@
|
||||||
|
integer, parameter :: zero = ichar('0')
|
||||||
|
integer, parameter :: ompv = openmp_version
|
||||||
|
character, dimension(24), parameter :: ompver_str =&
|
||||||
|
(/ 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'M', 'P', '-',&
|
||||||
|
'd', 'a', 't', 'e', '[',&
|
||||||
|
char(zero + mod(ompv/100000, 10)),&
|
||||||
|
char(zero + mod(ompv/10000, 10)),&
|
||||||
|
char(zero + mod(ompv/1000, 10)),&
|
||||||
|
char(zero + mod(ompv/100, 10)),&
|
||||||
|
char(zero + mod(ompv/10, 10)),&
|
||||||
|
char(zero + mod(ompv/1, 10)), ']' /)
|
||||||
|
print *, ompver_str
|
||||||
|
end program omp_ver
|
||||||
|
")
|
||||||
|
|
||||||
|
# Determine the OpenMP specification date reported by the <LANG> compiler.
#
#   LANG       language whose compiler is probed.
#   SPEC_DATE  name of the caller's variable that receives the date with
#              leading zeros stripped. It is left untouched when the probe
#              fails to compile or the marker is not found in the binary.
#
# The CHECK_VERSION_SOURCE program embeds "INFO:OpenMP-date[<digits>]" built
# from _OPENMP / openmp_version. It is compiled with OpenMP_<LANG>_FLAGS and
# the marker is read back from the copied binary with file(STRINGS), so the
# program is never executed.
function(_OPENMP_GET_SPEC_DATE LANG SPEC_DATE)
  _OPENMP_WRITE_SOURCE_FILE("${LANG}" "CHECK_VERSION_SOURCE" OpenMPCheckVersion _OPENMP_TEST_SRC)

  set(BIN_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenMP/ompver_${LANG}.bin")
  # Derive the result-variable suffix from the flags actually used for the
  # compile. The previous code read ${OPENMP_FLAG}, a loop variable local to
  # _OPENMP_GET_FLAGS that is not set in this scope. Spaces are stripped as
  # well so multi-word flags ("-Xclang -fopenmp") give a clean variable name.
  string(REGEX REPLACE "[-/=+ ]" "" OPENMP_PLAIN_FLAG "${OpenMP_${LANG}_FLAGS}")
  try_compile(OpenMP_SPECTEST_${LANG}_${OPENMP_PLAIN_FLAG} "${CMAKE_BINARY_DIR}" "${_OPENMP_TEST_SRC}"
              CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OpenMP_${LANG}_FLAGS}"
              COPY_FILE "${BIN_FILE}")

  if(${OpenMP_SPECTEST_${LANG}_${OPENMP_PLAIN_FLAG}})
    file(STRINGS "${BIN_FILE}" specstr LIMIT_COUNT 1 REGEX "INFO:OpenMP-date")
    # Capture the digits between the brackets, dropping leading zeros.
    set(regex_spec_date ".*INFO:OpenMP-date\\[0*([^]]*)\\].*")
    if("${specstr}" MATCHES "${regex_spec_date}")
      set(${SPEC_DATE} "${CMAKE_MATCH_1}" PARENT_SCOPE)
    endif()
  endif()
endfunction()
|
||||||
|
|
||||||
|
# Translate OpenMP_<LANG>_SPEC_DATE (yyyymm) into OpenMP_<LANG>_VERSION_MAJOR,
# OpenMP_<LANG>_VERSION_MINOR and OpenMP_<LANG>_VERSION.
#
# This is a macro, so the variables are set in the caller's scope. When the
# spec date is empty or not in the table, the three version variables are
# unset instead.
macro(_OPENMP_SET_VERSION_BY_SPEC_DATE LANG)
  set(OpenMP_SPEC_DATE_MAP
    # Preview versions
    "201611=5.0" # OpenMP 5.0 preview 1
    # Combined versions, 2.5 onwards
    "202411=6.0"
    "202111=5.2"
    "202011=5.1"
    "201811=5.0"
    "201511=4.5"
    "201307=4.0"
    "201107=3.1"
    "200805=3.0"
    "200505=2.5"
    # C/C++ version 2.0
    "200203=2.0"
    # Fortran version 2.0
    "200011=2.0"
    # Fortran version 1.1
    "199911=1.1"
    # C/C++ version 1.0 (there's no 1.1 for C/C++)
    "199810=1.0"
    # Fortran version 1.0
    "199710=1.0"
  )

  # The capture groups of the regex below are read back through
  # CMAKE_MATCH_1 / CMAKE_MATCH_2 once a table entry matched.
  if(OpenMP_${LANG}_SPEC_DATE)
    string(REGEX MATCHALL "${OpenMP_${LANG}_SPEC_DATE}=([0-9]+)\\.([0-9]+)" _version_match "${OpenMP_SPEC_DATE_MAP}")
  else()
    set(_version_match "")
  endif()
  if(NOT _version_match STREQUAL "")
    set(OpenMP_${LANG}_VERSION_MAJOR ${CMAKE_MATCH_1})
    set(OpenMP_${LANG}_VERSION_MINOR ${CMAKE_MATCH_2})
    set(OpenMP_${LANG}_VERSION "${OpenMP_${LANG}_VERSION_MAJOR}.${OpenMP_${LANG}_VERSION_MINOR}")
  else()
    unset(OpenMP_${LANG}_VERSION_MAJOR)
    unset(OpenMP_${LANG}_VERSION_MINOR)
    unset(OpenMP_${LANG}_VERSION)
  endif()
  unset(_version_match)
  unset(OpenMP_SPEC_DATE_MAP)
endmacro()
|
||||||
|
|
||||||
|
# Probe OpenMP flags and library names for every enabled C-family language
# and publish them as advanced cache entries.
foreach(LANG IN ITEMS C CXX)
  if(NOT CMAKE_${LANG}_COMPILER_LOADED)
    continue()
  endif()

  if(DEFINED OpenMP_${LANG}_FLAGS AND NOT "${OpenMP_${LANG}_FLAGS}" STREQUAL "NOTFOUND"
     AND DEFINED OpenMP_${LANG}_LIB_NAMES AND NOT "${OpenMP_${LANG}_LIB_NAMES}" STREQUAL "NOTFOUND")
    # Both values are already known; no probing needed.
  else()
    _OPENMP_GET_FLAGS("${LANG}" "${LANG}" OpenMP_${LANG}_FLAGS_WORK OpenMP_${LANG}_LIB_NAMES_WORK)
  endif()

  set(OpenMP_${LANG}_FLAGS "${OpenMP_${LANG}_FLAGS_WORK}"
      CACHE STRING "${LANG} compiler flags for OpenMP parallelization")
  set(OpenMP_${LANG}_LIB_NAMES "${OpenMP_${LANG}_LIB_NAMES_WORK}"
      CACHE STRING "${LANG} compiler libraries for OpenMP parallelization")
  mark_as_advanced(OpenMP_${LANG}_FLAGS OpenMP_${LANG}_LIB_NAMES)
endforeach()
|
||||||
|
|
||||||
|
# Probe OpenMP support for Fortran. An implementation may expose OpenMP through
# the omp_lib module, the omp_lib.h header, or both, so the probe runs once per
# access method and records which of them compiled.
if(CMAKE_Fortran_COMPILER_LOADED)
  # First attempt: access through the omp_lib module.
  if(NOT DEFINED OpenMP_Fortran_FLAGS OR "${OpenMP_Fortran_FLAGS}" STREQUAL "NOTFOUND"
    OR NOT DEFINED OpenMP_Fortran_LIB_NAMES OR "${OpenMP_Fortran_LIB_NAMES}" STREQUAL "NOTFOUND"
    OR NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_MODULE)
    set(OpenMP_Fortran_INCLUDE_LINE "use omp_lib\n implicit none")
    _OPENMP_GET_FLAGS("Fortran" "FortranHeader" OpenMP_Fortran_FLAGS_WORK OpenMP_Fortran_LIB_NAMES_WORK)
    if(OpenMP_Fortran_FLAGS_WORK)
      # set(<var> <value> CACHE <type> <doc>) requires CACHE to be the third
      # argument from the end. The previous "CACHE BOOL INTERNAL \"\"" form did
      # not match that signature and stored the list
      # "TRUE;CACHE;BOOL;INTERNAL;" in a normal variable instead.
      set(OpenMP_Fortran_HAVE_OMPLIB_MODULE TRUE CACHE INTERNAL "")
    endif()

    set(OpenMP_Fortran_FLAGS "${OpenMP_Fortran_FLAGS_WORK}"
      CACHE STRING "Fortran compiler flags for OpenMP parallelization")
    set(OpenMP_Fortran_LIB_NAMES "${OpenMP_Fortran_LIB_NAMES_WORK}"
      CACHE STRING "Fortran compiler libraries for OpenMP parallelization")
    mark_as_advanced(OpenMP_Fortran_FLAGS OpenMP_Fortran_LIB_NAMES)
  endif()

  # Second attempt: access through the omp_lib.h header.
  if(NOT DEFINED OpenMP_Fortran_FLAGS OR "${OpenMP_Fortran_FLAGS}" STREQUAL "NOTFOUND"
    OR NOT DEFINED OpenMP_Fortran_LIB_NAMES OR "${OpenMP_Fortran_LIB_NAMES}" STREQUAL "NOTFOUND"
    OR NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_HEADER)
    set(OpenMP_Fortran_INCLUDE_LINE "implicit none\n include 'omp_lib.h'")
    _OPENMP_GET_FLAGS("Fortran" "FortranModule" OpenMP_Fortran_FLAGS_WORK OpenMP_Fortran_LIB_NAMES_WORK)
    if(OpenMP_Fortran_FLAGS_WORK)
      set(OpenMP_Fortran_HAVE_OMPLIB_HEADER TRUE CACHE INTERNAL "")
    endif()

    set(OpenMP_Fortran_FLAGS "${OpenMP_Fortran_FLAGS_WORK}"
      CACHE STRING "Fortran compiler flags for OpenMP parallelization")

    # Use the probe result (_WORK) here as well, so that flags and library
    # names always come from the same probe.
    set(OpenMP_Fortran_LIB_NAMES "${OpenMP_Fortran_LIB_NAMES_WORK}"
      CACHE STRING "Fortran compiler libraries for OpenMP parallelization")
  endif()

  # Select the include line used by the later version-check program.
  if(OpenMP_Fortran_HAVE_OMPLIB_MODULE)
    set(OpenMP_Fortran_INCLUDE_LINE "use omp_lib\n implicit none")
  else()
    set(OpenMP_Fortran_INCLUDE_LINE "implicit none\n include 'omp_lib.h'")
  endif()
endif()
|
||||||
|
|
||||||
|
if(NOT OpenMP_FIND_COMPONENTS)
|
||||||
|
set(OpenMP_FINDLIST C CXX Fortran)
|
||||||
|
else()
|
||||||
|
set(OpenMP_FINDLIST ${OpenMP_FIND_COMPONENTS})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
unset(_OpenMP_MIN_VERSION)
|
||||||
|
|
||||||
|
include(FindPackageHandleStandardArgs)
|
||||||
|
|
||||||
|
# For every requested language with a loaded compiler: determine the OpenMP
# version, run find_package_handle_standard_args for OpenMP_<LANG>, and on
# success publish OpenMP_<LANG>_LIBRARIES and the OpenMP::OpenMP_<LANG> target.
# _OpenMP_MIN_VERSION ends up holding the *name* of the version variable with
# the lowest value among the found languages.
foreach(LANG IN LISTS OpenMP_FINDLIST)
  if(CMAKE_${LANG}_COMPILER_LOADED)
    if (NOT OpenMP_${LANG}_SPEC_DATE AND OpenMP_${LANG}_FLAGS)
      _OPENMP_GET_SPEC_DATE("${LANG}" OpenMP_${LANG}_SPEC_DATE_INTERNAL)
      set(OpenMP_${LANG}_SPEC_DATE "${OpenMP_${LANG}_SPEC_DATE_INTERNAL}" CACHE
        INTERNAL "${LANG} compiler's OpenMP specification date")
    endif()
    # The version variables are plain (non-cache) variables, so they have to
    # be derived on every run, including runs where the spec date is already
    # cached. Otherwise a reconfigure or a second find_package(OpenMP <ver>)
    # from another directory would see no version and fail the version check.
    _OPENMP_SET_VERSION_BY_SPEC_DATE("${LANG}")

    set(OpenMP_${LANG}_FIND_QUIETLY ${OpenMP_FIND_QUIETLY})
    set(OpenMP_${LANG}_FIND_REQUIRED ${OpenMP_FIND_REQUIRED})
    set(OpenMP_${LANG}_FIND_VERSION ${OpenMP_FIND_VERSION})
    set(OpenMP_${LANG}_FIND_VERSION_EXACT ${OpenMP_FIND_VERSION_EXACT})

    set(_OPENMP_${LANG}_REQUIRED_VARS OpenMP_${LANG}_FLAGS)
    if("${OpenMP_${LANG}_LIB_NAMES}" STREQUAL "NOTFOUND")
      set(_OPENMP_${LANG}_REQUIRED_LIB_VARS OpenMP_${LANG}_LIB_NAMES)
    else()
      # Start from an empty list so repeated inclusions of this module in the
      # same scope do not accumulate duplicate entries.
      set(_OPENMP_${LANG}_REQUIRED_LIB_VARS "")
      foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_LIB_NAMES)
        list(APPEND _OPENMP_${LANG}_REQUIRED_LIB_VARS OpenMP_${_OPENMP_IMPLICIT_LIB}_LIBRARY)
      endforeach()
    endif()

    find_package_handle_standard_args(OpenMP_${LANG}
      REQUIRED_VARS ${_OPENMP_${LANG}_REQUIRED_VARS} ${_OPENMP_${LANG}_REQUIRED_LIB_VARS}
      VERSION_VAR OpenMP_${LANG}_VERSION
    )

    if(OpenMP_${LANG}_FOUND)
      if(DEFINED OpenMP_${LANG}_VERSION)
        # _OpenMP_MIN_VERSION stores a variable name, so it must be
        # dereferenced twice to compare actual version numbers.
        if(NOT _OpenMP_MIN_VERSION)
          set(_OpenMP_MIN_VERSION OpenMP_${LANG}_VERSION)
        elseif("${${_OpenMP_MIN_VERSION}}" VERSION_GREATER "${OpenMP_${LANG}_VERSION}")
          set(_OpenMP_MIN_VERSION OpenMP_${LANG}_VERSION)
        endif()
      endif()
      set(OpenMP_${LANG}_LIBRARIES "")
      foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_LIB_NAMES)
        list(APPEND OpenMP_${LANG}_LIBRARIES "${OpenMP_${_OPENMP_IMPLICIT_LIB}_LIBRARY}")
      endforeach()

      if(NOT TARGET OpenMP::OpenMP_${LANG})
        add_library(OpenMP::OpenMP_${LANG} INTERFACE IMPORTED)
      endif()
      if(OpenMP_${LANG}_FLAGS)
        # The flags variable is a space-separated string; the target property
        # needs a ;-list.
        separate_arguments(_OpenMP_${LANG}_OPTIONS NATIVE_COMMAND "${OpenMP_${LANG}_FLAGS}")
        set_property(TARGET OpenMP::OpenMP_${LANG} PROPERTY
          INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:${LANG}>:${_OpenMP_${LANG}_OPTIONS}>")
        unset(_OpenMP_${LANG}_OPTIONS)
      endif()
      if(OpenMP_${LANG}_LIBRARIES)
        set_property(TARGET OpenMP::OpenMP_${LANG} PROPERTY
          INTERFACE_LINK_LIBRARIES "${OpenMP_${LANG}_LIBRARIES}")
      endif()
    endif()
  endif()
endforeach()
|
||||||
|
|
||||||
|
# Collect the per-language FOUND variables that decide the overall result:
# the requested components if any were given, otherwise every language whose
# compiler is loaded.
unset(_OpenMP_REQ_VARS)
foreach(LANG IN ITEMS C CXX Fortran)
  if(OpenMP_FIND_COMPONENTS)
    if(LANG IN_LIST OpenMP_FIND_COMPONENTS)
      list(APPEND _OpenMP_REQ_VARS "OpenMP_${LANG}_FOUND")
    endif()
  elseif(CMAKE_${LANG}_COMPILER_LOADED)
    list(APPEND _OpenMP_REQ_VARS "OpenMP_${LANG}_FOUND")
  endif()
endforeach()

# _OpenMP_MIN_VERSION holds the name of a version variable, hence the
# expansion when passing it as VERSION_VAR.
find_package_handle_standard_args(OpenMP
  REQUIRED_VARS ${_OpenMP_REQ_VARS}
  VERSION_VAR ${_OpenMP_MIN_VERSION}
  HANDLE_COMPONENTS)
|
||||||
|
|
||||||
|
set(OPENMP_FOUND ${OpenMP_FOUND})
|
||||||
|
|
||||||
|
# When Fortran OpenMP support was found, make sure both omp_lib availability
# variables are defined, defaulting any undetected access method to FALSE.
if(CMAKE_Fortran_COMPILER_LOADED AND OpenMP_Fortran_FOUND)
  # set(<var> <value> CACHE <type> <doc>) requires CACHE to be the third
  # argument from the end. The previous "CACHE BOOL INTERNAL \"\"" form stored
  # the list "FALSE;CACHE;BOOL;INTERNAL;" in a normal variable, which if()
  # treats as true.
  if(NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_MODULE)
    set(OpenMP_Fortran_HAVE_OMPLIB_MODULE FALSE CACHE INTERNAL "")
  endif()
  if(NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_HEADER)
    set(OpenMP_Fortran_HAVE_OMPLIB_HEADER FALSE CACHE INTERNAL "")
  endif()
endif()
|
||||||
|
|
||||||
|
if(NOT ( CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED ))
|
||||||
|
message(SEND_ERROR "FindOpenMP requires the C, CXX or Fortran languages to be enabled")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
unset(OpenMP_C_CXX_TEST_SOURCE)
|
||||||
|
unset(OpenMP_Fortran_TEST_SOURCE)
|
||||||
|
unset(OpenMP_C_CXX_CHECK_VERSION_SOURCE)
|
||||||
|
unset(OpenMP_Fortran_CHECK_VERSION_SOURCE)
|
||||||
|
unset(OpenMP_Fortran_INCLUDE_LINE)
|
||||||
|
|
||||||
|
cmake_policy(POP)
|
@ -27,6 +27,9 @@
|
|||||||
# vtkm::tbb Target that contains tbb related link information
|
# vtkm::tbb Target that contains tbb related link information
|
||||||
# implicitly linked to by `vtkm_cont` if tbb is enabled
|
# implicitly linked to by `vtkm_cont` if tbb is enabled
|
||||||
#
|
#
|
||||||
|
# vtkm::openmp Target that contains openmp related link information
|
||||||
|
# implicitly linked to by `vtkm_cont` if openmp is enabled
|
||||||
|
#
|
||||||
# vtkm::cuda Target that contains cuda related link information
|
# vtkm::cuda Target that contains cuda related link information
|
||||||
# implicitly linked to by `vtkm_cont` if cuda is enabled
|
# implicitly linked to by `vtkm_cont` if cuda is enabled
|
||||||
#
|
#
|
||||||
@ -41,6 +44,7 @@
|
|||||||
# VTKm_BUILD_SHARED_LIBS Will be enabled if VTK-m was built shared/dynamic
|
# VTKm_BUILD_SHARED_LIBS Will be enabled if VTK-m was built shared/dynamic
|
||||||
# VTKm_ENABLE_CUDA Will be enabled if VTK-m was built with CUDA support
|
# VTKm_ENABLE_CUDA Will be enabled if VTK-m was built with CUDA support
|
||||||
# VTKm_ENABLE_TBB Will be enabled if VTK-m was built with TBB support
|
# VTKm_ENABLE_TBB Will be enabled if VTK-m was built with TBB support
|
||||||
|
# VTKm_ENABLE_OPENMP Will be enabled if VTK-m was built with OpenMP support
|
||||||
# VTKm_ENABLE_MPI Will be enabled if VTK-m was built with MPI support
|
# VTKm_ENABLE_MPI Will be enabled if VTK-m was built with MPI support
|
||||||
# VTKm_ENABLE_RENDERING Will be enabled if VTK-m was built with rendering support
|
# VTKm_ENABLE_RENDERING Will be enabled if VTK-m was built with rendering support
|
||||||
# VTKm_ENABLE_GL_CONTEXT Will be enabled if VTK-m rendering was built with a GL context
|
# VTKm_ENABLE_GL_CONTEXT Will be enabled if VTK-m rendering was built with a GL context
|
||||||
@ -63,6 +67,7 @@ set(VTKm_VERSION "@VTKm_VERSION@")
|
|||||||
set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
|
set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
|
||||||
set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
|
set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
|
||||||
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
|
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
|
||||||
|
set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
|
||||||
set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
|
set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
|
||||||
set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
|
set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
|
||||||
set(VTKm_ENABLE_OSMESA_CONTEXT "@VTKm_ENABLE_OSMESA_CONTEXT@")
|
set(VTKm_ENABLE_OSMESA_CONTEXT "@VTKm_ENABLE_OSMESA_CONTEXT@")
|
||||||
|
@ -47,6 +47,19 @@ if(VTKm_ENABLE_TBB AND NOT TARGET vtkm::tbb)
|
|||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Create the vtkm::openmp interface target carrying the OpenMP compile options
# and link libraries for C++ when the OpenMP backend is enabled.
if(VTKm_ENABLE_OPENMP AND NOT TARGET vtkm::openmp)
  find_package(OpenMP 4.5 REQUIRED COMPONENTS CXX QUIET)

  add_library(vtkm::openmp INTERFACE IMPORTED GLOBAL)
  if(OpenMP_CXX_FLAGS)
    # OpenMP_CXX_FLAGS is a space-separated string (e.g. "-Xclang -fopenmp").
    # INTERFACE_COMPILE_OPTIONS expects a ;-list, so split it first; otherwise
    # a multi-word flag would be handed to the compiler as one argument.
    separate_arguments(_vtkm_openmp_cxx_options NATIVE_COMMAND "${OpenMP_CXX_FLAGS}")
    set_target_properties(vtkm::openmp PROPERTIES
      INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:${_vtkm_openmp_cxx_options}>")
    unset(_vtkm_openmp_cxx_options)
  endif()
  if(OpenMP_CXX_LIBRARIES)
    set_target_properties(vtkm::openmp PROPERTIES
      INTERFACE_LINK_LIBRARIES "${OpenMP_CXX_LIBRARIES}")
  endif()
endif()
|
||||||
|
|
||||||
if(VTKm_ENABLE_CUDA AND NOT TARGET vtkm::cuda)
|
if(VTKm_ENABLE_CUDA AND NOT TARGET vtkm::cuda)
|
||||||
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
|
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
|
||||||
|
@ -86,6 +86,7 @@ endmacro ()
|
|||||||
# Configurable Options
|
# Configurable Options
|
||||||
vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
|
vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
|
||||||
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
|
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
|
||||||
|
vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
|
||||||
vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
|
vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
|
||||||
vtkm_option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
|
vtkm_option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
|
||||||
vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
|
vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
|
||||||
@ -266,6 +267,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
|
|||||||
FILES
|
FILES
|
||||||
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
|
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
|
||||||
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
|
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
|
||||||
|
${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
|
||||||
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
|
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -73,6 +73,8 @@ Optional dependencies are:
|
|||||||
+ [Cuda Toolkit 7+](https://developer.nvidia.com/cuda-toolkit)
|
+ [Cuda Toolkit 7+](https://developer.nvidia.com/cuda-toolkit)
|
||||||
+ TBB Device Adapter
|
+ TBB Device Adapter
|
||||||
+ [TBB](https://www.threadingbuildingblocks.org/)
|
+ [TBB](https://www.threadingbuildingblocks.org/)
|
||||||
|
+ OpenMP Device Adapter
|
||||||
|
+ Requires a compiler that supports OpenMP >= 4.5.
|
||||||
+ OpenGL Rendering
|
+ OpenGL Rendering
|
||||||
+ The rendering module contains multiple rendering implementations
|
+ The rendering module contains multiple rendering implementations
|
||||||
including standalone rendering code. The rendering module also
|
including standalone rendering code. The rendering module also
|
||||||
|
@ -46,7 +46,9 @@
|
|||||||
|
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
||||||
#include <tbb/task_scheduler_init.h>
|
#include <tbb/task_scheduler_init.h>
|
||||||
#endif // TBB
|
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
// This benchmark has a number of commandline options to customize its behavior.
|
// This benchmark has a number of commandline options to customize its behavior.
|
||||||
// See The BenchDevAlgoConfig documentations for details.
|
// See The BenchDevAlgoConfig documentations for details.
|
||||||
@ -1193,6 +1195,8 @@ int main(int argc, char* argv[])
|
|||||||
{
|
{
|
||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
||||||
int numThreads = tbb::task_scheduler_init::automatic;
|
int numThreads = tbb::task_scheduler_init::automatic;
|
||||||
|
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||||
|
int numThreads = omp_get_max_threads();
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
|
|
||||||
vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config;
|
vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config;
|
||||||
@ -1323,8 +1327,12 @@ int main(int argc, char* argv[])
|
|||||||
std::istringstream parse(argv[i]);
|
std::istringstream parse(argv[i]);
|
||||||
parse >> numThreads;
|
parse >> numThreads;
|
||||||
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
|
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
|
||||||
|
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||||
|
std::istringstream parse(argv[i]);
|
||||||
|
parse >> numThreads;
|
||||||
|
std::cout << "Selected " << numThreads << " OpenMP threads." << std::endl;
|
||||||
#else
|
#else
|
||||||
std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl;
|
std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl;
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -1337,6 +1345,8 @@ int main(int argc, char* argv[])
|
|||||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
||||||
// Must not be destroyed as long as benchmarks are running:
|
// Must not be destroyed as long as benchmarks are running:
|
||||||
tbb::task_scheduler_init init(numThreads);
|
tbb::task_scheduler_init init(numThreads);
|
||||||
|
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||||
|
omp_set_num_threads(numThreads);
|
||||||
#endif // TBB
|
#endif // TBB
|
||||||
|
|
||||||
if (config.BenchmarkFlags == 0)
|
if (config.BenchmarkFlags == 0)
|
||||||
|
@ -30,6 +30,12 @@ function(add_benchmark name files)
|
|||||||
target_compile_definitions(${name}_TBB PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_TBB")
|
target_compile_definitions(${name}_TBB PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_TBB")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (TARGET vtkm::openmp)
|
||||||
|
add_executable(${name}_OPENMP ${files})
|
||||||
|
list(APPEND benchmarks ${name}_OPENMP)
|
||||||
|
target_compile_definitions(${name}_OPENMP PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_OPENMP")
|
||||||
|
endif()
|
||||||
|
|
||||||
if (TARGET vtkm::cuda)
|
if (TARGET vtkm::cuda)
|
||||||
get_filename_component(fname "${name}" NAME_WE)
|
get_filename_component(fname "${name}" NAME_WE)
|
||||||
get_filename_component(fullpath "${name}.cxx" ABSOLUTE)
|
get_filename_component(fullpath "${name}.cxx" ABSOLUTE)
|
||||||
@ -70,6 +76,9 @@ if(TARGET vtkm_rendering)
|
|||||||
if(TARGET BenchmarkRayTracing_TBB)
|
if(TARGET BenchmarkRayTracing_TBB)
|
||||||
target_link_libraries(BenchmarkRayTracing_TBB PRIVATE vtkm_rendering)
|
target_link_libraries(BenchmarkRayTracing_TBB PRIVATE vtkm_rendering)
|
||||||
endif()
|
endif()
|
||||||
|
if(TARGET BenchmarkRayTracing_OPENMP)
|
||||||
|
target_link_libraries(BenchmarkRayTracing_OPENMP PRIVATE vtkm_rendering)
|
||||||
|
endif()
|
||||||
if(TARGET BenchmarkRayTracing_CUDA)
|
if(TARGET BenchmarkRayTracing_CUDA)
|
||||||
target_link_libraries(BenchmarkRayTracing_CUDA PRIVATE vtkm_rendering)
|
target_link_libraries(BenchmarkRayTracing_CUDA PRIVATE vtkm_rendering)
|
||||||
endif()
|
endif()
|
||||||
|
5
docs/changelog/openmp-backend.md
Normal file
5
docs/changelog/openmp-backend.md
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
# OpenMP Device Adapter
|
||||||
|
|
||||||
|
A device adapter that leverages OpenMP 4.5 for threading is now available. The
|
||||||
|
new adapter is enabled using the CMake option `VTKm_ENABLE_OPENMP` and its
|
||||||
|
performance is comparable to the TBB device adapter.
|
@ -151,6 +151,7 @@ add_subdirectory(arg)
|
|||||||
add_subdirectory(diy)
|
add_subdirectory(diy)
|
||||||
add_subdirectory(serial)
|
add_subdirectory(serial)
|
||||||
add_subdirectory(tbb)
|
add_subdirectory(tbb)
|
||||||
|
add_subdirectory(openmp)
|
||||||
add_subdirectory(cuda)
|
add_subdirectory(cuda)
|
||||||
|
|
||||||
set(backends )
|
set(backends )
|
||||||
@ -160,6 +161,9 @@ endif()
|
|||||||
if(TARGET vtkm::cuda)
|
if(TARGET vtkm::cuda)
|
||||||
list(APPEND backends vtkm::cuda)
|
list(APPEND backends vtkm::cuda)
|
||||||
endif()
|
endif()
|
||||||
|
if(TARGET vtkm::openmp)
|
||||||
|
list(APPEND backends vtkm::openmp)
|
||||||
|
endif()
|
||||||
target_link_libraries(vtkm_cont PUBLIC vtkm_compiler_flags ${backends})
|
target_link_libraries(vtkm_cont PUBLIC vtkm_compiler_flags ${backends})
|
||||||
if(TARGET vtkm_diy)
|
if(TARGET vtkm_diy)
|
||||||
# This will become a required dependency eventually.
|
# This will become a required dependency eventually.
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include <vtkm/ListTag.h>
|
#include <vtkm/ListTag.h>
|
||||||
|
|
||||||
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
|
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||||
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
|
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
|
||||||
|
|
||||||
@ -37,6 +38,7 @@ namespace cont
|
|||||||
|
|
||||||
struct DeviceAdapterListTagCommon : vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagCuda,
|
struct DeviceAdapterListTagCommon : vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagCuda,
|
||||||
vtkm::cont::DeviceAdapterTagTBB,
|
vtkm::cont::DeviceAdapterTagTBB,
|
||||||
|
vtkm::cont::DeviceAdapterTagOpenMP,
|
||||||
vtkm::cont::DeviceAdapterTagSerial>
|
vtkm::cont::DeviceAdapterTagSerial>
|
||||||
{
|
{
|
||||||
};
|
};
|
||||||
|
@ -36,7 +36,9 @@
|
|||||||
// Unfortunately, VTKM_ENABLE_TBB does not guarantee that TBB is (or isn't)
|
// Unfortunately, VTKM_ENABLE_TBB does not guarantee that TBB is (or isn't)
|
||||||
// available, but there is no way to check for sure in a header library.
|
// available, but there is no way to check for sure in a header library.
|
||||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_TBB
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_TBB
|
||||||
#else // !VTKM_CUDA && !VTKM_ENABLE_TBB
|
#elif defined(VTKM_ENABLE_OPENMP)
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_OPENMP
|
||||||
|
#else // !VTKM_CUDA && !VTKM_ENABLE_TBB && !VTKM_ENABLE_OPENMP
|
||||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL
|
||||||
#endif // !VTKM_CUDA && !VTKM_ENABLE_TBB
|
#endif // !VTKM_CUDA && !VTKM_ENABLE_TBB
|
||||||
#endif // VTKM_DEVICE_ADAPTER
|
#endif // VTKM_DEVICE_ADAPTER
|
||||||
@ -68,6 +70,14 @@
|
|||||||
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
|
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
|
||||||
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagTBB
|
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagTBB
|
||||||
|
|
||||||
|
// OpenMP:
|
||||||
|
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagOpenMP
|
||||||
|
|
||||||
// Error:
|
// Error:
|
||||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_ERROR
|
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@
|
|||||||
#define VTKM_DEVICE_ADAPTER_SERIAL 1
|
#define VTKM_DEVICE_ADAPTER_SERIAL 1
|
||||||
#define VTKM_DEVICE_ADAPTER_CUDA 2
|
#define VTKM_DEVICE_ADAPTER_CUDA 2
|
||||||
#define VTKM_DEVICE_ADAPTER_TBB 3
|
#define VTKM_DEVICE_ADAPTER_TBB 3
|
||||||
|
#define VTKM_DEVICE_ADAPTER_OPENMP 4
|
||||||
|
|
||||||
namespace vtkm
|
namespace vtkm
|
||||||
{
|
{
|
||||||
|
32
vtkm/cont/openmp/CMakeLists.txt
Normal file
32
vtkm/cont/openmp/CMakeLists.txt
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
##============================================================================
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##============================================================================
|
||||||
|
|
||||||
|
# Headers of the OpenMP device adapter that live directly in this directory.
set(headers
  DeviceAdapterOpenMP.h
  )

# The implementation headers and sources are configured in internal/.
add_subdirectory(internal)

# Header tests are only requested when VTKm_ENABLE_OPENMP is on.
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_OPENMP})

#-----------------------------------------------------------------------------
# The unit tests are only configured when the vtkm::openmp target exists.
if(TARGET vtkm::openmp)
  add_subdirectory(testing)
endif()
|
33
vtkm/cont/openmp/DeviceAdapterOpenMP.h
Normal file
33
vtkm/cont/openmp/DeviceAdapterOpenMP.h
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#ifndef vtk_m_cont_openmp_DeviceAdapterOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_DeviceAdapterOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterRuntimeDetectorOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
|
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //vtk_m_cont_openmp_DeviceAdapterOpenMP_h
|
42
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.cxx
Normal file
42
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.cxx
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace internal
|
||||||
|
{
|
||||||
|
|
||||||
|
// Constructor of the OpenMP specialization. It holds no state of its own: the
// storage reference is handed to the Superclass constructor and nothing else
// is initialized.
ExecutionArrayInterfaceBasic<DeviceAdapterTagOpenMP>::ExecutionArrayInterfaceBasic(
  StorageBasicBase& storage)
  : Superclass(storage)
{
  // Intentionally empty.
}
|
||||||
|
|
||||||
|
} // end namespace internal
|
||||||
|
|
||||||
|
VTKM_INSTANTIATE_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagOpenMP)
|
||||||
|
}
|
||||||
|
} // end vtkm::cont
|
109
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h
Normal file
109
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
|
||||||
|
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/ArrayExportMacros.h>
|
||||||
|
#include <vtkm/cont/internal/ArrayManagerExecution.h>
|
||||||
|
#include <vtkm/cont/internal/ArrayManagerExecutionShareWithControl.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace internal
|
||||||
|
{
|
||||||
|
|
||||||
|
template <typename T, class StorageTag>
|
||||||
|
class ArrayManagerExecution<T, StorageTag, vtkm::cont::DeviceAdapterTagOpenMP>
|
||||||
|
: public vtkm::cont::internal::ArrayManagerExecutionShareWithControl<T, StorageTag>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
using Superclass = vtkm::cont::internal::ArrayManagerExecutionShareWithControl<T, StorageTag>;
|
||||||
|
using ValueType = typename Superclass::ValueType;
|
||||||
|
using PortalType = typename Superclass::PortalType;
|
||||||
|
using PortalConstType = typename Superclass::PortalConstType;
|
||||||
|
using StorageType = typename Superclass::StorageType;
|
||||||
|
|
||||||
|
VTKM_CONT
|
||||||
|
ArrayManagerExecution(StorageType* storage)
|
||||||
|
: Superclass(storage)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
VTKM_CONT
|
||||||
|
PortalConstType PrepareForInput(bool updateData)
|
||||||
|
{
|
||||||
|
return this->Superclass::PrepareForInput(updateData);
|
||||||
|
}
|
||||||
|
|
||||||
|
VTKM_CONT
|
||||||
|
PortalType PrepareForInPlace(bool updateData)
|
||||||
|
{
|
||||||
|
return this->Superclass::PrepareForInPlace(updateData);
|
||||||
|
}
|
||||||
|
|
||||||
|
VTKM_CONT
|
||||||
|
PortalType PrepareForOutput(vtkm::Id numberOfValues)
|
||||||
|
{
|
||||||
|
return this->Superclass::PrepareForOutput(numberOfValues);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct ExecutionPortalFactoryBasic<T, DeviceAdapterTagOpenMP>
|
||||||
|
: public ExecutionPortalFactoryBasicShareWithControl<T>
|
||||||
|
{
|
||||||
|
using Superclass = ExecutionPortalFactoryBasicShareWithControl<T>;
|
||||||
|
|
||||||
|
using typename Superclass::ValueType;
|
||||||
|
using typename Superclass::PortalType;
|
||||||
|
using typename Superclass::PortalConstType;
|
||||||
|
using Superclass::CreatePortal;
|
||||||
|
using Superclass::CreatePortalConst;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct VTKM_CONT_EXPORT ExecutionArrayInterfaceBasic<DeviceAdapterTagOpenMP>
|
||||||
|
: public ExecutionArrayInterfaceBasicShareWithControl
|
||||||
|
{
|
||||||
|
using Superclass = ExecutionArrayInterfaceBasicShareWithControl;
|
||||||
|
|
||||||
|
VTKM_CONT
|
||||||
|
ExecutionArrayInterfaceBasic(StorageBasicBase& storage);
|
||||||
|
|
||||||
|
VTKM_CONT
|
||||||
|
DeviceAdapterId GetDeviceId() const final { return VTKM_DEVICE_ADAPTER_OPENMP; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace internal
|
||||||
|
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
|
||||||
|
VTKM_EXPORT_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagOpenMP)
|
||||||
|
#endif // !vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
|
||||||
|
}
|
||||||
|
} // namespace vtkm::cont
|
||||||
|
|
||||||
|
#endif // vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
|
49
vtkm/cont/openmp/internal/CMakeLists.txt
Normal file
49
vtkm/cont/openmp/internal/CMakeLists.txt
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
##============================================================================
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##============================================================================
|
||||||
|
|
||||||
|
# Implementation headers of the OpenMP device adapter.
set(headers
  ArrayManagerExecutionOpenMP.h
  DeviceAdapterAlgorithmOpenMP.h
  DeviceAdapterRuntimeDetectorOpenMP.h
  DeviceAdapterTagOpenMP.h
  FunctorsOpenMP.h
  ParallelQuickSortOpenMP.h
  ParallelRadixSortOpenMP.h
  ParallelScanOpenMP.h
  ParallelSortOpenMP.h
  VirtualObjectTransferOpenMP.h
  )

# Header tests are only requested when VTKm_ENABLE_OPENMP is on.
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_OPENMP})

# The runtime detector is compiled into vtkm_cont unconditionally, i.e. also
# when VTKm_ENABLE_OPENMP is off.
target_sources(vtkm_cont PRIVATE
  ${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterRuntimeDetectorOpenMP.cxx
  )

#-----------------------------------------------------------------------------
# The remaining sources are only added to vtkm_cont for OpenMP-enabled builds.
if(VTKm_ENABLE_OPENMP)
  target_sources(vtkm_cont PRIVATE
    ${CMAKE_CURRENT_SOURCE_DIR}/ArrayManagerExecutionOpenMP.cxx
    ${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterAlgorithmOpenMP.cxx
    ${CMAKE_CURRENT_SOURCE_DIR}/ParallelRadixSortOpenMP.cxx
    )
endif()
|
140
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.cxx
Normal file
140
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.cxx
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/ErrorExecution.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
|
||||||
|
// Runs a 1D tiled task over the index range [0, size).
//
// The range is cut into tiles of 1024 consecutive indices (the final tile is
// clamped to `size`) and the tiles are distributed over an OpenMP
// `parallel for` with guided scheduling. Each tile results in one call
// functor(begin, end).
//
// An error message buffer is attached to the functor before the loop; if an
// error has been raised once the loop has finished, the buffer's text is
// thrown as vtkm::cont::ErrorExecution.
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>::ScheduleTask(
  vtkm::exec::openmp::internal::TaskTiling1D& functor,
  vtkm::Id size)
{
  // Character storage backing the functor's error message buffer.
  static constexpr vtkm::Id ERROR_BUFFER_LENGTH = 1024;
  char errorText[ERROR_BUFFER_LENGTH];
  errorText[0] = '\0';
  vtkm::exec::internal::ErrorMessageBuffer errorBuffer(errorText, ERROR_BUFFER_LENGTH);
  functor.SetErrorMessageBuffer(errorBuffer);

  // Number of consecutive indices handed to the functor per call.
  static constexpr vtkm::Id INDICES_PER_TILE = 1024;

  VTKM_OPENMP_DIRECTIVE(parallel for
                        schedule(guided))
  for (vtkm::Id tileBegin = 0; tileBegin < size; tileBegin += INDICES_PER_TILE)
  {
    // Clamp so the last tile never extends past `size`.
    functor(tileBegin, std::min(tileBegin + INDICES_PER_TILE, size));
  }

  if (!errorBuffer.IsErrorRaised())
  {
    return;
  }
  throw vtkm::cont::ErrorExecution(errorText);
}
|
||||||
|
|
||||||
|
// Runs a 3D tiled task over the index box [0, size[0]) x [0, size[1]) x
// [0, size[2]).
//
// The box is partitioned into rectangular blocks whose dimensions are chosen
// from the extent along dimension 0. The blocks are numbered linearly
// (dimension 0 varies fastest) and distributed over an OpenMP `parallel for`
// with guided scheduling. Within a block, the functor is invoked once per
// (j, k) row as functor(iBegin, iEnd, j, k).
//
// An error message buffer is attached to the functor before the loop; if an
// error has been raised once the loop has finished, the buffer's text is
// thrown as vtkm::cont::ErrorExecution.
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>::ScheduleTask(
  vtkm::exec::openmp::internal::TaskTiling3D& functor,
  vtkm::Id3 size)
{
  // Character storage backing the functor's error message buffer.
  static constexpr vtkm::Id ERROR_BUFFER_LENGTH = 1024;
  char errorText[ERROR_BUFFER_LENGTH];
  errorText[0] = '\0';
  vtkm::exec::internal::ErrorMessageBuffer errorBuffer(errorText, ERROR_BUFFER_LENGTH);
  functor.SetErrorMessageBuffer(errorBuffer);

  // Block dimensions: the larger the extent along dimension 0, the longer the
  // block along dimension 0 and the thinner it is along dimensions 1 and 2.
  const vtkm::Id3 blockDims = [&size]() -> vtkm::Id3 {
    const vtkm::Id extent0 = size[0];
    if (extent0 > 512)
    {
      return { 1024, 4, 1 };
    }
    if (extent0 > 256)
    {
      return { 512, 4, 2 };
    }
    if (extent0 > 128)
    {
      return { 256, 4, 4 };
    }
    if (extent0 > 64)
    {
      return { 128, 8, 4 };
    }
    if (extent0 > 32)
    {
      return { 64, 8, 8 };
    }
    if (extent0 > 16)
    {
      return { 32, 16, 8 };
    }
    return { 16, 16, 16 };
  }();

  // Number of blocks needed along each dimension, and in total.
  const vtkm::Id blocksAlong0 = openmp::CeilDivide(size[0], blockDims[0]);
  const vtkm::Id blocksAlong1 = openmp::CeilDivide(size[1], blockDims[1]);
  const vtkm::Id blocksAlong2 = openmp::CeilDivide(size[2], blockDims[2]);
  const vtkm::Id totalBlocks = blocksAlong0 * blocksAlong1 * blocksAlong2;

  // Visit every block; the linear block index is decoded into an ijk range.
  VTKM_OPENMP_DIRECTIVE(parallel for
                        schedule(guided))
  for (vtkm::Id block = 0; block < totalBlocks; ++block)
  {
    // First index of this block along each dimension.
    const vtkm::Id iBegin = (block % blocksAlong0) * blockDims[0];
    const vtkm::Id jBegin = ((block / blocksAlong0) % blocksAlong1) * blockDims[1];
    const vtkm::Id kBegin = (block / (blocksAlong0 * blocksAlong1)) * blockDims[2];

    // One-past-last index, clamped so edge blocks stay inside `size`.
    const vtkm::Id iEnd = std::min(iBegin + blockDims[0], size[0]);
    const vtkm::Id jEnd = std::min(jBegin + blockDims[1], size[1]);
    const vtkm::Id kEnd = std::min(kBegin + blockDims[2], size[2]);

    for (vtkm::Id k = kBegin; k < kEnd; ++k)
    {
      for (vtkm::Id j = jBegin; j < jEnd; ++j)
      {
        functor(iBegin, iEnd, j, k);
      }
    }
  }

  if (!errorBuffer.IsErrorRaised())
  {
    return;
  }
  throw vtkm::cont::ErrorExecution(errorText);
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont
|
385
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
Normal file
385
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
Normal file
@ -0,0 +1,385 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||||
|
#include <vtkm/cont/Error.h>
|
||||||
|
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/ParallelScanOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/ParallelSortOpenMP.h>
|
||||||
|
#include <vtkm/exec/openmp/internal/TaskTilingOpenMP.h>
|
||||||
|
|
||||||
|
// For serial fallback:
|
||||||
|
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>
|
||||||
|
: vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
|
||||||
|
DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>,
|
||||||
|
vtkm::cont::DeviceAdapterTagOpenMP>
|
||||||
|
{
|
||||||
|
using DevTag = DeviceAdapterTagOpenMP;
|
||||||
|
using SerialAlgo = DeviceAdapterAlgorithm<DeviceAdapterTagSerial>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
template <typename T, typename U, class CIn, class COut>
|
||||||
|
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
vtkm::cont::ArrayHandle<U, COut>& output)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::openmp;
|
||||||
|
|
||||||
|
const vtkm::Id inSize = input.GetNumberOfValues();
|
||||||
|
if (inSize == 0)
|
||||||
|
{
|
||||||
|
output.Allocate(0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto inputPortal = input.PrepareForInput(DevTag());
|
||||||
|
auto outputPortal = output.PrepareForOutput(inSize, DevTag());
|
||||||
|
CopyHelper(inputPortal, outputPortal, 0, 0, inSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, class CIn, class CStencil, class COut>
|
||||||
|
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
|
||||||
|
vtkm::cont::ArrayHandle<T, COut>& output)
|
||||||
|
{
|
||||||
|
::vtkm::NotZeroInitialized unary_predicate;
|
||||||
|
CopyIf(input, stencil, output, unary_predicate);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
|
||||||
|
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
|
||||||
|
vtkm::cont::ArrayHandle<T, COut>& output,
|
||||||
|
UnaryPredicate unary_predicate)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::openmp;
|
||||||
|
|
||||||
|
vtkm::Id inSize = input.GetNumberOfValues();
|
||||||
|
if (inSize == 0)
|
||||||
|
{
|
||||||
|
output.Allocate(0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto inputPortal = input.PrepareForInput(DevTag());
|
||||||
|
auto stencilPortal = stencil.PrepareForInput(DevTag());
|
||||||
|
auto outputPortal = output.PrepareForOutput(inSize, DevTag());
|
||||||
|
|
||||||
|
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inputPortal);
|
||||||
|
auto stencilIter = vtkm::cont::ArrayPortalToIteratorBegin(stencilPortal);
|
||||||
|
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outputPortal);
|
||||||
|
|
||||||
|
CopyIfHelper helper;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||||
|
{
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(single)
|
||||||
|
{
|
||||||
|
// Calls omp_get_num_threads, thus must be used inside a parallel section.
|
||||||
|
helper.Initialize(inSize, sizeof(T));
|
||||||
|
}
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||||
|
for (vtkm::Id i = 0; i < helper.NumChunks; ++i)
|
||||||
|
{
|
||||||
|
helper.CopyIf(inIter, stencilIter, outIter, unary_predicate, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vtkm::Id numValues = helper.Reduce(outIter);
|
||||||
|
output.Shrink(numValues);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename T, typename U, class CIn, class COut>
|
||||||
|
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
vtkm::Id inputStartIndex,
|
||||||
|
vtkm::Id numberOfValuesToCopy,
|
||||||
|
vtkm::cont::ArrayHandle<U, COut>& output,
|
||||||
|
vtkm::Id outputIndex = 0)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::openmp;
|
||||||
|
|
||||||
|
const vtkm::Id inSize = input.GetNumberOfValues();
|
||||||
|
|
||||||
|
// Check if the ranges overlap and fail if they do.
|
||||||
|
if (input == output &&
|
||||||
|
((outputIndex >= inputStartIndex && outputIndex < inputStartIndex + numberOfValuesToCopy) ||
|
||||||
|
(inputStartIndex >= outputIndex && inputStartIndex < outputIndex + numberOfValuesToCopy)))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inputStartIndex < 0 || numberOfValuesToCopy < 0 || outputIndex < 0 ||
|
||||||
|
inputStartIndex >= inSize)
|
||||||
|
{ //invalid parameters
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
//determine if the numberOfElementsToCopy needs to be reduced
|
||||||
|
if (inSize < (inputStartIndex + numberOfValuesToCopy))
|
||||||
|
{ //adjust the size
|
||||||
|
numberOfValuesToCopy = (inSize - inputStartIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
const vtkm::Id outSize = output.GetNumberOfValues();
|
||||||
|
const vtkm::Id copyOutEnd = outputIndex + numberOfValuesToCopy;
|
||||||
|
if (outSize < copyOutEnd)
|
||||||
|
{ //output is not large enough
|
||||||
|
if (outSize == 0)
|
||||||
|
{ //since output has nothing, just need to allocate to correct length
|
||||||
|
output.Allocate(copyOutEnd);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ //we currently have data in this array, so preserve it in the new
|
||||||
|
//resized array
|
||||||
|
vtkm::cont::ArrayHandle<U, COut> temp;
|
||||||
|
temp.Allocate(copyOutEnd);
|
||||||
|
CopySubRange(output, 0, outSize, temp);
|
||||||
|
output = temp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
auto inputPortal = input.PrepareForInput(DevTag());
|
||||||
|
auto outputPortal = output.PrepareForInPlace(DevTag());
|
||||||
|
|
||||||
|
CopyHelper(inputPortal, outputPortal, inputStartIndex, outputIndex, numberOfValuesToCopy);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, class CIn>
|
||||||
|
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input, U initialValue)
|
||||||
|
{
|
||||||
|
return Reduce(input, initialValue, vtkm::Add());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, class CIn, class BinaryFunctor>
|
||||||
|
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
U initialValue,
|
||||||
|
BinaryFunctor binary_functor)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::openmp;
|
||||||
|
|
||||||
|
auto portal = input.PrepareForInput(DevTag());
|
||||||
|
const OpenMPReductionSupported<typename std::decay<U>::type> fastPath;
|
||||||
|
|
||||||
|
return ReduceHelper::Execute(portal, initialValue, binary_functor, fastPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T,
|
||||||
|
typename U,
|
||||||
|
class CKeyIn,
|
||||||
|
class CValIn,
|
||||||
|
class CKeyOut,
|
||||||
|
class CValOut,
|
||||||
|
class BinaryFunctor>
|
||||||
|
VTKM_CONT static void ReduceByKey(const vtkm::cont::ArrayHandle<T, CKeyIn>& keys,
|
||||||
|
const vtkm::cont::ArrayHandle<U, CValIn>& values,
|
||||||
|
vtkm::cont::ArrayHandle<T, CKeyOut>& keys_output,
|
||||||
|
vtkm::cont::ArrayHandle<U, CValOut>& values_output,
|
||||||
|
BinaryFunctor func)
|
||||||
|
{
|
||||||
|
openmp::ReduceByKeyHelper(keys, values, keys_output, values_output, func);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class CIn, class COut>
|
||||||
|
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
vtkm::cont::ArrayHandle<T, COut>& output)
|
||||||
|
{
|
||||||
|
return ScanInclusive(input, output, vtkm::Add());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class CIn, class COut, class BinaryFunctor>
|
||||||
|
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
vtkm::cont::ArrayHandle<T, COut>& output,
|
||||||
|
BinaryFunctor binaryFunctor)
|
||||||
|
{
|
||||||
|
if (input.GetNumberOfValues() * sizeof(T) <= openmp::PAGE_SIZE)
|
||||||
|
{
|
||||||
|
return SerialAlgo::ScanInclusive(input, output, binaryFunctor);
|
||||||
|
}
|
||||||
|
|
||||||
|
using InPortalT = decltype(input.PrepareForInput(DevTag()));
|
||||||
|
using OutPortalT = decltype(output.PrepareForOutput(0, DevTag()));
|
||||||
|
using Impl = openmp::ScanInclusiveHelper<InPortalT, OutPortalT, BinaryFunctor>;
|
||||||
|
|
||||||
|
vtkm::Id numVals = input.GetNumberOfValues();
|
||||||
|
Impl impl(
|
||||||
|
input.PrepareForInput(DevTag()), output.PrepareForOutput(numVals, DevTag()), binaryFunctor);
|
||||||
|
|
||||||
|
return impl.Execute(vtkm::Id2(0, numVals));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class CIn, class COut>
|
||||||
|
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
vtkm::cont::ArrayHandle<T, COut>& output)
|
||||||
|
{
|
||||||
|
return ScanExclusive(input, output, vtkm::Add(), vtkm::TypeTraits<T>::ZeroInitialization());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class CIn, class COut, class BinaryFunctor>
|
||||||
|
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||||
|
vtkm::cont::ArrayHandle<T, COut>& output,
|
||||||
|
BinaryFunctor binaryFunctor,
|
||||||
|
const T& initialValue)
|
||||||
|
{
|
||||||
|
if (input.GetNumberOfValues() * sizeof(T) <= openmp::PAGE_SIZE)
|
||||||
|
{
|
||||||
|
return SerialAlgo::ScanExclusive(input, output, binaryFunctor, initialValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
using InPortalT = decltype(input.PrepareForInput(DevTag()));
|
||||||
|
using OutPortalT = decltype(output.PrepareForOutput(0, DevTag()));
|
||||||
|
using Impl = openmp::ScanExclusiveHelper<InPortalT, OutPortalT, BinaryFunctor>;
|
||||||
|
|
||||||
|
vtkm::Id numVals = input.GetNumberOfValues();
|
||||||
|
Impl impl(input.PrepareForInput(DevTag()),
|
||||||
|
output.PrepareForOutput(numVals, DevTag()),
|
||||||
|
binaryFunctor,
|
||||||
|
initialValue);
|
||||||
|
|
||||||
|
return impl.Execute(vtkm::Id2(0, numVals));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// \brief Unstable ascending sort of input array.
|
||||||
|
///
|
||||||
|
/// Sorts the contents of \c values so that they in ascending value. Doesn't
|
||||||
|
/// guarantee stability
|
||||||
|
///
|
||||||
|
template <typename T, class Storage>
|
||||||
|
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
|
||||||
|
{
|
||||||
|
Sort(values, vtkm::SortLess());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class Storage, class BinaryCompare>
|
||||||
|
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values,
|
||||||
|
BinaryCompare binary_compare)
|
||||||
|
{
|
||||||
|
openmp::sort::parallel_sort(values, binary_compare);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, class StorageT, class StorageU>
|
||||||
|
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||||
|
vtkm::cont::ArrayHandle<U, StorageU>& values)
|
||||||
|
{
|
||||||
|
SortByKey(keys, values, std::less<T>());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
|
||||||
|
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||||
|
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||||
|
BinaryCompare binary_compare)
|
||||||
|
{
|
||||||
|
openmp::sort::parallel_sort_bykey(keys, values, binary_compare);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class Storage>
|
||||||
|
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values)
|
||||||
|
{
|
||||||
|
Unique(values, std::equal_to<T>());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class Storage, class BinaryCompare>
|
||||||
|
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values,
|
||||||
|
BinaryCompare binary_compare)
|
||||||
|
{
|
||||||
|
auto portal = values.PrepareForInPlace(DevTag());
|
||||||
|
auto iter = vtkm::cont::ArrayPortalToIteratorBegin(portal);
|
||||||
|
|
||||||
|
using IterT = typename std::decay<decltype(iter)>::type;
|
||||||
|
using Uniqifier = openmp::UniqueHelper<IterT, BinaryCompare>;
|
||||||
|
|
||||||
|
Uniqifier uniquifier(iter, portal.GetNumberOfValues(), binary_compare);
|
||||||
|
vtkm::Id outSize = uniquifier.Execute();
|
||||||
|
values.Shrink(outSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Declarations only; the definitions are not part of this header section.
// The Schedule() templates in this class wrap their functor in a TaskTiling1D
// or TaskTiling3D kernel and pass it here together with the iteration extent
// (a single vtkm::Id count, or a vtkm::Id3 range).
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling1D& functor,
|
||||||
|
vtkm::Id size);
|
||||||
|
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling3D& functor,
|
||||||
|
vtkm::Id3 size);
|
||||||
|
|
||||||
|
template <class FunctorType>
|
||||||
|
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
|
||||||
|
{
|
||||||
|
vtkm::exec::openmp::internal::TaskTiling1D kernel(functor);
|
||||||
|
ScheduleTask(kernel, numInstances);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class FunctorType>
|
||||||
|
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
|
||||||
|
{
|
||||||
|
vtkm::exec::openmp::internal::TaskTiling3D kernel(functor);
|
||||||
|
ScheduleTask(kernel, rangeMax);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Synchronization point for the device adapter. Intentionally empty for
/// this backend (see the comment in the body).
VTKM_CONT static void Synchronize()
|
||||||
|
{
|
||||||
|
// Nothing to do. This device schedules all of its operations using a
|
||||||
|
// split/join paradigm. This means that if the control thread is
|
||||||
|
// calling this method, then nothing should be running in the execution
|
||||||
|
// environment.
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagOpenMP>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
template <typename WorkletType, typename InvocationType>
|
||||||
|
static vtkm::exec::serial::internal::TaskTiling1D MakeTask(const WorkletType& worklet,
|
||||||
|
const InvocationType& invocation,
|
||||||
|
vtkm::Id,
|
||||||
|
vtkm::Id globalIndexOffset = 0)
|
||||||
|
{
|
||||||
|
return vtkm::exec::openmp::internal::TaskTiling1D(worklet, invocation, globalIndexOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename WorkletType, typename InvocationType>
|
||||||
|
static vtkm::exec::serial::internal::TaskTiling3D MakeTask(const WorkletType& worklet,
|
||||||
|
const InvocationType& invocation,
|
||||||
|
vtkm::Id3,
|
||||||
|
vtkm::Id globalIndexOffset = 0)
|
||||||
|
{
|
||||||
|
return vtkm::exec::openmp::internal::TaskTiling3D(worklet, invocation, globalIndexOffset);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} // namespace vtkm::cont
|
||||||
|
|
||||||
|
#endif //vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
|
@ -0,0 +1,32 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterRuntimeDetectorOpenMP.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
// Reports the compile-time Valid flag from the OpenMP device adapter traits.
VTKM_CONT bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagOpenMP>::Exists() const
{
  return vtkm::cont::DeviceAdapterTraits<vtkm::cont::DeviceAdapterTagOpenMP>::Valid;
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterRuntimeDetector_h
|
||||||
|
#define vtk_m_cont_openmp_internal_DeviceAdapterRuntimeDetector_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
#include <vtkm/cont/vtkm_cont_export.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
|
||||||
|
template <class DeviceAdapterTag>
|
||||||
|
class DeviceAdapterRuntimeDetector;
|
||||||
|
|
||||||
|
/// Determine if this machine supports the OpenMP backend
|
||||||
|
///
|
||||||
|
template <>
|
||||||
|
class VTKM_CONT_EXPORT DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagOpenMP>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
/// Returns true if the OpenMP device adapter is supported on the current
|
||||||
|
/// machine.
|
||||||
|
VTKM_CONT bool Exists() const;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
32
vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h
Normal file
32
vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/DeviceAdapterTag.h>
|
||||||
|
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
|
VTKM_VALID_DEVICE_ADAPTER(OpenMP, VTKM_DEVICE_ADAPTER_OPENMP)
|
||||||
|
#else
|
||||||
|
VTKM_INVALID_DEVICE_ADAPTER(OpenMP, VTKM_DEVICE_ADAPTER_OPENMP)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
|
674
vtkm/cont/openmp/internal/FunctorsOpenMP.h
Normal file
674
vtkm/cont/openmp/internal/FunctorsOpenMP.h
Normal file
@ -0,0 +1,674 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_FunctorsOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_internal_FunctorsOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||||
|
|
||||||
|
#include <vtkm/BinaryOperators.h>
|
||||||
|
#include <vtkm/BinaryPredicates.h>
|
||||||
|
#include <vtkm/Pair.h>
|
||||||
|
#include <vtkm/Types.h>
|
||||||
|
#include <vtkm/cont/ArrayHandle.h>
|
||||||
|
#include <vtkm/cont/ErrorExecution.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// Wrap all '#pragma omp ...' calls in VTKM_OPENMP_DIRECTIVE so we can disable
// them in non-omp builds and avoid a multitude of "ignoring pragma..."
// warnings.
//
// VTKM_OPENMP_DIRECTIVE(dir) prepends "omp" and hands the result to the
// helper macro, which stringifies the complete directive for _Pragma. The
// helper's name deliberately does not start with an underscore: identifiers
// beginning with an underscore followed by an uppercase letter are reserved
// for the implementation.
#ifdef _OPENMP
#define VTKM_OPENMP_DIRECTIVE_IMPL(fullDir) _Pragma(#fullDir)
#define VTKM_OPENMP_DIRECTIVE(dir) VTKM_OPENMP_DIRECTIVE_IMPL(omp dir)
#else // _OPENMP
#define VTKM_OPENMP_DIRECTIVE(directive)
#endif // _OPENMP
|
||||||
|
|
||||||
|
// When defined, supported type / operator combinations will use the OpenMP
|
||||||
|
// reduction(...) clause. Otherwise, all reductions use the general
|
||||||
|
// implementation with a manual reduction once the threads complete.
|
||||||
|
// I don't know how, but the benchmarks currently perform better without the
|
||||||
|
// specializations.
|
||||||
|
//#define VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
|
||||||
|
// Size constants used by the OpenMP helpers. PAGE_SIZE is the unit that
// ComputeChunkSize uses to split work into chunks, and the threshold the scan
// algorithms use to fall back to the serial implementation.
// NOTE(review): both values are hard-coded and presumably in bytes; they are
// not queried from the platform -- confirm they suit the target hardware.
constexpr static vtkm::Id CACHE_LINE_SIZE = 64;
|
||||||
|
constexpr static vtkm::Id PAGE_SIZE = 4096;
|
||||||
|
|
||||||
|
// Returns ceil(numerator / denominator) for integral types.
//
// The result is built from the truncated quotient plus a correction, rather
// than from (numerator + denominator - 1) / denominator, so that it cannot
// overflow when numerator is close to the maximum of T. The correction is 1
// only when there is a remainder and the exact quotient is positive (the
// remainder has the same sign as the denominator), which is when truncation
// rounded down. denominator must be non-zero.
//
// Kept as a single return statement so it stays a valid C++11 constexpr
// function.
template <typename T>
static constexpr T CeilDivide(const T& numerator, const T& denominator)
{
  return static_cast<T>(
    numerator / denominator +
    (((numerator % denominator) != 0 && ((numerator % denominator) > 0) == (denominator > 0))
       ? 1
       : 0));
}
|
||||||
|
|
||||||
|
// Computes the number of values per chunk. Note that numChunks + chunkSize may
|
||||||
|
// exceed numVals, so be sure to check upper limits.
|
||||||
|
static void ComputeChunkSize(const vtkm::Id numVals,
|
||||||
|
const vtkm::Id numThreads,
|
||||||
|
const vtkm::Id chunksPerThread,
|
||||||
|
const vtkm::Id bytesPerValue,
|
||||||
|
vtkm::Id& numChunks,
|
||||||
|
vtkm::Id& valuesPerChunk)
|
||||||
|
{
|
||||||
|
// try to evenly distribute pages across chunks:
|
||||||
|
const vtkm::Id bytesIn = numVals * bytesPerValue;
|
||||||
|
const vtkm::Id pagesIn = CeilDivide(bytesIn, PAGE_SIZE);
|
||||||
|
// If we don't have enough pages to honor chunksPerThread, ignore it:
|
||||||
|
numChunks = (pagesIn > numThreads * chunksPerThread) ? numThreads * chunksPerThread : numThreads;
|
||||||
|
const vtkm::Id pagesPerChunk = CeilDivide(pagesIn, numChunks);
|
||||||
|
valuesPerChunk = CeilDivide(pagesPerChunk * PAGE_SIZE, bytesPerValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, typename U>
|
||||||
|
VTKM_EXEC_CONT static void DoCopy(T src, U dst, vtkm::Id numVals, std::true_type)
|
||||||
|
{
|
||||||
|
if (numVals)
|
||||||
|
{
|
||||||
|
std::copy(src, src + numVals, dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't use std::copy when type conversion is required because MSVC.
|
||||||
|
template <typename InIterT, typename OutIterT>
|
||||||
|
VTKM_EXEC_CONT static void DoCopy(InIterT inIter,
|
||||||
|
OutIterT outIter,
|
||||||
|
vtkm::Id numVals,
|
||||||
|
std::false_type)
|
||||||
|
{
|
||||||
|
using ValueType = typename std::iterator_traits<OutIterT>::value_type;
|
||||||
|
|
||||||
|
for (vtkm::Id i = 0; i < numVals; ++i)
|
||||||
|
{
|
||||||
|
*(outIter++) = static_cast<ValueType>(*(inIter++));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename InIterT, typename OutIterT>
|
||||||
|
VTKM_EXEC_CONT static void DoCopy(InIterT inIter, OutIterT outIter, vtkm::Id numVals)
|
||||||
|
{
|
||||||
|
using InValueType = typename std::iterator_traits<InIterT>::value_type;
|
||||||
|
using OutValueType = typename std::iterator_traits<OutIterT>::value_type;
|
||||||
|
|
||||||
|
DoCopy(inIter, outIter, numVals, std::is_same<InValueType, OutValueType>());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename InPortalT, typename OutPortalT>
|
||||||
|
VTKM_EXEC_CONT static void CopyHelper(InPortalT inPortal,
|
||||||
|
OutPortalT outPortal,
|
||||||
|
vtkm::Id inStart,
|
||||||
|
vtkm::Id outStart,
|
||||||
|
vtkm::Id numVals)
|
||||||
|
{
|
||||||
|
using InValueT = typename InPortalT::ValueType;
|
||||||
|
using OutValueT = typename OutPortalT::ValueType;
|
||||||
|
constexpr auto isSame = std::is_same<InValueT, OutValueT>();
|
||||||
|
|
||||||
|
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inPortal) + inStart;
|
||||||
|
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outPortal) + outStart;
|
||||||
|
vtkm::Id valuesPerChunk;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(none) shared(inIter, outIter, valuesPerChunk, numVals))
|
||||||
|
{
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(single)
|
||||||
|
{
|
||||||
|
// Evenly distribute full pages to all threads. We manually chunk the
|
||||||
|
// data here so that we can exploit std::copy's memmove optimizations.
|
||||||
|
vtkm::Id numChunks;
|
||||||
|
ComputeChunkSize(
|
||||||
|
numVals, omp_get_num_threads(), 8, sizeof(InValueT), numChunks, valuesPerChunk);
|
||||||
|
}
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||||
|
for (vtkm::Id i = 0; i < numVals; i += valuesPerChunk)
|
||||||
|
{
|
||||||
|
vtkm::Id chunkSize = std::min(numVals - i, valuesPerChunk);
|
||||||
|
DoCopy(inIter + i, outIter + i, chunkSize, isSame);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CopyIfHelper
|
||||||
|
{
|
||||||
|
vtkm::Id NumValues;
|
||||||
|
vtkm::Id NumThreads;
|
||||||
|
vtkm::Id ValueSize;
|
||||||
|
|
||||||
|
vtkm::Id NumChunks;
|
||||||
|
vtkm::Id ChunkSize;
|
||||||
|
std::vector<vtkm::Id> EndIds;
|
||||||
|
|
||||||
|
CopyIfHelper() = default;
|
||||||
|
|
||||||
|
void Initialize(vtkm::Id numValues, vtkm::Id valueSize)
|
||||||
|
{
|
||||||
|
this->NumValues = numValues;
|
||||||
|
this->NumThreads = omp_get_num_threads();
|
||||||
|
this->ValueSize = valueSize;
|
||||||
|
|
||||||
|
// Evenly distribute pages across the threads. We manually chunk the
|
||||||
|
// data here so that we can exploit std::copy's memmove optimizations.
|
||||||
|
ComputeChunkSize(
|
||||||
|
this->NumValues, this->NumThreads, 8, valueSize, this->NumChunks, this->ChunkSize);
|
||||||
|
|
||||||
|
this->EndIds.resize(this->NumChunks);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename InIterT, typename StencilIterT, typename OutIterT, typename PredicateT>
|
||||||
|
void CopyIf(InIterT inIter,
|
||||||
|
StencilIterT stencilIter,
|
||||||
|
OutIterT outIter,
|
||||||
|
PredicateT pred,
|
||||||
|
vtkm::Id chunk)
|
||||||
|
{
|
||||||
|
vtkm::Id startPos = std::min(chunk * this->ChunkSize, this->NumValues);
|
||||||
|
vtkm::Id endPos = std::min((chunk + 1) * this->ChunkSize, this->NumValues);
|
||||||
|
|
||||||
|
vtkm::Id outPos = startPos;
|
||||||
|
for (vtkm::Id inPos = startPos; inPos < endPos; ++inPos)
|
||||||
|
{
|
||||||
|
if (pred(stencilIter[inPos]))
|
||||||
|
{
|
||||||
|
outIter[outPos++] = inIter[inPos];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this->EndIds[chunk] = outPos;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename OutIterT>
|
||||||
|
vtkm::Id Reduce(OutIterT data)
|
||||||
|
{
|
||||||
|
vtkm::Id endPos = this->EndIds.front();
|
||||||
|
for (vtkm::Id i = 1; i < this->NumChunks; ++i)
|
||||||
|
{
|
||||||
|
vtkm::Id chunkStart = std::min(i * this->ChunkSize, this->NumValues);
|
||||||
|
vtkm::Id chunkEnd = this->EndIds[i];
|
||||||
|
vtkm::Id numValuesToCopy = chunkEnd - chunkStart;
|
||||||
|
if (numValuesToCopy > 0 && chunkStart != endPos)
|
||||||
|
{
|
||||||
|
std::copy(data + chunkStart, data + chunkEnd, data + endPos);
|
||||||
|
}
|
||||||
|
endPos += numValuesToCopy;
|
||||||
|
}
|
||||||
|
return endPos;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION
// OpenMP only declares reduction operations for primitive types. This trait
// is true_type for the types listed below and false_type for everything else.
template <typename T>
struct OpenMPReductionSupported : std::false_type {};

template <> struct OpenMPReductionSupported<Int8> : std::true_type {};
template <> struct OpenMPReductionSupported<UInt8> : std::true_type {};
template <> struct OpenMPReductionSupported<Int16> : std::true_type {};
template <> struct OpenMPReductionSupported<UInt16> : std::true_type {};
template <> struct OpenMPReductionSupported<Int32> : std::true_type {};
template <> struct OpenMPReductionSupported<UInt32> : std::true_type {};
template <> struct OpenMPReductionSupported<Int64> : std::true_type {};
template <> struct OpenMPReductionSupported<UInt64> : std::true_type {};
template <> struct OpenMPReductionSupported<Float32> : std::true_type {};
template <> struct OpenMPReductionSupported<Float64> : std::true_type {};
#else
// Native reductions are disabled: no type is reported as supported.
template <typename T>
using OpenMPReductionSupported = std::false_type;
#endif // VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||||
|
|
||||||
|
struct ReduceHelper
|
||||||
|
{
|
||||||
|
// Generic implementation:
|
||||||
|
template <typename PortalT, typename ReturnType, typename Functor>
|
||||||
|
static ReturnType Execute(PortalT portal, ReturnType init, Functor functorIn, std::false_type)
|
||||||
|
{
|
||||||
|
internal::WrappedBinaryOperator<ReturnType, Functor> f(functorIn);
|
||||||
|
|
||||||
|
const vtkm::Id numVals = portal.GetNumberOfValues();
|
||||||
|
auto data = vtkm::cont::ArrayPortalToIteratorBegin(portal);
|
||||||
|
|
||||||
|
bool doParallel = false;
|
||||||
|
std::vector<ReturnType> threadData;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(f)
|
||||||
|
shared(data, threadData, doParallel))
|
||||||
|
{
|
||||||
|
|
||||||
|
int tid = omp_get_thread_num();
|
||||||
|
int numThreads = omp_get_num_threads();
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(single)
|
||||||
|
{
|
||||||
|
if (numVals >= numThreads * 2)
|
||||||
|
{
|
||||||
|
doParallel = true;
|
||||||
|
threadData.resize(numThreads);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (doParallel)
|
||||||
|
{
|
||||||
|
// Use the first (numThreads*2) values for initializing:
|
||||||
|
ReturnType accum;
|
||||||
|
accum = f(data[2 * tid], data[2 * tid + 1]);
|
||||||
|
|
||||||
|
// Assign each thread chunks of the remaining values for local reduction
|
||||||
|
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||||
|
for (vtkm::Id i = numThreads * 2; i < numVals; i++)
|
||||||
|
{
|
||||||
|
accum = f(accum, data[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
threadData[tid] = accum;
|
||||||
|
}
|
||||||
|
} // end parallel
|
||||||
|
|
||||||
|
if (doParallel)
|
||||||
|
{
|
||||||
|
// do the final reduction serially:
|
||||||
|
for (size_t i = 0; i < threadData.size(); ++i)
|
||||||
|
{
|
||||||
|
init = f(init, threadData[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Not enough threads. Do the entire reduction in serial:
|
||||||
|
for (vtkm::Id i = 0; i < numVals; ++i)
|
||||||
|
{
|
||||||
|
init = f(init, data[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return init;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION

// Specializations of Execute for vtkm functors that correspond directly to an
// OpenMP reduction-identifier. Each expansion defines an Execute overload
// that folds every portal value into `value` inside an
// `omp parallel for reduction(<op> : value)` loop and returns the result.
//
// PragmaTokens is the *unquoted* directive text. It is stringified exactly
// once by '#', so _Pragma receives one plain string literal. (If quoted pieces
// were passed instead, '#' would escape their quotes and the destringized
// pragma would consist of string literals rather than an omp directive; the
// compiler would then ignore it and the loop would run serially.)
#define VTKM_OPENMP_SPECIALIZE_REDUCE1(FunctorType, PragmaTokens)                                  \
  template <typename PortalT, typename ReturnType>                                                 \
  static ReturnType Execute(                                                                       \
    PortalT portal, ReturnType value, FunctorType functorIn, std::true_type)                       \
  {                                                                                                \
    const vtkm::Id numValues = portal.GetNumberOfValues();                                         \
    internal::WrappedBinaryOperator<ReturnType, FunctorType> f(functorIn);                         \
    _Pragma(#PragmaTokens) for (vtkm::Id i = 0; i < numValues; ++i)                                \
    {                                                                                              \
      value = f(value, portal.Get(i));                                                             \
    }                                                                                              \
    return value;                                                                                  \
  }

// _Pragma accepts only a single string literal (adjacent literals have not
// been concatenated yet when it is processed), so the directive is assembled
// from tokens here and stringified by VTKM_OPENMP_SPECIALIZE_REDUCE1:
#define VTKM_OPENMP_SPECIALIZE_REDUCE(FunctorType, Operator)                                       \
  VTKM_OPENMP_SPECIALIZE_REDUCE1(FunctorType, omp parallel for reduction(Operator : value))

// + (Add, Sum)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Add, +)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Sum, +)
// * (Multiply, Product)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Multiply, *)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Product, *)
// - (Subtract)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Subtract, -)
// & (BitwiseAnd)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseAnd, &)
// | (BitwiseOr)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseOr, |)
// ^ (BitwiseXor)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseXor, ^)
// && (LogicalAnd)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::LogicalAnd, &&)
// || (LogicalOr)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::LogicalOr, ||)
// min (Minimum)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Minimum, min)
// max (Maximum)
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Maximum, max)

#undef VTKM_OPENMP_SPECIALIZE_REDUCE
#undef VTKM_OPENMP_SPECIALIZE_REDUCE1

#endif // VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename KeysInArray,
|
||||||
|
typename ValuesInArray,
|
||||||
|
typename KeysOutArray,
|
||||||
|
typename ValuesOutArray,
|
||||||
|
typename BinaryFunctor>
|
||||||
|
void ReduceByKeyHelper(KeysInArray keysInArray,
|
||||||
|
ValuesInArray valuesInArray,
|
||||||
|
KeysOutArray keysOutArray,
|
||||||
|
ValuesOutArray valuesOutArray,
|
||||||
|
BinaryFunctor functor)
|
||||||
|
{
|
||||||
|
using KeyType = typename KeysInArray::ValueType;
|
||||||
|
using ValueType = typename ValuesInArray::ValueType;
|
||||||
|
|
||||||
|
const vtkm::Id numValues = keysInArray.GetNumberOfValues();
|
||||||
|
auto keysInPortal = keysInArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto valuesInPortal = valuesInArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto keysIn = vtkm::cont::ArrayPortalToIteratorBegin(keysInPortal);
|
||||||
|
auto valuesIn = vtkm::cont::ArrayPortalToIteratorBegin(valuesInPortal);
|
||||||
|
|
||||||
|
auto keysOutPortal = keysOutArray.PrepareForOutput(numValues, DeviceAdapterTagOpenMP());
|
||||||
|
auto valuesOutPortal = valuesOutArray.PrepareForOutput(numValues, DeviceAdapterTagOpenMP());
|
||||||
|
auto keysOut = vtkm::cont::ArrayPortalToIteratorBegin(keysOutPortal);
|
||||||
|
auto valuesOut = vtkm::cont::ArrayPortalToIteratorBegin(valuesOutPortal);
|
||||||
|
|
||||||
|
internal::WrappedBinaryOperator<ValueType, BinaryFunctor> f(functor);
|
||||||
|
vtkm::Id outIdx = 0;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(keysIn, valuesIn, keysOut, valuesOut, f)
|
||||||
|
shared(outIdx))
|
||||||
|
{
|
||||||
|
int tid = omp_get_thread_num();
|
||||||
|
int numThreads = omp_get_num_threads();
|
||||||
|
|
||||||
|
// Determine bounds for this thread's scan operation:
|
||||||
|
vtkm::Id chunkSize = (numValues + numThreads - 1) / numThreads;
|
||||||
|
vtkm::Id scanIdx = std::min(tid * chunkSize, numValues);
|
||||||
|
vtkm::Id scanEnd = std::min(scanIdx + chunkSize, numValues);
|
||||||
|
|
||||||
|
auto threadKeysBegin = keysOut + scanIdx;
|
||||||
|
auto threadValuesBegin = valuesOut + scanIdx;
|
||||||
|
auto threadKey = threadKeysBegin;
|
||||||
|
auto threadValue = threadValuesBegin;
|
||||||
|
|
||||||
|
// Reduce each thread's partition:
|
||||||
|
KeyType rangeKey;
|
||||||
|
ValueType rangeValue;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (scanIdx < scanEnd)
|
||||||
|
{
|
||||||
|
rangeKey = keysIn[scanIdx];
|
||||||
|
rangeValue = valuesIn[scanIdx];
|
||||||
|
++scanIdx;
|
||||||
|
|
||||||
|
// Locate end of current range:
|
||||||
|
while (scanIdx < scanEnd && static_cast<KeyType>(keysIn[scanIdx]) == rangeKey)
|
||||||
|
{
|
||||||
|
rangeValue = f(rangeValue, valuesIn[scanIdx]);
|
||||||
|
++scanIdx;
|
||||||
|
}
|
||||||
|
|
||||||
|
*threadKey = rangeKey;
|
||||||
|
*threadValue = rangeValue;
|
||||||
|
++threadKey;
|
||||||
|
++threadValue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tid == 0)
|
||||||
|
{
|
||||||
|
outIdx = static_cast<vtkm::Id>(threadKey - threadKeysBegin);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Combine the reduction results. Skip tid == 0, since it's already in
|
||||||
|
// the correct location:
|
||||||
|
for (int i = 1; i < numThreads; ++i)
|
||||||
|
{
|
||||||
|
|
||||||
|
// This barrier ensures that:
|
||||||
|
// 1) Threads remain synchronized through this final reduction loop.
|
||||||
|
// 2) The outIdx variable is initialized by thread 0.
|
||||||
|
// 3) All threads have reduced their partitions.
|
||||||
|
VTKM_OPENMP_DIRECTIVE(barrier)
|
||||||
|
|
||||||
|
if (tid == i)
|
||||||
|
{
|
||||||
|
// Check if the previous thread's last key matches our first:
|
||||||
|
if (outIdx > 0 && threadKeysBegin < threadKey && keysOut[outIdx - 1] == *threadKeysBegin)
|
||||||
|
{
|
||||||
|
valuesOut[outIdx - 1] = f(valuesOut[outIdx - 1], *threadValuesBegin);
|
||||||
|
++threadKeysBegin;
|
||||||
|
++threadValuesBegin;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy reduced partition to final location (if needed)
|
||||||
|
if (threadKeysBegin < threadKey && threadKeysBegin != keysOut + outIdx)
|
||||||
|
{
|
||||||
|
std::copy(threadKeysBegin, threadKey, keysOut + outIdx);
|
||||||
|
std::copy(threadValuesBegin, threadValue, valuesOut + outIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
outIdx += static_cast<vtkm::Id>(threadKey - threadKeysBegin);
|
||||||
|
|
||||||
|
} // end tid == i
|
||||||
|
} // end combine reduction
|
||||||
|
} // end parallel
|
||||||
|
|
||||||
|
keysOutArray.Shrink(outIdx);
|
||||||
|
valuesOutArray.Shrink(outIdx);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename IterT, typename RawPredicateT>
|
||||||
|
struct UniqueHelper
|
||||||
|
{
|
||||||
|
using ValueType = typename std::iterator_traits<IterT>::value_type;
|
||||||
|
using PredicateT = internal::WrappedBinaryOperator<bool, RawPredicateT>;
|
||||||
|
|
||||||
|
struct Node
|
||||||
|
{
|
||||||
|
vtkm::Id2 InputRange{ -1, -1 };
|
||||||
|
vtkm::Id2 OutputRange{ -1, -1 };
|
||||||
|
|
||||||
|
// Pad the node out to the size of a cache line to prevent false sharing:
|
||||||
|
static constexpr size_t DataSize = 2 * sizeof(vtkm::Id2);
|
||||||
|
static constexpr size_t NumCacheLines = CeilDivide<size_t>(DataSize, CACHE_LINE_SIZE);
|
||||||
|
static constexpr size_t PaddingSize = NumCacheLines * CACHE_LINE_SIZE - DataSize;
|
||||||
|
unsigned char Padding[PaddingSize];
|
||||||
|
};
|
||||||
|
|
||||||
|
IterT Data;
|
||||||
|
vtkm::Id NumValues;
|
||||||
|
PredicateT Predicate;
|
||||||
|
vtkm::Id LeafSize;
|
||||||
|
std::vector<Node> Nodes;
|
||||||
|
size_t NextNode;
|
||||||
|
|
||||||
|
UniqueHelper(IterT iter, vtkm::Id numValues, RawPredicateT pred)
|
||||||
|
: Data(iter)
|
||||||
|
, NumValues(numValues)
|
||||||
|
, Predicate(pred)
|
||||||
|
, LeafSize(0)
|
||||||
|
, NextNode(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
vtkm::Id Execute()
|
||||||
|
{
|
||||||
|
vtkm::Id outSize = 0;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||||
|
{
|
||||||
|
VTKM_OPENMP_DIRECTIVE(single)
|
||||||
|
{
|
||||||
|
this->Prepare();
|
||||||
|
|
||||||
|
// Kick off task-based divide-and-conquer uniquification:
|
||||||
|
Node* rootNode = this->AllocNode();
|
||||||
|
rootNode->InputRange = vtkm::Id2(0, this->NumValues);
|
||||||
|
this->Uniquify(rootNode);
|
||||||
|
outSize = rootNode->OutputRange[1] - rootNode->OutputRange[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return outSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Prepare()
|
||||||
|
{
|
||||||
|
// Figure out how many values each thread should handle:
|
||||||
|
int numThreads = omp_get_num_threads();
|
||||||
|
vtkm::Id chunksPerThread = 8;
|
||||||
|
vtkm::Id numChunks;
|
||||||
|
ComputeChunkSize(
|
||||||
|
this->NumValues, numThreads, chunksPerThread, sizeof(ValueType), numChunks, this->LeafSize);
|
||||||
|
|
||||||
|
// Compute an upper-bound of the number of nodes in the tree:
|
||||||
|
size_t numNodes = numChunks;
|
||||||
|
while (numChunks > 1)
|
||||||
|
{
|
||||||
|
numChunks = (numChunks + 1) / 2;
|
||||||
|
numNodes += numChunks;
|
||||||
|
}
|
||||||
|
this->Nodes.resize(numNodes);
|
||||||
|
this->NextNode = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node* AllocNode()
|
||||||
|
{
|
||||||
|
size_t nodeIdx;
|
||||||
|
|
||||||
|
// GCC emits a false positive "value computed but not used" for this block:
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-value"
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(atomic capture)
|
||||||
|
{
|
||||||
|
nodeIdx = this->NextNode;
|
||||||
|
++this->NextNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
VTKM_ASSERT(nodeIdx < this->Nodes.size());
|
||||||
|
|
||||||
|
return &this->Nodes[nodeIdx];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsLeaf(const vtkm::Id2& range) { return (range[1] - range[0]) <= this->LeafSize; }
|
||||||
|
|
||||||
|
// Not an strict midpoint, but ensures that the first range will always be
|
||||||
|
// a multiple of the leaf size.
|
||||||
|
vtkm::Id ComputeMidpoint(const vtkm::Id2& range)
|
||||||
|
{
|
||||||
|
const vtkm::Id n = range[1] - range[0];
|
||||||
|
const vtkm::Id np = this->LeafSize;
|
||||||
|
|
||||||
|
return CeilDivide(n / 2, np) * np + range[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
void Uniquify(Node* node)
|
||||||
|
{
|
||||||
|
if (!this->IsLeaf(node->InputRange))
|
||||||
|
{
|
||||||
|
vtkm::Id midpoint = this->ComputeMidpoint(node->InputRange);
|
||||||
|
|
||||||
|
Node* right = this->AllocNode();
|
||||||
|
Node* left = this->AllocNode();
|
||||||
|
|
||||||
|
right->InputRange = vtkm::Id2(midpoint, node->InputRange[1]);
|
||||||
|
|
||||||
|
// Intel compilers seem to have trouble following the 'this' pointer
|
||||||
|
// when launching tasks, resulting in a corrupt task environment.
|
||||||
|
// Explicitly copying the pointer into a local variable seems to fix this.
|
||||||
|
auto explicitThis = this;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(taskgroup)
|
||||||
|
{
|
||||||
|
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->Uniquify(right); }
|
||||||
|
|
||||||
|
left->InputRange = vtkm::Id2(node->InputRange[0], midpoint);
|
||||||
|
this->Uniquify(left);
|
||||||
|
|
||||||
|
} // end taskgroup. Both sides of the tree will be completed here.
|
||||||
|
|
||||||
|
// Combine the ranges in the left side:
|
||||||
|
if (this->Predicate(this->Data[left->OutputRange[1] - 1], this->Data[right->OutputRange[0]]))
|
||||||
|
{
|
||||||
|
++right->OutputRange[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
vtkm::Id numVals = right->OutputRange[1] - right->OutputRange[0];
|
||||||
|
DoCopy(this->Data + right->OutputRange[0], this->Data + left->OutputRange[1], numVals);
|
||||||
|
|
||||||
|
node->OutputRange[0] = left->OutputRange[0];
|
||||||
|
node->OutputRange[1] = left->OutputRange[1] + numVals;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto start = this->Data + node->InputRange[0];
|
||||||
|
auto end = this->Data + node->InputRange[1];
|
||||||
|
end = std::unique(start, end, this->Predicate);
|
||||||
|
node->OutputRange[0] = node->InputRange[0];
|
||||||
|
node->OutputRange[1] = node->InputRange[0] + static_cast<vtkm::Id>(end - start);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont::openmp
|
||||||
|
|
||||||
|
#endif // vtk_m_cont_openmp_internal_FunctorsOpenMP_h
|
271
vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h
Normal file
271
vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||||
|
|
||||||
|
#include <vtkm/Types.h>
|
||||||
|
#include <vtkm/cont/ArrayHandle.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
namespace sort
|
||||||
|
{
|
||||||
|
namespace quick
|
||||||
|
{
|
||||||
|
|
||||||
|
template <typename IterType, typename RawBinaryCompare>
|
||||||
|
struct QuickSorter
|
||||||
|
{
|
||||||
|
using BinaryCompare = vtkm::cont::internal::WrappedBinaryOperator<bool, RawBinaryCompare>;
|
||||||
|
using ValueType = typename std::iterator_traits<IterType>::value_type;
|
||||||
|
|
||||||
|
IterType Data;
|
||||||
|
BinaryCompare Compare;
|
||||||
|
vtkm::Id SerialSize;
|
||||||
|
|
||||||
|
QuickSorter(IterType iter, RawBinaryCompare comp)
|
||||||
|
: Data(iter)
|
||||||
|
, Compare(comp)
|
||||||
|
, SerialSize(0)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void Execute(const vtkm::Id2 range)
|
||||||
|
{
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||||
|
{
|
||||||
|
VTKM_OPENMP_DIRECTIVE(single)
|
||||||
|
{
|
||||||
|
this->Prepare(range);
|
||||||
|
this->Sort(range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void Prepare(const vtkm::Id2 /*range*/)
|
||||||
|
{
|
||||||
|
// Rough benchmarking on an 4-core+4HT processor shows that this sort is
|
||||||
|
// most efficient (within 5% of TBB sort) when we switch to a serial
|
||||||
|
// implementation once a partition is less than 32K keys
|
||||||
|
this->SerialSize = 32768;
|
||||||
|
}
|
||||||
|
|
||||||
|
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Pair<vtkm::Id, ValueType>& v1,
|
||||||
|
const vtkm::Pair<vtkm::Id, ValueType>& v2,
|
||||||
|
const vtkm::Pair<vtkm::Id, ValueType>& v3) const
|
||||||
|
{
|
||||||
|
if (this->Compare(v1.second, v2.second))
|
||||||
|
{ // v1 < v2
|
||||||
|
if (this->Compare(v1.second, v3.second))
|
||||||
|
{ // v1 < v3
|
||||||
|
if (this->Compare(v2.second, v3.second))
|
||||||
|
{ // v1 < v2 < v3
|
||||||
|
return v2;
|
||||||
|
}
|
||||||
|
else // v3 < v2
|
||||||
|
{ // v1 < v3 < v2
|
||||||
|
return v3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else // v3 < v1
|
||||||
|
{ // v3 < v1 < v2
|
||||||
|
return v1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // v2 < v1
|
||||||
|
if (this->Compare(v2.second, v3.second))
|
||||||
|
{ // v2 < v3
|
||||||
|
if (this->Compare(v1.second, v3.second))
|
||||||
|
{ // v2 < v1 < v3
|
||||||
|
return v1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // v2 < v3 < v1
|
||||||
|
return v3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // v3 < v2 < v1
|
||||||
|
return v2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Id ids[3]) const
|
||||||
|
{
|
||||||
|
return this->MedianOf3(vtkm::Pair<vtkm::Id, ValueType>(ids[0], this->Data[ids[0]]),
|
||||||
|
vtkm::Pair<vtkm::Id, ValueType>(ids[1], this->Data[ids[1]]),
|
||||||
|
vtkm::Pair<vtkm::Id, ValueType>(ids[2], this->Data[ids[2]]));
|
||||||
|
}
|
||||||
|
|
||||||
|
vtkm::Pair<vtkm::Id, ValueType> PseudoMedianOf9(const vtkm::Id ids[9]) const
|
||||||
|
{
|
||||||
|
return this->MedianOf3(
|
||||||
|
this->MedianOf3(ids), this->MedianOf3(ids + 3), this->MedianOf3(ids + 6));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Approximate the median of the range and return its index.
|
||||||
|
vtkm::Pair<vtkm::Id, ValueType> SelectPivot(const vtkm::Id2 range) const
|
||||||
|
{
|
||||||
|
const vtkm::Id numVals = range[1] - range[0];
|
||||||
|
assert(numVals >= 9);
|
||||||
|
|
||||||
|
// Pseudorandomize the pivot locations to avoid issues with periodic data
|
||||||
|
// (evenly sampling inputs with periodic values tends to cause the same
|
||||||
|
// value to be obtained for all samples)
|
||||||
|
const vtkm::Id seed = range[0] * 3 / 2 + range[1] * 11 / 3 + numVals * 10 / 7;
|
||||||
|
const vtkm::Id delta = (numVals / 9) * 4 / 3;
|
||||||
|
|
||||||
|
vtkm::Id sampleLocations[9] = {
|
||||||
|
range[0] + ((seed + 0 * delta) % numVals), range[0] + ((seed + 1 * delta) % numVals),
|
||||||
|
range[0] + ((seed + 2 * delta) % numVals), range[0] + ((seed + 3 * delta) % numVals),
|
||||||
|
range[0] + ((seed + 4 * delta) % numVals), range[0] + ((seed + 5 * delta) % numVals),
|
||||||
|
range[0] + ((seed + 6 * delta) % numVals), range[0] + ((seed + 7 * delta) % numVals),
|
||||||
|
range[0] + ((seed + 8 * delta) % numVals)
|
||||||
|
};
|
||||||
|
|
||||||
|
return this->PseudoMedianOf9(sampleLocations);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Select a pivot and partition data with it, returning the final location of
|
||||||
|
// the pivot element(s). We use Bentley-McIlroy three-way partitioning to
|
||||||
|
// improve handling of duplicate keys, so the pivot "location" is actually
|
||||||
|
// a range of identical keys, hence the vtkm::Id2 return type, which mark
|
||||||
|
// the [begin, end) of the pivot range.
|
||||||
|
vtkm::Id2 PartitionData(const vtkm::Id2 range)
|
||||||
|
{
|
||||||
|
using namespace std; // For ADL swap
|
||||||
|
|
||||||
|
const vtkm::Pair<vtkm::Id, ValueType> pivotData = this->SelectPivot(range);
|
||||||
|
const vtkm::Id& origPivotIdx = pivotData.first;
|
||||||
|
const ValueType& pivotVal = pivotData.second;
|
||||||
|
|
||||||
|
// Move the pivot to the end of the block while we partition the rest:
|
||||||
|
swap(this->Data[origPivotIdx], this->Data[range[1] - 1]);
|
||||||
|
|
||||||
|
// Indices of the last partitioned keys:
|
||||||
|
vtkm::Id2 dataCursors(range[0] - 1, range[1] - 1);
|
||||||
|
|
||||||
|
// Indices of the start/end of the keys equal to the pivot:
|
||||||
|
vtkm::Id2 pivotCursors(dataCursors);
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
// Advance the data cursors past all keys that are already partitioned:
|
||||||
|
while (this->Compare(this->Data[++dataCursors[0]], pivotVal))
|
||||||
|
;
|
||||||
|
while (this->Compare(pivotVal, this->Data[--dataCursors[1]]) && dataCursors[1] > range[0])
|
||||||
|
;
|
||||||
|
|
||||||
|
// Range is partitioned the cursors have crossed:
|
||||||
|
if (dataCursors[0] >= dataCursors[1])
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Both dataCursors are pointing at incorrectly partitioned keys. Swap
|
||||||
|
// them to place them in the proper partitions:
|
||||||
|
swap(this->Data[dataCursors[0]], this->Data[dataCursors[1]]);
|
||||||
|
|
||||||
|
// If the elements we just swapped are actually equivalent to the pivot
|
||||||
|
// value, move them to the pivot storage locations:
|
||||||
|
if (!this->Compare(this->Data[dataCursors[0]], pivotVal))
|
||||||
|
{
|
||||||
|
++pivotCursors[0];
|
||||||
|
swap(this->Data[pivotCursors[0]], this->Data[dataCursors[0]]);
|
||||||
|
}
|
||||||
|
if (!this->Compare(pivotVal, this->Data[dataCursors[1]]))
|
||||||
|
{
|
||||||
|
--pivotCursors[1];
|
||||||
|
swap(this->Data[pivotCursors[1]], this->Data[dataCursors[1]]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Data is now partitioned as:
|
||||||
|
// | Equal | Less | Greater | Equal |
|
||||||
|
// Move the equal keys to the middle for the final partitioning:
|
||||||
|
// | Less | Equal | Greater |
|
||||||
|
// First the original pivot value at the end:
|
||||||
|
swap(this->Data[range[1] - 1], this->Data[dataCursors[0]]);
|
||||||
|
|
||||||
|
// Update the cursors to either side of the pivot:
|
||||||
|
dataCursors = vtkm::Id2(dataCursors[0] - 1, dataCursors[0] + 1);
|
||||||
|
|
||||||
|
for (vtkm::Id i = range[0]; i < pivotCursors[0]; ++i, --dataCursors[0])
|
||||||
|
{
|
||||||
|
swap(this->Data[i], this->Data[dataCursors[0]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (vtkm::Id i = range[1] - 2; i > pivotCursors[1]; --i, ++dataCursors[1])
|
||||||
|
{
|
||||||
|
swap(this->Data[i], this->Data[dataCursors[1]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adjust the cursor so we can use them to construct the regions for the
|
||||||
|
// recursive call:
|
||||||
|
++dataCursors[0];
|
||||||
|
|
||||||
|
return dataCursors;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Sort(const vtkm::Id2 range)
|
||||||
|
{
|
||||||
|
const vtkm::Id numVals = range[1] - range[0];
|
||||||
|
if (numVals <= this->SerialSize)
|
||||||
|
{
|
||||||
|
std::sort(this->Data + range[0], this->Data + range[1], this->Compare);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const vtkm::Id2 pivots = this->PartitionData(range);
|
||||||
|
const vtkm::Id2 lhRange = vtkm::Id2(range[0], pivots[0]);
|
||||||
|
const vtkm::Id2 rhRange = vtkm::Id2(pivots[1], range[1]);
|
||||||
|
|
||||||
|
// Intel compilers seem to have trouble following the 'this' pointer
|
||||||
|
// when launching tasks, resulting in a corrupt task environment.
|
||||||
|
// Explicitly copying the pointer into a local variable seems to fix this.
|
||||||
|
auto explicitThis = this;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(task default(none) firstprivate(rhRange, explicitThis))
|
||||||
|
{
|
||||||
|
explicitThis->Sort(rhRange);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->Sort(lhRange);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
} // end namespace sort::quick
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont::openmp
|
88
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
Normal file
88
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/ParallelRadixSort.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
namespace sort
|
||||||
|
{
|
||||||
|
namespace radix
|
||||||
|
{
|
||||||
|
|
||||||
|
struct RadixThreaderOpenMP
|
||||||
|
{
|
||||||
|
size_t GetAvailableCores() const
|
||||||
|
{
|
||||||
|
size_t result;
|
||||||
|
if (omp_in_parallel())
|
||||||
|
{
|
||||||
|
result = static_cast<size_t>(omp_get_num_threads());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
result = static_cast<size_t>(omp_get_num_threads());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename TaskType>
|
||||||
|
void RunParentTask(TaskType task)
|
||||||
|
{
|
||||||
|
assert(!omp_in_parallel());
|
||||||
|
#pragma omp parallel default(none) shared(task)
|
||||||
|
{
|
||||||
|
#pragma omp single
|
||||||
|
{
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
} // Implied barrier ensures that child tasks will finish.
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename TaskType, typename ThreadData>
|
||||||
|
void RunChildTasks(ThreadData, TaskType left, TaskType right)
|
||||||
|
{
|
||||||
|
assert(omp_in_parallel());
|
||||||
|
#pragma omp task default(none) firstprivate(right)
|
||||||
|
{
|
||||||
|
right();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute the left task in the existing thread.
|
||||||
|
left();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
VTKM_INSTANTIATE_RADIX_SORT_FOR_THREADER(RadixThreaderOpenMP)
|
||||||
|
}
|
||||||
|
} // end namespace sort::radix
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont::openmp
|
44
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h
Normal file
44
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/ParallelRadixSortInterface.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
namespace sort
|
||||||
|
{
|
||||||
|
namespace radix
|
||||||
|
{
|
||||||
|
|
||||||
|
VTKM_DECLARE_RADIX_SORT()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont::openmp::sort::radix
|
||||||
|
|
||||||
|
#endif // vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
|
489
vtkm/cont/openmp/internal/ParallelScanOpenMP.h
Normal file
489
vtkm/cont/openmp/internal/ParallelScanOpenMP.h
Normal file
@ -0,0 +1,489 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||||
|
|
||||||
|
#include <vtkm/Types.h>
|
||||||
|
#include <vtkm/cont/ArrayHandle.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
namespace scan
|
||||||
|
{
|
||||||
|
|
||||||
|
// Distinguishes a left child from a right child.
enum class ChildType { Left, Right };
|
||||||
|
|
||||||
|
// Generic implementation of modified Ladner & Fischer 1977 "adder" algorithm
|
||||||
|
// used for backbone of exclusive/inclusive scans. Language in comments is
|
||||||
|
// specific to computing a sum, but the implementation should be generic enough
|
||||||
|
// for any scan operation.
|
||||||
|
//
|
||||||
|
// The basic idea is that a tree structure is used to partition the input into
|
||||||
|
// sets of LeafSize. Each leaf of the tree is processed in two stages: First,
|
||||||
|
// the sum of each leaf is computed, and this information is pushed up the tree
|
||||||
|
// to compute the sum of each node's child leaves. Then the partial sum at the
|
||||||
|
// start of each node is computed and pushed down the tree (the "carry"
|
||||||
|
// values). In the second pass through each leaf's data, these partial sums are
|
||||||
|
// used to compute the final output from the carry value and the input data.
|
||||||
|
//
|
||||||
|
// The passes will likely overlap due to the "leftEdge" optimizations, which
|
||||||
|
// allow each leaf to start the second pass as soon as the first pass of all
|
||||||
|
// previous leaves is completed. Additionally, the first leaf in the data will
|
||||||
|
// combine both passes into one, computing the final output data while
|
||||||
|
// generating its sum for the communication stage.
|
||||||
|
template <typename ScanBody>
|
||||||
|
struct Adder : public ScanBody
|
||||||
|
{
|
||||||
|
template <typename NodeImpl>
|
||||||
|
struct NodeWrapper : public NodeImpl
|
||||||
|
{
|
||||||
|
// Range of IDs this node represents
|
||||||
|
vtkm::Id2 Range{ -1, -1 };
|
||||||
|
|
||||||
|
// Connections:
|
||||||
|
NodeWrapper* Parent{ nullptr };
|
||||||
|
NodeWrapper* Left{ nullptr };
|
||||||
|
NodeWrapper* Right{ nullptr };
|
||||||
|
|
||||||
|
// Special flag to mark nodes on the far left edge of the tree. This allows
|
||||||
|
// various optimization that start the second pass sooner on some ranges.
|
||||||
|
bool LeftEdge{ false };
|
||||||
|
|
||||||
|
// Pad the node out to the size of a cache line to prevent false sharing:
|
||||||
|
static constexpr size_t DataSize =
|
||||||
|
sizeof(NodeImpl) + sizeof(vtkm::Id2) + 3 * sizeof(NodeWrapper*) + sizeof(bool);
|
||||||
|
static constexpr size_t NumCacheLines = CeilDivide<size_t>(DataSize, CACHE_LINE_SIZE);
|
||||||
|
static constexpr size_t PaddingSize = NumCacheLines * CACHE_LINE_SIZE - DataSize;
|
||||||
|
unsigned char Padding[PaddingSize];
|
||||||
|
};
|
||||||
|
|
||||||
|
using Node = NodeWrapper<typename ScanBody::Node>;
|
||||||
|
using ValueType = typename ScanBody::ValueType;
|
||||||
|
|
||||||
|
vtkm::Id LeafSize;
|
||||||
|
std::vector<Node> Nodes;
|
||||||
|
size_t NextNode;
|
||||||
|
|
||||||
|
// Use ScanBody's ctor:
|
||||||
|
using ScanBody::ScanBody;
|
||||||
|
|
||||||
|
// Returns the total array sum:
|
||||||
|
ValueType Execute(const vtkm::Id2& range)
|
||||||
|
{
|
||||||
|
Node* rootNode = nullptr;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||||
|
{
|
||||||
|
VTKM_OPENMP_DIRECTIVE(single)
|
||||||
|
{
|
||||||
|
// Allocate nodes, prep metadata:
|
||||||
|
this->Prepare(range);
|
||||||
|
|
||||||
|
// Compute the partition and node sums:
|
||||||
|
rootNode = this->AllocNode();
|
||||||
|
rootNode->Range = range;
|
||||||
|
rootNode->LeftEdge = true;
|
||||||
|
ScanBody::InitializeRootNode(rootNode);
|
||||||
|
|
||||||
|
this->Scan(rootNode);
|
||||||
|
} // end single
|
||||||
|
} // end parallel
|
||||||
|
|
||||||
|
return rootNode ? ScanBody::GetFinalResult(rootNode)
|
||||||
|
: vtkm::TypeTraits<ValueType>::ZeroInitialization();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Returns the next available node in a thread-safe manner.
|
||||||
|
Node* AllocNode()
|
||||||
|
{
|
||||||
|
size_t nodeIdx;
|
||||||
|
|
||||||
|
// GCC emits a false positive "value computed but not used" for this block:
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wunused-value"
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(atomic capture)
|
||||||
|
{
|
||||||
|
nodeIdx = this->NextNode;
|
||||||
|
++this->NextNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
|
||||||
|
VTKM_ASSERT(nodeIdx < this->Nodes.size());
|
||||||
|
|
||||||
|
return &this->Nodes[nodeIdx];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Does the range represent a leave node?
|
||||||
|
bool IsLeaf(const vtkm::Id2& range) const { return (range[1] - range[0]) <= this->LeafSize; }
|
||||||
|
|
||||||
|
// Use to split ranges. Ensures that the first range is always a multiple of
|
||||||
|
// LeafSize, when possible.
|
||||||
|
vtkm::Id ComputeMidpoint(const vtkm::Id2& range) const
|
||||||
|
{
|
||||||
|
const vtkm::Id n = range[1] - range[0];
|
||||||
|
const vtkm::Id np = this->LeafSize;
|
||||||
|
|
||||||
|
return (((n / 2) + (np - 1)) / np) * np + range[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
void Prepare(const vtkm::Id2& range)
|
||||||
|
{
|
||||||
|
// Figure out how many values each thread should handle:
|
||||||
|
vtkm::Id numVals = range[1] - range[0];
|
||||||
|
int numThreads = omp_get_num_threads();
|
||||||
|
vtkm::Id chunksPerThread = 8;
|
||||||
|
vtkm::Id numChunks;
|
||||||
|
ComputeChunkSize(
|
||||||
|
numVals, numThreads, chunksPerThread, sizeof(ValueType), numChunks, this->LeafSize);
|
||||||
|
|
||||||
|
// Compute an upper-bound of the number of nodes in the tree:
|
||||||
|
size_t numNodes = numChunks;
|
||||||
|
while (numChunks > 1)
|
||||||
|
{
|
||||||
|
numChunks = (numChunks + 1) / 2;
|
||||||
|
numNodes += numChunks;
|
||||||
|
}
|
||||||
|
this->Nodes.resize(numNodes);
|
||||||
|
this->NextNode = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build the tree and compute the sums:
|
||||||
|
void Scan(Node* node)
|
||||||
|
{
|
||||||
|
if (!this->IsLeaf(node->Range))
|
||||||
|
{ // split range:
|
||||||
|
vtkm::Id midpoint = this->ComputeMidpoint(node->Range);
|
||||||
|
|
||||||
|
Node* right = this->AllocNode();
|
||||||
|
right->Parent = node;
|
||||||
|
node->Right = right;
|
||||||
|
right->Range = vtkm::Id2(midpoint, node->Range[1]);
|
||||||
|
ScanBody::InitializeChildNode(right, node, ChildType::Right, false);
|
||||||
|
|
||||||
|
// Intel compilers seem to have trouble following the 'this' pointer
|
||||||
|
// when launching tasks, resulting in a corrupt task environment.
|
||||||
|
// Explicitly copying the pointer into a local variable seems to fix this.
|
||||||
|
auto explicitThis = this;
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(taskgroup)
|
||||||
|
{
|
||||||
|
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->Scan(right); } // end right task
|
||||||
|
|
||||||
|
Node* left = this->AllocNode();
|
||||||
|
left->Parent = node;
|
||||||
|
node->Left = left;
|
||||||
|
left->Range = vtkm::Id2(node->Range[0], midpoint);
|
||||||
|
left->LeftEdge = node->LeftEdge;
|
||||||
|
ScanBody::InitializeChildNode(left, node, ChildType::Left, left->LeftEdge);
|
||||||
|
this->Scan(left);
|
||||||
|
|
||||||
|
} // end task group. Both l/r sums will be finished here.
|
||||||
|
|
||||||
|
ScanBody::CombineSummaries(node, node->Left, node->Right);
|
||||||
|
if (node->LeftEdge)
|
||||||
|
{
|
||||||
|
this->UpdateOutput(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // Compute sums:
|
||||||
|
ScanBody::ComputeSummary(node, node->Range, node->LeftEdge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateOutput(Node* node)
|
||||||
|
{
|
||||||
|
if (node->Left != nullptr)
|
||||||
|
{
|
||||||
|
assert(node->Right != nullptr);
|
||||||
|
ScanBody::PropagateSummaries(node, node->Left, node->Right, node->LeftEdge);
|
||||||
|
|
||||||
|
// if this node is on the left edge, we know that the left child's
|
||||||
|
// output is already updated, so only descend to the right:
|
||||||
|
if (node->LeftEdge)
|
||||||
|
{
|
||||||
|
this->UpdateOutput(node->Right);
|
||||||
|
}
|
||||||
|
else // Otherwise descent into both:
|
||||||
|
{
|
||||||
|
// Intel compilers seem to have trouble following the 'this' pointer
|
||||||
|
// when launching tasks, resulting in a corrupt task environment.
|
||||||
|
// Explicitly copying the pointer into a local variable seems to fix
|
||||||
|
// this.
|
||||||
|
auto explicitThis = this;
|
||||||
|
|
||||||
|
// no taskgroup/sync needed other than the final barrier of the parallel
|
||||||
|
// section.
|
||||||
|
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->UpdateOutput(node->Right); } // end task
|
||||||
|
this->UpdateOutput(node->Left);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ScanBody::UpdateOutput(node, node->Range, node->LeftEdge);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename InPortalT, typename OutPortalT, typename RawFunctorT>
|
||||||
|
struct ScanExclusiveBody
|
||||||
|
{
|
||||||
|
using ValueType = typename InPortalT::ValueType;
|
||||||
|
using FunctorType = internal::WrappedBinaryOperator<ValueType, RawFunctorT>;
|
||||||
|
|
||||||
|
InPortalT InPortal;
|
||||||
|
OutPortalT OutPortal;
|
||||||
|
FunctorType Functor;
|
||||||
|
ValueType InitialValue;
|
||||||
|
|
||||||
|
struct Node
|
||||||
|
{
|
||||||
|
// Sum of all values in range
|
||||||
|
ValueType Sum{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||||
|
|
||||||
|
// The sum of all elements prior to this node's range
|
||||||
|
ValueType Carry{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||||
|
};
|
||||||
|
|
||||||
|
ScanExclusiveBody(const InPortalT& inPortal,
|
||||||
|
const OutPortalT& outPortal,
|
||||||
|
const RawFunctorT& functor,
|
||||||
|
const ValueType& init)
|
||||||
|
: InPortal(inPortal)
|
||||||
|
, OutPortal(outPortal)
|
||||||
|
, Functor(functor)
|
||||||
|
, InitialValue(init)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the root of the node tree
|
||||||
|
void InitializeRootNode(Node* /*root*/) {}
|
||||||
|
|
||||||
|
void InitializeChildNode(Node* /*node*/,
|
||||||
|
const Node* /*parent*/,
|
||||||
|
ChildType /*type*/,
|
||||||
|
bool /*leftEdge*/)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void ComputeSummary(Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||||
|
{
|
||||||
|
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||||
|
node->Sum = input[range[0]];
|
||||||
|
|
||||||
|
// If this block is on the left edge, we can update the output while we
|
||||||
|
// compute the sum:
|
||||||
|
if (leftEdge)
|
||||||
|
{
|
||||||
|
// Set leftEdge arg to false to force the update:
|
||||||
|
node->Sum = UpdateOutputImpl(node, range, false, true);
|
||||||
|
}
|
||||||
|
else // Otherwise, only compute the sum and update the output in pass 2.
|
||||||
|
{
|
||||||
|
for (vtkm::Id i = range[0] + 1; i < range[1]; ++i)
|
||||||
|
{
|
||||||
|
node->Sum = this->Functor(node->Sum, input[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CombineSummaries(Node* parent, const Node* left, const Node* right)
|
||||||
|
{
|
||||||
|
parent->Sum = this->Functor(left->Sum, right->Sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PropagateSummaries(const Node* parent, Node* left, Node* right, bool leftEdge)
|
||||||
|
{
|
||||||
|
left->Carry = parent->Carry;
|
||||||
|
right->Carry = leftEdge ? left->Sum : this->Functor(parent->Carry, left->Sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateOutput(const Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||||
|
{
|
||||||
|
this->UpdateOutputImpl(node, range, leftEdge, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
ValueType UpdateOutputImpl(const Node* node, const vtkm::Id2& range, bool skip, bool useInit)
|
||||||
|
{
|
||||||
|
if (skip)
|
||||||
|
{
|
||||||
|
// Do nothing; this was already done in ComputeSummary.
|
||||||
|
return vtkm::TypeTraits<ValueType>::ZeroInitialization();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||||
|
auto output = vtkm::cont::ArrayPortalToIteratorBegin(this->OutPortal);
|
||||||
|
|
||||||
|
// Be careful with the order input/output are modified. They might be
|
||||||
|
// pointing at the same data:
|
||||||
|
ValueType carry = useInit ? this->InitialValue : node->Carry;
|
||||||
|
vtkm::Id end = range[1];
|
||||||
|
|
||||||
|
for (vtkm::Id i = range[0]; i < end; ++i)
|
||||||
|
{
|
||||||
|
output[i] = this->Functor(carry, input[i]);
|
||||||
|
|
||||||
|
using std::swap; // Enable ADL
|
||||||
|
swap(output[i], carry);
|
||||||
|
}
|
||||||
|
|
||||||
|
return carry;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the final sum from the node's metadata:
|
||||||
|
ValueType GetFinalResult(const Node* node) const { return this->Functor(node->Sum, node->Carry); }
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename InPortalT, typename OutPortalT, typename RawFunctorT>
|
||||||
|
struct ScanInclusiveBody
|
||||||
|
{
|
||||||
|
using ValueType = typename InPortalT::ValueType;
|
||||||
|
using FunctorType = internal::WrappedBinaryOperator<ValueType, RawFunctorT>;
|
||||||
|
|
||||||
|
InPortalT InPortal;
|
||||||
|
OutPortalT OutPortal;
|
||||||
|
FunctorType Functor;
|
||||||
|
|
||||||
|
struct Node
|
||||||
|
{
|
||||||
|
// Sum of all values in range
|
||||||
|
ValueType Sum{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||||
|
|
||||||
|
// The sum of all elements prior to this node's range
|
||||||
|
ValueType Carry{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||||
|
};
|
||||||
|
|
||||||
|
ScanInclusiveBody(const InPortalT& inPortal,
|
||||||
|
const OutPortalT& outPortal,
|
||||||
|
const RawFunctorT& functor)
|
||||||
|
: InPortal(inPortal)
|
||||||
|
, OutPortal(outPortal)
|
||||||
|
, Functor(functor)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize the root of the node tree
|
||||||
|
void InitializeRootNode(Node*)
|
||||||
|
{
|
||||||
|
// no-op
|
||||||
|
}
|
||||||
|
|
||||||
|
void InitializeChildNode(Node*, const Node*, ChildType, bool)
|
||||||
|
{
|
||||||
|
// no-op
|
||||||
|
}
|
||||||
|
|
||||||
|
void ComputeSummary(Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||||
|
{
|
||||||
|
// If this block is on the left edge, we can update the output while we
|
||||||
|
// compute the sum:
|
||||||
|
if (leftEdge)
|
||||||
|
{
|
||||||
|
node->Sum = UpdateOutputImpl(node, range, false, false);
|
||||||
|
}
|
||||||
|
else // Otherwise, only compute the sum and update the output in pass 2.
|
||||||
|
{
|
||||||
|
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||||
|
node->Sum = input[range[0]];
|
||||||
|
for (vtkm::Id i = range[0] + 1; i < range[1]; ++i)
|
||||||
|
{
|
||||||
|
node->Sum = this->Functor(node->Sum, input[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void CombineSummaries(Node* parent, const Node* left, const Node* right)
|
||||||
|
{
|
||||||
|
parent->Sum = this->Functor(left->Sum, right->Sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PropagateSummaries(const Node* parent, Node* left, Node* right, bool leftEdge)
|
||||||
|
{
|
||||||
|
left->Carry = parent->Carry;
|
||||||
|
right->Carry = leftEdge ? left->Sum : this->Functor(parent->Carry, left->Sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
void UpdateOutput(const Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||||
|
{
|
||||||
|
UpdateOutputImpl(node, range, leftEdge, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
ValueType UpdateOutputImpl(const Node* node, const vtkm::Id2& range, bool skip, bool useCarry)
|
||||||
|
{
|
||||||
|
if (skip)
|
||||||
|
{
|
||||||
|
// Do nothing; this was already done in ComputeSummary.
|
||||||
|
return vtkm::TypeTraits<ValueType>::ZeroInitialization();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||||
|
auto output = vtkm::cont::ArrayPortalToIteratorBegin(this->OutPortal);
|
||||||
|
|
||||||
|
vtkm::Id start = range[0];
|
||||||
|
vtkm::Id end = range[1];
|
||||||
|
ValueType carry = node->Carry;
|
||||||
|
|
||||||
|
// Initialize with the first value if this is the first range:
|
||||||
|
if (!useCarry && start < end)
|
||||||
|
{
|
||||||
|
carry = input[start];
|
||||||
|
output[start] = carry;
|
||||||
|
++start;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (vtkm::Id i = start; i < end; ++i)
|
||||||
|
{
|
||||||
|
output[i] = this->Functor(carry, input[i]);
|
||||||
|
carry = output[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return output[end - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute the final sum from the node's metadata:
|
||||||
|
ValueType GetFinalResult(const Node* node) const { return node->Sum; }
|
||||||
|
};
|
||||||
|
|
||||||
|
} // end namespace scan
|
||||||
|
|
||||||
|
template <typename InPortalT, typename OutPortalT, typename FunctorT>
|
||||||
|
using ScanExclusiveHelper = scan::Adder<scan::ScanExclusiveBody<InPortalT, OutPortalT, FunctorT>>;
|
||||||
|
|
||||||
|
template <typename InPortalT, typename OutPortalT, typename FunctorT>
|
||||||
|
using ScanInclusiveHelper = scan::Adder<scan::ScanInclusiveBody<InPortalT, OutPortalT, FunctorT>>;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont::openmp
|
251
vtkm/cont/openmp/internal/ParallelSortOpenMP.h
Normal file
251
vtkm/cont/openmp/internal/ParallelSortOpenMP.h
Normal file
@ -0,0 +1,251 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h>
|
||||||
|
|
||||||
|
#include <vtkm/BinaryPredicates.h>
|
||||||
|
#include <vtkm/cont/ArrayHandle.h>
|
||||||
|
#include <vtkm/cont/ArrayHandleIndex.h>
|
||||||
|
#include <vtkm/cont/ArrayHandleZip.h>
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
namespace sort
|
||||||
|
{
|
||||||
|
|
||||||
|
// Forward declare the entry points. (See Stack Overflow question 7255281:
// overloads of function templates are not specializations, and they are
// resolved during the first phase of two-phase lookup, so they must be
// visible before the calls below.)
|
||||||
|
template <typename T, typename Container, class BinaryCompare>
|
||||||
|
void parallel_sort(vtkm::cont::ArrayHandle<T, Container>&, BinaryCompare);
|
||||||
|
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||||
|
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>&,
|
||||||
|
vtkm::cont::ArrayHandle<U, StorageU>&,
|
||||||
|
BinaryCompare);
|
||||||
|
|
||||||
|
// Quicksort values:
|
||||||
|
template <typename HandleType, class BinaryCompare>
|
||||||
|
void parallel_sort(HandleType& values,
|
||||||
|
BinaryCompare binary_compare,
|
||||||
|
vtkm::cont::internal::radix::PSortTag)
|
||||||
|
{
|
||||||
|
auto portal = values.PrepareForInPlace(DeviceAdapterTagOpenMP());
|
||||||
|
auto iter = vtkm::cont::ArrayPortalToIteratorBegin(portal);
|
||||||
|
vtkm::Id2 range(0, values.GetNumberOfValues());
|
||||||
|
|
||||||
|
using IterType = typename std::decay<decltype(iter)>::type;
|
||||||
|
using Sorter = quick::QuickSorter<IterType, BinaryCompare>;
|
||||||
|
|
||||||
|
Sorter sorter(iter, binary_compare);
|
||||||
|
sorter.Execute(range);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Radix sort values:
|
||||||
|
template <typename T, typename StorageT, class BinaryCompare>
|
||||||
|
void parallel_sort(vtkm::cont::ArrayHandle<T, StorageT>& values,
|
||||||
|
BinaryCompare binary_compare,
|
||||||
|
vtkm::cont::internal::radix::RadixSortTag)
|
||||||
|
{
|
||||||
|
auto c = vtkm::cont::internal::radix::get_std_compare(binary_compare, T{});
|
||||||
|
radix::parallel_radix_sort(
|
||||||
|
values.GetStorage().GetArray(), static_cast<std::size_t>(values.GetNumberOfValues()), c);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value sort -- static switch between quicksort & radix sort
|
||||||
|
template <typename T, typename Container, class BinaryCompare>
|
||||||
|
void parallel_sort(vtkm::cont::ArrayHandle<T, Container>& values, BinaryCompare binary_compare)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::internal::radix;
|
||||||
|
using SortAlgorithmTag = typename sort_tag_type<T, Container, BinaryCompare>::type;
|
||||||
|
|
||||||
|
parallel_sort(values, binary_compare, SortAlgorithmTag{});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quicksort by key:
|
||||||
|
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||||
|
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||||
|
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||||
|
BinaryCompare binary_compare,
|
||||||
|
vtkm::cont::internal::radix::PSortTag)
|
||||||
|
{
|
||||||
|
using KeyType = vtkm::cont::ArrayHandle<T, StorageT>;
|
||||||
|
constexpr bool larger_than_64bits = sizeof(U) > sizeof(vtkm::Int64);
|
||||||
|
if (larger_than_64bits)
|
||||||
|
{
|
||||||
|
/// More efficient sort:
|
||||||
|
/// Move value indexes when sorting and reorder the value array at last
|
||||||
|
|
||||||
|
using ValueType = vtkm::cont::ArrayHandle<U, StorageU>;
|
||||||
|
using IndexType = vtkm::cont::ArrayHandle<vtkm::Id>;
|
||||||
|
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, IndexType>;
|
||||||
|
|
||||||
|
IndexType indexArray;
|
||||||
|
ValueType valuesScattered;
|
||||||
|
const vtkm::Id size = values.GetNumberOfValues();
|
||||||
|
|
||||||
|
// Generate an in-memory index array:
|
||||||
|
{
|
||||||
|
auto handle = ArrayHandleIndex(keys.GetNumberOfValues());
|
||||||
|
auto inputPortal = handle.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto outputPortal =
|
||||||
|
indexArray.PrepareForOutput(keys.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||||
|
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, keys.GetNumberOfValues());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort the keys and indicies:
|
||||||
|
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, indexArray);
|
||||||
|
parallel_sort(zipHandle,
|
||||||
|
vtkm::cont::internal::KeyCompare<T, vtkm::Id, BinaryCompare>(binary_compare),
|
||||||
|
vtkm::cont::internal::radix::PSortTag());
|
||||||
|
|
||||||
|
// Permute the values to their sorted locations:
|
||||||
|
{
|
||||||
|
auto valuesInPortal = values.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto indexPortal = indexArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto valuesOutPortal = valuesScattered.PrepareForOutput(size, DeviceAdapterTagOpenMP());
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel for
|
||||||
|
default(none)
|
||||||
|
firstprivate(valuesInPortal, indexPortal, valuesOutPortal)
|
||||||
|
schedule(static))
|
||||||
|
for (vtkm::Id i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
valuesOutPortal.Set(i, valuesInPortal.Get(indexPortal.Get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy the values back into the input array:
|
||||||
|
{
|
||||||
|
auto inputPortal = valuesScattered.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto outputPortal =
|
||||||
|
values.PrepareForOutput(valuesScattered.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||||
|
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
using ValueType = vtkm::cont::ArrayHandle<U, StorageU>;
|
||||||
|
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, ValueType>;
|
||||||
|
|
||||||
|
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, values);
|
||||||
|
parallel_sort(zipHandle,
|
||||||
|
vtkm::cont::internal::KeyCompare<T, U, BinaryCompare>(binary_compare),
|
||||||
|
vtkm::cont::internal::radix::PSortTag{});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Radix sort by key:
|
||||||
|
template <typename T, typename StorageT, typename StorageU, class BinaryCompare>
|
||||||
|
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||||
|
vtkm::cont::ArrayHandle<vtkm::Id, StorageU>& values,
|
||||||
|
BinaryCompare binary_compare,
|
||||||
|
vtkm::cont::internal::radix::RadixSortTag)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::internal::radix;
|
||||||
|
auto c = get_std_compare(binary_compare, T{});
|
||||||
|
radix::parallel_radix_sort_key_values(keys.GetStorage().GetArray(),
|
||||||
|
values.GetStorage().GetArray(),
|
||||||
|
static_cast<std::size_t>(keys.GetNumberOfValues()),
|
||||||
|
c);
|
||||||
|
}
|
||||||
|
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||||
|
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||||
|
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||||
|
BinaryCompare binary_compare,
|
||||||
|
vtkm::cont::internal::radix::RadixSortTag)
|
||||||
|
{
|
||||||
|
using KeyType = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>;
|
||||||
|
using ValueType = vtkm::cont::ArrayHandle<U, vtkm::cont::StorageTagBasic>;
|
||||||
|
using IndexType = vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>;
|
||||||
|
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, IndexType>;
|
||||||
|
|
||||||
|
IndexType indexArray;
|
||||||
|
ValueType valuesScattered;
|
||||||
|
const vtkm::Id size = values.GetNumberOfValues();
|
||||||
|
|
||||||
|
{
|
||||||
|
auto handle = ArrayHandleIndex(keys.GetNumberOfValues());
|
||||||
|
auto inputPortal = handle.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto outputPortal =
|
||||||
|
indexArray.PrepareForOutput(keys.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||||
|
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, keys.GetNumberOfValues());
|
||||||
|
}
|
||||||
|
|
||||||
|
const vtkm::Id valuesBytes = static_cast<vtkm::Id>(sizeof(T)) * keys.GetNumberOfValues();
|
||||||
|
if (valuesBytes > static_cast<vtkm::Id>(vtkm::cont::internal::radix::MIN_BYTES_FOR_PARALLEL))
|
||||||
|
{
|
||||||
|
parallel_sort_bykey(keys, indexArray, binary_compare);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, indexArray);
|
||||||
|
parallel_sort(zipHandle,
|
||||||
|
vtkm::cont::internal::KeyCompare<T, vtkm::Id, BinaryCompare>(binary_compare),
|
||||||
|
vtkm::cont::internal::radix::PSortTag());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Permute the values to their sorted locations:
|
||||||
|
{
|
||||||
|
auto valuesInPortal = values.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto indexPortal = indexArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto valuesOutPortal = valuesScattered.PrepareForOutput(size, DeviceAdapterTagOpenMP());
|
||||||
|
|
||||||
|
VTKM_OPENMP_DIRECTIVE(parallel for
|
||||||
|
default(none)
|
||||||
|
firstprivate(valuesInPortal, indexPortal, valuesOutPortal)
|
||||||
|
schedule(static))
|
||||||
|
for (vtkm::Id i = 0; i < size; ++i)
|
||||||
|
{
|
||||||
|
valuesOutPortal.Set(i, valuesInPortal.Get(indexPortal.Get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto inputPortal = valuesScattered.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||||
|
auto outputPortal =
|
||||||
|
values.PrepareForOutput(valuesScattered.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||||
|
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, valuesScattered.GetNumberOfValues());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sort by key -- static switch between radix and quick sort:
|
||||||
|
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||||
|
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||||
|
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||||
|
BinaryCompare binary_compare)
|
||||||
|
{
|
||||||
|
using namespace vtkm::cont::internal::radix;
|
||||||
|
using SortAlgorithmTag =
|
||||||
|
typename sortbykey_tag_type<T, U, StorageT, StorageU, BinaryCompare>::type;
|
||||||
|
parallel_sort_bykey(keys, values, binary_compare, SortAlgorithmTag{});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // end namespace vtkm::cont::openmp::sort
|
50
vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h
Normal file
50
vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#ifndef vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
|
||||||
|
#define vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/cont/internal/VirtualObjectTransfer.h>
|
||||||
|
#include <vtkm/cont/internal/VirtualObjectTransferShareWithControl.h>
|
||||||
|
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace cont
|
||||||
|
{
|
||||||
|
namespace internal
|
||||||
|
{
|
||||||
|
|
||||||
|
template <typename VirtualDerivedType>
|
||||||
|
struct VirtualObjectTransfer<VirtualDerivedType, vtkm::cont::DeviceAdapterTagOpenMP>
|
||||||
|
: VirtualObjectTransferShareWithControl<VirtualDerivedType>
|
||||||
|
{
|
||||||
|
VTKM_CONT VirtualObjectTransfer(const VirtualDerivedType* virtualObject)
|
||||||
|
: VirtualObjectTransferShareWithControl<VirtualDerivedType>(virtualObject)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // vtkm::cont::internal
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif // vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
|
33
vtkm/cont/openmp/testing/CMakeLists.txt
Normal file
33
vtkm/cont/openmp/testing/CMakeLists.txt
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
##============================================================================
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##============================================================================
|
||||||
|
|
||||||
|
# Sources for the OpenMP device adapter unit tests.
set(unit_tests
  UnitTestOpenMPArrayHandle.cxx
  UnitTestOpenMPArrayHandleFancy.cxx
  UnitTestOpenMPCellLocatorTwoLevelUniformGrid.cxx
  UnitTestOpenMPComputeRange.cxx
  UnitTestOpenMPDataSetExplicit.cxx
  UnitTestOpenMPDataSetSingleType.cxx
  UnitTestOpenMPDeviceAdapter.cxx
  UnitTestOpenMPImplicitFunction.cxx
  UnitTestOpenMPPointLocatorUniformGrid.cxx
  UnitTestOpenMPVirtualObjectHandle.cxx
  )

# Register the sources above as the OpenMP unit tests.
vtkm_unit_tests(OpenMP SOURCES ${unit_tests})
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandle.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandle.cxx
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingArrayHandles.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPArrayHandle(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::TestingArrayHandles<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||||
|
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandleFancy.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandleFancy.cxx
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingFancyArrayHandles.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPArrayHandleFancy(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::TestingFancyArrayHandles<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||||
|
}
|
@ -0,0 +1,32 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingCellLocatorTwoLevelUniformGrid.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPCellLocatorTwoLevelUniformGrid(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::Testing::Run(
|
||||||
|
TestingCellLocatorTwoLevelUniformGrid<vtkm::cont::DeviceAdapterTagOpenMP>);
|
||||||
|
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPComputeRange.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPComputeRange.cxx
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingComputeRange.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPComputeRange(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::TestingComputeRange<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||||
|
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetExplicit.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetExplicit.cxx
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingDataSetExplicit.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPDataSetExplicit(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::TestingDataSetExplicit<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||||
|
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetSingleType.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetSingleType.cxx
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingDataSetSingleType.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPDataSetSingleType(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::TestingDataSetSingleType<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||||
|
}
|
32
vtkm/cont/openmp/testing/UnitTestOpenMPDeviceAdapter.cxx
Normal file
32
vtkm/cont/openmp/testing/UnitTestOpenMPDeviceAdapter.cxx
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingDeviceAdapter.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPDeviceAdapter(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
return vtkm::cont::testing::TestingDeviceAdapter<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||||
|
}
|
43
vtkm/cont/openmp/testing/UnitTestOpenMPImplicitFunction.cxx
Normal file
43
vtkm/cont/openmp/testing/UnitTestOpenMPImplicitFunction.cxx
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingImplicitFunction.h>
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
void TestImplicitFunctions()
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
|
||||||
|
vtkm::cont::testing::TestingImplicitFunction testing;
|
||||||
|
testing.Run(vtkm::cont::DeviceAdapterTagOpenMP());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int UnitTestOpenMPImplicitFunction(int, char* [])
|
||||||
|
{
|
||||||
|
return vtkm::cont::testing::Testing::Run(TestImplicitFunctions);
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingPointLocatorUniformGrid.h>
|
||||||
|
|
||||||
|
int UnitTestOpenMPPointLocatorUniformGrid(int, char* [])
|
||||||
|
{
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
|
||||||
|
return vtkm::cont::testing::Testing::Run(
|
||||||
|
TestingPointLocatorUniformGrid<vtkm::cont::DeviceAdapterTagOpenMP>());
|
||||||
|
}
|
@ -0,0 +1,49 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/cont/testing/TestingVirtualObjectHandle.h>
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
|
||||||
|
void TestVirtualObjectHandle()
|
||||||
|
{
|
||||||
|
using DeviceAdapterList = vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagOpenMP>;
|
||||||
|
using DeviceAdapterList2 =
|
||||||
|
vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagSerial, vtkm::cont::DeviceAdapterTagOpenMP>;
|
||||||
|
|
||||||
|
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||||
|
|
||||||
|
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||||
|
vtkm::cont::testing::TestingVirtualObjectHandle<DeviceAdapterList>::Run();
|
||||||
|
|
||||||
|
tracker.Reset();
|
||||||
|
vtkm::cont::testing::TestingVirtualObjectHandle<DeviceAdapterList2>::Run();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
int UnitTestOpenMPVirtualObjectHandle(int, char* [])
|
||||||
|
{
|
||||||
|
return vtkm::cont::testing::Testing::Run(TestVirtualObjectHandle);
|
||||||
|
}
|
@ -52,6 +52,7 @@ vtkm_declare_headers(${header_impls} TESTABLE OFF)
|
|||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
add_subdirectory(serial)
|
add_subdirectory(serial)
|
||||||
add_subdirectory(tbb)
|
add_subdirectory(tbb)
|
||||||
|
add_subdirectory(openmp)
|
||||||
add_subdirectory(cuda)
|
add_subdirectory(cuda)
|
||||||
|
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
|
22
vtkm/exec/openmp/CMakeLists.txt
Normal file
22
vtkm/exec/openmp/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
##============================================================================
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##============================================================================
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
add_subdirectory(internal)
|
30
vtkm/exec/openmp/internal/CMakeLists.txt
Normal file
30
vtkm/exec/openmp/internal/CMakeLists.txt
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
##============================================================================
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##============================================================================
|
||||||
|
|
||||||
|
set(headers
|
||||||
|
TaskTilingOpenMP.h
|
||||||
|
)
|
||||||
|
|
||||||
|
vtkm_declare_headers(${headers})
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
if (VTKm_ENABLE_OPENMP)
|
||||||
|
add_subdirectory(testing)
|
||||||
|
endif()
|
41
vtkm/exec/openmp/internal/TaskTilingOpenMP.h
Normal file
41
vtkm/exec/openmp/internal/TaskTilingOpenMP.h
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
#ifndef vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
|
||||||
|
#define vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
|
||||||
|
|
||||||
|
#include <vtkm/exec/serial/internal/TaskTiling.h>
|
||||||
|
|
||||||
|
namespace vtkm
|
||||||
|
{
|
||||||
|
namespace exec
|
||||||
|
{
|
||||||
|
namespace openmp
|
||||||
|
{
|
||||||
|
namespace internal
|
||||||
|
{
|
||||||
|
|
||||||
|
using TaskTiling1D = vtkm::exec::serial::internal::TaskTiling1D;
|
||||||
|
using TaskTiling3D = vtkm::exec::serial::internal::TaskTiling3D;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace vtkm::exec::openmp::internal
|
||||||
|
|
||||||
|
#endif //vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
|
27
vtkm/exec/openmp/internal/testing/CMakeLists.txt
Normal file
27
vtkm/exec/openmp/internal/testing/CMakeLists.txt
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
##=============================================================================
|
||||||
|
##
|
||||||
|
## Copyright (c) Kitware, Inc.
|
||||||
|
## All rights reserved.
|
||||||
|
## See LICENSE.txt for details.
|
||||||
|
##
|
||||||
|
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
## PURPOSE. See the above copyright notice for more information.
|
||||||
|
##
|
||||||
|
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
## Copyright 2018 UT-Battelle, LLC.
|
||||||
|
## Copyright 2018 Los Alamos National Security.
|
||||||
|
##
|
||||||
|
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
## the U.S. Government retains certain rights in this software.
|
||||||
|
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
## this software.
|
||||||
|
##
|
||||||
|
##=============================================================================
|
||||||
|
|
||||||
|
set(unit_tests
|
||||||
|
UnitTestTaskTilingOpenMP.cxx
|
||||||
|
)
|
||||||
|
|
||||||
|
vtkm_unit_tests(SOURCES ${unit_tests})
|
@ -0,0 +1,30 @@
|
|||||||
|
//============================================================================
|
||||||
|
// Copyright (c) Kitware, Inc.
|
||||||
|
// All rights reserved.
|
||||||
|
// See LICENSE.txt for details.
|
||||||
|
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||||
|
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
// PURPOSE. See the above copyright notice for more information.
|
||||||
|
//
|
||||||
|
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||||
|
// Copyright 2018 UT-Battelle, LLC.
|
||||||
|
// Copyright 2018 Los Alamos National Security.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||||
|
// the U.S. Government retains certain rights in this software.
|
||||||
|
//
|
||||||
|
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||||
|
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||||
|
// this software.
|
||||||
|
//============================================================================
|
||||||
|
|
||||||
|
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||||
|
|
||||||
|
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||||
|
#include <vtkm/exec/internal/testing/TestingTaskTiling.h>
|
||||||
|
|
||||||
|
int UnitTestTaskTilingOpenMP(int, char* [])
|
||||||
|
{
|
||||||
|
return vtkm::cont::testing::Testing::Run(
|
||||||
|
vtkm::exec::internal::testing::TestTaskTiling<vtkm::cont::DeviceAdapterTagOpenMP>);
|
||||||
|
}
|
@ -29,6 +29,7 @@ set(VTKM_USE_64BIT_IDS ${VTKm_USE_64BIT_IDS})
|
|||||||
|
|
||||||
set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA})
|
set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA})
|
||||||
set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB})
|
set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB})
|
||||||
|
set(VTKM_ENABLE_OPENMP ${VTKm_ENABLE_OPENMP})
|
||||||
set(VTKM_ENABLE_MPI ${VTKm_ENABLE_MPI})
|
set(VTKM_ENABLE_MPI ${VTKm_ENABLE_MPI})
|
||||||
|
|
||||||
if(VTKM_ENABLE_CUDA)
|
if(VTKM_ENABLE_CUDA)
|
||||||
|
@ -229,6 +229,10 @@
|
|||||||
#ifndef VTKM_ENABLE_TBB
|
#ifndef VTKM_ENABLE_TBB
|
||||||
#cmakedefine VTKM_ENABLE_TBB
|
#cmakedefine VTKM_ENABLE_TBB
|
||||||
#endif
|
#endif
|
||||||
|
//Mark if we are building with OpenMP enabled
|
||||||
|
#ifndef VTKM_ENABLE_OPENMP
|
||||||
|
#cmakedefine VTKM_ENABLE_OPENMP
|
||||||
|
#endif
|
||||||
|
|
||||||
//Mark if we are building with MPI enabled.
|
//Mark if we are building with MPI enabled.
|
||||||
#cmakedefine VTKM_ENABLE_MPI
|
#cmakedefine VTKM_ENABLE_MPI
|
||||||
|
@ -905,6 +905,10 @@ template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<
|
|||||||
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagTBB>(
|
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagTBB>(
|
||||||
vtkm::cont::DeviceAdapterTagTBB);
|
vtkm::cont::DeviceAdapterTagTBB);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef VTKM_ENABLE_OPENMP
|
||||||
|
template VTKM_CONT_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagOpenMP>(
|
||||||
|
vtkm::cont::DeviceAdapterTagOpenMP);
|
||||||
|
#endif
|
||||||
#ifdef VTKM_ENABLE_CUDA
|
#ifdef VTKM_ENABLE_CUDA
|
||||||
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagCuda>(
|
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagCuda>(
|
||||||
vtkm::cont::DeviceAdapterTagCuda);
|
vtkm::cont::DeviceAdapterTagCuda);
|
||||||
|
@ -87,6 +87,13 @@ inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagTBB>(vtkm::cont::
|
|||||||
return "tbb";
|
return "tbb";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagOpenMP>(
|
||||||
|
vtkm::cont::DeviceAdapterTagOpenMP)
|
||||||
|
{
|
||||||
|
return "openmp";
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagCuda>(
|
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagCuda>(
|
||||||
vtkm::cont::DeviceAdapterTagCuda)
|
vtkm::cont::DeviceAdapterTagCuda)
|
||||||
|
Loading…
Reference in New Issue
Block a user