mirror of
https://gitlab.kitware.com/vtk/vtk-m
synced 2024-10-05 01:49:02 +00:00
Add initial version of an OpenMP backend.
This commit is contained in:
parent
7b5ad3e80c
commit
183bcf109a
551
CMake/FindOpenMP.cmake
Normal file
551
CMake/FindOpenMP.cmake
Normal file
@ -0,0 +1,551 @@
|
||||
##=============================================================================
|
||||
##
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
##
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##
|
||||
##=============================================================================
|
||||
|
||||
#.rst:
|
||||
# FindOpenMP
|
||||
# ----------
|
||||
#
|
||||
# Finds OpenMP support
|
||||
#
|
||||
# This module can be used to detect OpenMP support in a compiler. If
|
||||
# the compiler supports OpenMP, the flags required to compile with
|
||||
# OpenMP support are returned in variables for the different languages.
|
||||
# The variables may be empty if the compiler does not need a special
|
||||
# flag to support OpenMP.
|
||||
#
|
||||
# Variables
|
||||
# ^^^^^^^^^
|
||||
#
|
||||
# The module exposes the components ``C``, ``CXX``, and ``Fortran``.
|
||||
# Each of these controls the various languages to search OpenMP support for.
|
||||
#
|
||||
# Depending on the enabled components the following variables will be set:
|
||||
#
|
||||
# ``OpenMP_FOUND``
|
||||
# Variable indicating that OpenMP flags for all requested languages have been found.
|
||||
# If no components are specified, this is true if OpenMP settings for all enabled languages
|
||||
# were detected.
|
||||
# ``OpenMP_VERSION``
|
||||
# Minimal version of the OpenMP standard detected among the requested languages,
|
||||
# or all enabled languages if no components were specified.
|
||||
#
|
||||
# This module will set the following variables per language in your
|
||||
# project, where ``<lang>`` is one of C, CXX, or Fortran:
|
||||
#
|
||||
# ``OpenMP_<lang>_FOUND``
|
||||
# Variable indicating if OpenMP support for ``<lang>`` was detected.
|
||||
# ``OpenMP_<lang>_FLAGS``
|
||||
# OpenMP compiler flags for ``<lang>``, separated by spaces.
|
||||
#
|
||||
# For linking with OpenMP code written in ``<lang>``, the following
|
||||
# variables are provided:
|
||||
#
|
||||
# ``OpenMP_<lang>_LIB_NAMES``
|
||||
# :ref:`;-list <CMake Language Lists>` of libraries for OpenMP programs for ``<lang>``.
|
||||
# ``OpenMP_<libname>_LIBRARY``
|
||||
# Location of the individual libraries needed for OpenMP support in ``<lang>``.
|
||||
# ``OpenMP_<lang>_LIBRARIES``
|
||||
# A list of libraries needed to link with OpenMP code written in ``<lang>``.
|
||||
#
|
||||
# Additionally, the module provides :prop_tgt:`IMPORTED` targets:
|
||||
#
|
||||
# ``OpenMP::OpenMP_<lang>``
|
||||
# Target for using OpenMP from ``<lang>``.
|
||||
#
|
||||
# Specifically for Fortran, the module sets the following variables:
|
||||
#
|
||||
# ``OpenMP_Fortran_HAVE_OMPLIB_HEADER``
|
||||
# Boolean indicating if OpenMP is accessible through ``omp_lib.h``.
|
||||
# ``OpenMP_Fortran_HAVE_OMPLIB_MODULE``
|
||||
# Boolean indicating if OpenMP is accessible through the ``omp_lib`` Fortran module.
|
||||
#
|
||||
# The module will also try to provide the OpenMP version variables:
|
||||
#
|
||||
# ``OpenMP_<lang>_SPEC_DATE``
|
||||
# Date of the OpenMP specification implemented by the ``<lang>`` compiler.
|
||||
# ``OpenMP_<lang>_VERSION_MAJOR``
|
||||
# Major version of OpenMP implemented by the ``<lang>`` compiler.
|
||||
# ``OpenMP_<lang>_VERSION_MINOR``
|
||||
# Minor version of OpenMP implemented by the ``<lang>`` compiler.
|
||||
# ``OpenMP_<lang>_VERSION``
|
||||
# OpenMP version implemented by the ``<lang>`` compiler.
|
||||
#
|
||||
# The specification date is formatted as given in the OpenMP standard:
|
||||
# ``yyyymm`` where ``yyyy`` and ``mm`` represents the year and month of
|
||||
# the OpenMP specification implemented by the ``<lang>`` compiler.
|
||||
|
||||
cmake_policy(PUSH)
|
||||
cmake_policy(SET CMP0012 NEW) # if() recognizes numbers and booleans
|
||||
cmake_policy(SET CMP0054 NEW) # if() quoted variables not dereferenced
|
||||
cmake_policy(SET CMP0057 NEW) # if IN_LIST
|
||||
|
||||
function(_OPENMP_FLAG_CANDIDATES LANG)
|
||||
if(NOT OpenMP_${LANG}_FLAG)
|
||||
unset(OpenMP_FLAG_CANDIDATES)
|
||||
|
||||
set(OMP_FLAG_GNU "-fopenmp")
|
||||
set(OMP_FLAG_Clang "-fopenmp=libomp" "-fopenmp=libiomp5" "-fopenmp")
|
||||
set(OMP_FLAG_AppleClang "-Xclang -fopenmp")
|
||||
set(OMP_FLAG_HP "+Oopenmp")
|
||||
if(WIN32)
|
||||
set(OMP_FLAG_Intel "-Qopenmp")
|
||||
elseif(CMAKE_${LANG}_COMPILER_ID STREQUAL "Intel" AND
|
||||
"${CMAKE_${LANG}_COMPILER_VERSION}" VERSION_LESS "15.0.0.20140528")
|
||||
set(OMP_FLAG_Intel "-openmp")
|
||||
else()
|
||||
set(OMP_FLAG_Intel "-qopenmp")
|
||||
endif()
|
||||
set(OMP_FLAG_MIPSpro "-mp")
|
||||
set(OMP_FLAG_MSVC "-openmp")
|
||||
set(OMP_FLAG_PathScale "-openmp")
|
||||
set(OMP_FLAG_NAG "-openmp")
|
||||
set(OMP_FLAG_Absoft "-openmp")
|
||||
set(OMP_FLAG_PGI "-mp")
|
||||
set(OMP_FLAG_Flang "-fopenmp")
|
||||
set(OMP_FLAG_SunPro "-xopenmp")
|
||||
set(OMP_FLAG_XL "-qsmp=omp")
|
||||
# Cray compiler activate OpenMP with -h omp, which is enabled by default.
|
||||
set(OMP_FLAG_Cray " " "-h omp")
|
||||
|
||||
# If we know the correct flags, use those
|
||||
if(DEFINED OMP_FLAG_${CMAKE_${LANG}_COMPILER_ID})
|
||||
set(OpenMP_FLAG_CANDIDATES "${OMP_FLAG_${CMAKE_${LANG}_COMPILER_ID}}")
|
||||
# Fall back to reasonable default tries otherwise
|
||||
else()
|
||||
set(OpenMP_FLAG_CANDIDATES "-openmp" "-fopenmp" "-mp" " ")
|
||||
endif()
|
||||
set(OpenMP_${LANG}_FLAG_CANDIDATES "${OpenMP_FLAG_CANDIDATES}" PARENT_SCOPE)
|
||||
else()
|
||||
set(OpenMP_${LANG}_FLAG_CANDIDATES "${OpenMP_${LANG}_FLAG}" PARENT_SCOPE)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# sample openmp source code to test
|
||||
set(OpenMP_C_CXX_TEST_SOURCE
|
||||
"
|
||||
#include <omp.h>
|
||||
int main() {
|
||||
#ifdef _OPENMP
|
||||
int n = omp_get_max_threads();
|
||||
return 0;
|
||||
#else
|
||||
breaks_on_purpose
|
||||
#endif
|
||||
}
|
||||
")
|
||||
|
||||
# in Fortran, an implementation may provide an omp_lib.h header
|
||||
# or omp_lib module, or both (OpenMP standard, section 3.1)
|
||||
# Furthmore !$ is the Fortran equivalent of #ifdef _OPENMP (OpenMP standard, 2.2.2)
|
||||
# Without the conditional compilation, some compilers (e.g. PGI) might compile OpenMP code
|
||||
# while not actually enabling OpenMP, building code sequentially
|
||||
set(OpenMP_Fortran_TEST_SOURCE
|
||||
"
|
||||
program test
|
||||
@OpenMP_Fortran_INCLUDE_LINE@
|
||||
!$ integer :: n
|
||||
n = omp_get_num_threads()
|
||||
end program test
|
||||
"
|
||||
)
|
||||
|
||||
function(_OPENMP_WRITE_SOURCE_FILE LANG SRC_FILE_CONTENT_VAR SRC_FILE_NAME SRC_FILE_FULLPATH)
|
||||
set(WORK_DIR ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenMP)
|
||||
if("${LANG}" STREQUAL "C")
|
||||
set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.c")
|
||||
file(WRITE "${SRC_FILE}" "${OpenMP_C_CXX_${SRC_FILE_CONTENT_VAR}}")
|
||||
elseif("${LANG}" STREQUAL "CXX")
|
||||
set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.cpp")
|
||||
file(WRITE "${SRC_FILE}" "${OpenMP_C_CXX_${SRC_FILE_CONTENT_VAR}}")
|
||||
elseif("${LANG}" STREQUAL "Fortran")
|
||||
set(SRC_FILE "${WORK_DIR}/${SRC_FILE_NAME}.f90")
|
||||
file(WRITE "${SRC_FILE}_in" "${OpenMP_Fortran_${SRC_FILE_CONTENT_VAR}}")
|
||||
configure_file("${SRC_FILE}_in" "${SRC_FILE}" @ONLY)
|
||||
endif()
|
||||
set(${SRC_FILE_FULLPATH} "${SRC_FILE}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
include(CMakeParseImplicitLinkInfo)
|
||||
|
||||
function(_OPENMP_GET_FLAGS LANG FLAG_MODE OPENMP_FLAG_VAR OPENMP_LIB_NAMES_VAR)
|
||||
_OPENMP_FLAG_CANDIDATES("${LANG}")
|
||||
_OPENMP_WRITE_SOURCE_FILE("${LANG}" "TEST_SOURCE" OpenMPTryFlag _OPENMP_TEST_SRC)
|
||||
|
||||
unset(OpenMP_VERBOSE_COMPILE_OPTIONS)
|
||||
separate_arguments(OpenMP_VERBOSE_OPTIONS NATIVE_COMMAND "${CMAKE_${LANG}_VERBOSE_FLAG}")
|
||||
foreach(_VERBOSE_OPTION IN LISTS OpenMP_VERBOSE_OPTIONS)
|
||||
if(NOT _VERBOSE_OPTION MATCHES "^-Wl,")
|
||||
list(APPEND OpenMP_VERBOSE_COMPILE_OPTIONS ${_VERBOSE_OPTION})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
foreach(OPENMP_FLAG IN LISTS OpenMP_${LANG}_FLAG_CANDIDATES)
|
||||
set(OPENMP_FLAGS_TEST "${OPENMP_FLAG}")
|
||||
if(OpenMP_VERBOSE_COMPILE_OPTIONS)
|
||||
string(APPEND OPENMP_FLAGS_TEST " ${OpenMP_VERBOSE_COMPILE_OPTIONS}")
|
||||
endif()
|
||||
string(REGEX REPLACE "[-/=+]" "" OPENMP_PLAIN_FLAG "${OPENMP_FLAG}")
|
||||
try_compile( OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG} ${CMAKE_BINARY_DIR} ${_OPENMP_TEST_SRC}
|
||||
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OPENMP_FLAGS_TEST}"
|
||||
LINK_LIBRARIES ${CMAKE_${LANG}_VERBOSE_FLAG}
|
||||
OUTPUT_VARIABLE OpenMP_TRY_COMPILE_OUTPUT
|
||||
)
|
||||
|
||||
if(OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG})
|
||||
set("${OPENMP_FLAG_VAR}" "${OPENMP_FLAG}" PARENT_SCOPE)
|
||||
|
||||
if(CMAKE_${LANG}_VERBOSE_FLAG)
|
||||
unset(OpenMP_${LANG}_IMPLICIT_LIBRARIES)
|
||||
unset(OpenMP_${LANG}_IMPLICIT_LINK_DIRS)
|
||||
unset(OpenMP_${LANG}_IMPLICIT_FWK_DIRS)
|
||||
unset(OpenMP_${LANG}_LOG_VAR)
|
||||
|
||||
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
|
||||
"Detecting ${LANG} OpenMP compiler ABI info compiled with the following output:\n${OpenMP_TRY_COMPILE_OUTPUT}\n\n")
|
||||
|
||||
cmake_parse_implicit_link_info("${OpenMP_TRY_COMPILE_OUTPUT}"
|
||||
OpenMP_${LANG}_IMPLICIT_LIBRARIES
|
||||
OpenMP_${LANG}_IMPLICIT_LINK_DIRS
|
||||
OpenMP_${LANG}_IMPLICIT_FWK_DIRS
|
||||
OpenMP_${LANG}_LOG_VAR
|
||||
"${CMAKE_${LANG}_IMPLICIT_OBJECT_REGEX}"
|
||||
)
|
||||
|
||||
file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log
|
||||
"Parsed ${LANG} OpenMP implicit link information from above output:\n${OpenMP_${LANG}_LOG_VAR}\n\n")
|
||||
|
||||
unset(_OPENMP_LIB_NAMES)
|
||||
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_IMPLICIT_LIBRARIES)
|
||||
get_filename_component(_OPENMP_IMPLICIT_LIB_DIR "${_OPENMP_IMPLICIT_LIB}" DIRECTORY)
|
||||
get_filename_component(_OPENMP_IMPLICIT_LIB_NAME "${_OPENMP_IMPLICIT_LIB}" NAME)
|
||||
get_filename_component(_OPENMP_IMPLICIT_LIB_PLAIN "${_OPENMP_IMPLICIT_LIB}" NAME_WE)
|
||||
string(REGEX REPLACE "([][+.*?()^$])" "\\\\\\1" _OPENMP_IMPLICIT_LIB_PLAIN_ESC "${_OPENMP_IMPLICIT_LIB_PLAIN}")
|
||||
string(REGEX REPLACE "([][+.*?()^$])" "\\\\\\1" _OPENMP_IMPLICIT_LIB_PATH_ESC "${_OPENMP_IMPLICIT_LIB}")
|
||||
if(NOT ( "${_OPENMP_IMPLICIT_LIB}" IN_LIST CMAKE_${LANG}_IMPLICIT_LINK_LIBRARIES
|
||||
OR "${CMAKE_${LANG}_STANDARD_LIBRARIES}" MATCHES "(^| )(-Wl,)?(-l)?(${_OPENMP_IMPLICIT_LIB_PLAIN_ESC}|${_OPENMP_IMPLICIT_LIB_PATH_ESC})( |$)"
|
||||
OR "${CMAKE_${LANG}_LINK_EXECUTABLE}" MATCHES "(^| )(-Wl,)?(-l)?(${_OPENMP_IMPLICIT_LIB_PLAIN_ESC}|${_OPENMP_IMPLICIT_LIB_PATH_ESC})( |$)" ) )
|
||||
if(_OPENMP_IMPLICIT_LIB_DIR)
|
||||
set(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY "${_OPENMP_IMPLICIT_LIB}" CACHE FILEPATH
|
||||
"Path to the ${_OPENMP_IMPLICIT_LIB_PLAIN} library for OpenMP")
|
||||
else()
|
||||
find_library(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY
|
||||
NAMES "${_OPENMP_IMPLICIT_LIB_NAME}"
|
||||
DOC "Path to the ${_OPENMP_IMPLICIT_LIB_PLAIN} library for OpenMP"
|
||||
HINTS ${OpenMP_${LANG}_IMPLICIT_LINK_DIRS}
|
||||
CMAKE_FIND_ROOT_PATH_BOTH
|
||||
NO_DEFAULT_PATH
|
||||
)
|
||||
endif()
|
||||
mark_as_advanced(OpenMP_${_OPENMP_IMPLICIT_LIB_PLAIN}_LIBRARY)
|
||||
list(APPEND _OPENMP_LIB_NAMES ${_OPENMP_IMPLICIT_LIB_PLAIN})
|
||||
endif()
|
||||
endforeach()
|
||||
set("${OPENMP_LIB_NAMES_VAR}" "${_OPENMP_LIB_NAMES}" PARENT_SCOPE)
|
||||
else()
|
||||
# We do not know how to extract implicit OpenMP libraries for this compiler.
|
||||
# Assume that it handles them automatically, e.g. the Intel Compiler on
|
||||
# Windows should put the dependency in its object files.
|
||||
set("${OPENMP_LIB_NAMES_VAR}" "" PARENT_SCOPE)
|
||||
endif()
|
||||
break()
|
||||
elseif(CMAKE_${LANG}_COMPILER_ID STREQUAL "AppleClang"
|
||||
AND CMAKE_${LANG}_COMPILER_VERSION VERSION_GREATER_EQUAL "7.0")
|
||||
|
||||
# Check for separate OpenMP library on AppleClang 7+
|
||||
find_library(OpenMP_libomp_LIBRARY
|
||||
NAMES omp gomp iomp5
|
||||
HINTS ${CMAKE_${LANG}_IMPLICIT_LINK_DIRECTORIES}
|
||||
)
|
||||
mark_as_advanced(OpenMP_libomp_LIBRARY)
|
||||
|
||||
if(OpenMP_libomp_LIBRARY)
|
||||
try_compile( OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG} ${CMAKE_BINARY_DIR} ${_OPENMP_TEST_SRC}
|
||||
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OPENMP_FLAGS_TEST}"
|
||||
LINK_LIBRARIES ${CMAKE_${LANG}_VERBOSE_FLAG} ${OpenMP_libomp_LIBRARY}
|
||||
OUTPUT_VARIABLE OpenMP_TRY_COMPILE_OUTPUT
|
||||
)
|
||||
if(OpenMP_COMPILE_RESULT_${FLAG_MODE}_${OPENMP_PLAIN_FLAG})
|
||||
set("${OPENMP_FLAG_VAR}" "${OPENMP_FLAG}" PARENT_SCOPE)
|
||||
set("${OPENMP_LIB_NAMES_VAR}" "libomp" PARENT_SCOPE)
|
||||
break()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set("${OPENMP_LIB_NAMES_VAR}" "NOTFOUND" PARENT_SCOPE)
|
||||
set("${OPENMP_FLAG_VAR}" "NOTFOUND" PARENT_SCOPE)
|
||||
endforeach()
|
||||
|
||||
unset(OpenMP_VERBOSE_COMPILE_OPTIONS)
|
||||
endfunction()
|
||||
|
||||
set(OpenMP_C_CXX_CHECK_VERSION_SOURCE
|
||||
"
|
||||
#include <stdio.h>
|
||||
#include <omp.h>
|
||||
const char ompver_str[] = { 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'M',
|
||||
'P', '-', 'd', 'a', 't', 'e', '[',
|
||||
('0' + ((_OPENMP/100000)%10)),
|
||||
('0' + ((_OPENMP/10000)%10)),
|
||||
('0' + ((_OPENMP/1000)%10)),
|
||||
('0' + ((_OPENMP/100)%10)),
|
||||
('0' + ((_OPENMP/10)%10)),
|
||||
('0' + ((_OPENMP/1)%10)),
|
||||
']', '\\0' };
|
||||
int main()
|
||||
{
|
||||
puts(ompver_str);
|
||||
return 0;
|
||||
}
|
||||
")
|
||||
|
||||
set(OpenMP_Fortran_CHECK_VERSION_SOURCE
|
||||
"
|
||||
program omp_ver
|
||||
@OpenMP_Fortran_INCLUDE_LINE@
|
||||
integer, parameter :: zero = ichar('0')
|
||||
integer, parameter :: ompv = openmp_version
|
||||
character, dimension(24), parameter :: ompver_str =&
|
||||
(/ 'I', 'N', 'F', 'O', ':', 'O', 'p', 'e', 'n', 'M', 'P', '-',&
|
||||
'd', 'a', 't', 'e', '[',&
|
||||
char(zero + mod(ompv/100000, 10)),&
|
||||
char(zero + mod(ompv/10000, 10)),&
|
||||
char(zero + mod(ompv/1000, 10)),&
|
||||
char(zero + mod(ompv/100, 10)),&
|
||||
char(zero + mod(ompv/10, 10)),&
|
||||
char(zero + mod(ompv/1, 10)), ']' /)
|
||||
print *, ompver_str
|
||||
end program omp_ver
|
||||
")
|
||||
|
||||
function(_OPENMP_GET_SPEC_DATE LANG SPEC_DATE)
|
||||
_OPENMP_WRITE_SOURCE_FILE("${LANG}" "CHECK_VERSION_SOURCE" OpenMPCheckVersion _OPENMP_TEST_SRC)
|
||||
|
||||
set(BIN_FILE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/FindOpenMP/ompver_${LANG}.bin")
|
||||
string(REGEX REPLACE "[-/=+]" "" OPENMP_PLAIN_FLAG "${OPENMP_FLAG}")
|
||||
try_compile(OpenMP_SPECTEST_${LANG}_${OPENMP_PLAIN_FLAG} "${CMAKE_BINARY_DIR}" "${_OPENMP_TEST_SRC}"
|
||||
CMAKE_FLAGS "-DCOMPILE_DEFINITIONS:STRING=${OpenMP_${LANG}_FLAGS}"
|
||||
COPY_FILE ${BIN_FILE})
|
||||
|
||||
if(${OpenMP_SPECTEST_${LANG}_${OPENMP_PLAIN_FLAG}})
|
||||
file(STRINGS ${BIN_FILE} specstr LIMIT_COUNT 1 REGEX "INFO:OpenMP-date")
|
||||
set(regex_spec_date ".*INFO:OpenMP-date\\[0*([^]]*)\\].*")
|
||||
if("${specstr}" MATCHES "${regex_spec_date}")
|
||||
set(${SPEC_DATE} "${CMAKE_MATCH_1}" PARENT_SCOPE)
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
macro(_OPENMP_SET_VERSION_BY_SPEC_DATE LANG)
|
||||
set(OpenMP_SPEC_DATE_MAP
|
||||
# Preview versions
|
||||
"201611=5.0" # OpenMP 5.0 preview 1
|
||||
# Combined versions, 2.5 onwards
|
||||
"201511=4.5"
|
||||
"201307=4.0"
|
||||
"201107=3.1"
|
||||
"200805=3.0"
|
||||
"200505=2.5"
|
||||
# C/C++ version 2.0
|
||||
"200203=2.0"
|
||||
# Fortran version 2.0
|
||||
"200011=2.0"
|
||||
# Fortran version 1.1
|
||||
"199911=1.1"
|
||||
# C/C++ version 1.0 (there's no 1.1 for C/C++)
|
||||
"199810=1.0"
|
||||
# Fortran version 1.0
|
||||
"199710=1.0"
|
||||
)
|
||||
|
||||
if(OpenMP_${LANG}_SPEC_DATE)
|
||||
string(REGEX MATCHALL "${OpenMP_${LANG}_SPEC_DATE}=([0-9]+)\\.([0-9]+)" _version_match "${OpenMP_SPEC_DATE_MAP}")
|
||||
else()
|
||||
set(_version_match "")
|
||||
endif()
|
||||
if(NOT _version_match STREQUAL "")
|
||||
set(OpenMP_${LANG}_VERSION_MAJOR ${CMAKE_MATCH_1})
|
||||
set(OpenMP_${LANG}_VERSION_MINOR ${CMAKE_MATCH_2})
|
||||
set(OpenMP_${LANG}_VERSION "${OpenMP_${LANG}_VERSION_MAJOR}.${OpenMP_${LANG}_VERSION_MINOR}")
|
||||
else()
|
||||
unset(OpenMP_${LANG}_VERSION_MAJOR)
|
||||
unset(OpenMP_${LANG}_VERSION_MINOR)
|
||||
unset(OpenMP_${LANG}_VERSION)
|
||||
endif()
|
||||
unset(_version_match)
|
||||
unset(OpenMP_SPEC_DATE_MAP)
|
||||
endmacro()
|
||||
|
||||
foreach(LANG IN ITEMS C CXX)
|
||||
if(CMAKE_${LANG}_COMPILER_LOADED)
|
||||
if(NOT DEFINED OpenMP_${LANG}_FLAGS OR "${OpenMP_${LANG}_FLAGS}" STREQUAL "NOTFOUND"
|
||||
OR NOT DEFINED OpenMP_${LANG}_LIB_NAMES OR "${OpenMP_${LANG}_LIB_NAMES}" STREQUAL "NOTFOUND")
|
||||
_OPENMP_GET_FLAGS("${LANG}" "${LANG}" OpenMP_${LANG}_FLAGS_WORK OpenMP_${LANG}_LIB_NAMES_WORK)
|
||||
endif()
|
||||
|
||||
set(OpenMP_${LANG}_FLAGS "${OpenMP_${LANG}_FLAGS_WORK}"
|
||||
CACHE STRING "${LANG} compiler flags for OpenMP parallelization")
|
||||
set(OpenMP_${LANG}_LIB_NAMES "${OpenMP_${LANG}_LIB_NAMES_WORK}"
|
||||
CACHE STRING "${LANG} compiler libraries for OpenMP parallelization")
|
||||
mark_as_advanced(OpenMP_${LANG}_FLAGS OpenMP_${LANG}_LIB_NAMES)
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(CMAKE_Fortran_COMPILER_LOADED)
|
||||
if(NOT DEFINED OpenMP_Fortran_FLAGS OR "${OpenMP_Fortran_FLAGS}" STREQUAL "NOTFOUND"
|
||||
OR NOT DEFINED OpenMP_Fortran_LIB_NAMES OR "${OpenMP_Fortran_LIB_NAMES}" STREQUAL "NOTFOUND"
|
||||
OR NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_MODULE)
|
||||
set(OpenMP_Fortran_INCLUDE_LINE "use omp_lib\n implicit none")
|
||||
_OPENMP_GET_FLAGS("Fortran" "FortranHeader" OpenMP_Fortran_FLAGS_WORK OpenMP_Fortran_LIB_NAMES_WORK)
|
||||
if(OpenMP_Fortran_FLAGS_WORK)
|
||||
set(OpenMP_Fortran_HAVE_OMPLIB_MODULE TRUE CACHE BOOL INTERNAL "")
|
||||
endif()
|
||||
|
||||
set(OpenMP_Fortran_FLAGS "${OpenMP_Fortran_FLAGS_WORK}"
|
||||
CACHE STRING "Fortran compiler flags for OpenMP parallelization")
|
||||
set(OpenMP_Fortran_LIB_NAMES "${OpenMP_Fortran_LIB_NAMES_WORK}"
|
||||
CACHE STRING "Fortran compiler libraries for OpenMP parallelization")
|
||||
mark_as_advanced(OpenMP_Fortran_FLAGS OpenMP_Fortran_LIB_NAMES)
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED OpenMP_Fortran_FLAGS OR "${OpenMP_Fortran_FLAGS}" STREQUAL "NOTFOUND"
|
||||
OR NOT DEFINED OpenMP_Fortran_LIB_NAMES OR "${OpenMP_Fortran_LIB_NAMES}" STREQUAL "NOTFOUND"
|
||||
OR NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_HEADER)
|
||||
set(OpenMP_Fortran_INCLUDE_LINE "implicit none\n include 'omp_lib.h'")
|
||||
_OPENMP_GET_FLAGS("Fortran" "FortranModule" OpenMP_Fortran_FLAGS_WORK OpenMP_Fortran_LIB_NAMES_WORK)
|
||||
if(OpenMP_Fortran_FLAGS_WORK)
|
||||
set(OpenMP_Fortran_HAVE_OMPLIB_HEADER TRUE CACHE BOOL INTERNAL "")
|
||||
endif()
|
||||
|
||||
set(OpenMP_Fortran_FLAGS "${OpenMP_Fortran_FLAGS_WORK}"
|
||||
CACHE STRING "Fortran compiler flags for OpenMP parallelization")
|
||||
|
||||
set(OpenMP_Fortran_LIB_NAMES "${OpenMP_Fortran_LIB_NAMES}"
|
||||
CACHE STRING "Fortran compiler libraries for OpenMP parallelization")
|
||||
endif()
|
||||
|
||||
if(OpenMP_Fortran_HAVE_OMPLIB_MODULE)
|
||||
set(OpenMP_Fortran_INCLUDE_LINE "use omp_lib\n implicit none")
|
||||
else()
|
||||
set(OpenMP_Fortran_INCLUDE_LINE "implicit none\n include 'omp_lib.h'")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT OpenMP_FIND_COMPONENTS)
|
||||
set(OpenMP_FINDLIST C CXX Fortran)
|
||||
else()
|
||||
set(OpenMP_FINDLIST ${OpenMP_FIND_COMPONENTS})
|
||||
endif()
|
||||
|
||||
unset(_OpenMP_MIN_VERSION)
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
|
||||
foreach(LANG IN LISTS OpenMP_FINDLIST)
|
||||
if(CMAKE_${LANG}_COMPILER_LOADED)
|
||||
if (NOT OpenMP_${LANG}_SPEC_DATE AND OpenMP_${LANG}_FLAGS)
|
||||
_OPENMP_GET_SPEC_DATE("${LANG}" OpenMP_${LANG}_SPEC_DATE_INTERNAL)
|
||||
set(OpenMP_${LANG}_SPEC_DATE "${OpenMP_${LANG}_SPEC_DATE_INTERNAL}" CACHE
|
||||
INTERNAL "${LANG} compiler's OpenMP specification date")
|
||||
_OPENMP_SET_VERSION_BY_SPEC_DATE("${LANG}")
|
||||
endif()
|
||||
|
||||
set(OpenMP_${LANG}_FIND_QUIETLY ${OpenMP_FIND_QUIETLY})
|
||||
set(OpenMP_${LANG}_FIND_REQUIRED ${OpenMP_FIND_REQUIRED})
|
||||
set(OpenMP_${LANG}_FIND_VERSION ${OpenMP_FIND_VERSION})
|
||||
set(OpenMP_${LANG}_FIND_VERSION_EXACT ${OpenMP_FIND_VERSION_EXACT})
|
||||
|
||||
set(_OPENMP_${LANG}_REQUIRED_VARS OpenMP_${LANG}_FLAGS)
|
||||
if("${OpenMP_${LANG}_LIB_NAMES}" STREQUAL "NOTFOUND")
|
||||
set(_OPENMP_${LANG}_REQUIRED_LIB_VARS OpenMP_${LANG}_LIB_NAMES)
|
||||
else()
|
||||
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_LIB_NAMES)
|
||||
list(APPEND _OPENMP_${LANG}_REQUIRED_LIB_VARS OpenMP_${_OPENMP_IMPLICIT_LIB}_LIBRARY)
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
find_package_handle_standard_args(OpenMP_${LANG}
|
||||
REQUIRED_VARS OpenMP_${LANG}_FLAGS ${_OPENMP_${LANG}_REQUIRED_LIB_VARS}
|
||||
VERSION_VAR OpenMP_${LANG}_VERSION
|
||||
)
|
||||
|
||||
if(OpenMP_${LANG}_FOUND)
|
||||
if(DEFINED OpenMP_${LANG}_VERSION)
|
||||
if(NOT _OpenMP_MIN_VERSION OR _OpenMP_MIN_VERSION VERSION_GREATER OpenMP_${LANG}_VERSION)
|
||||
set(_OpenMP_MIN_VERSION OpenMP_${LANG}_VERSION)
|
||||
endif()
|
||||
endif()
|
||||
set(OpenMP_${LANG}_LIBRARIES "")
|
||||
foreach(_OPENMP_IMPLICIT_LIB IN LISTS OpenMP_${LANG}_LIB_NAMES)
|
||||
list(APPEND OpenMP_${LANG}_LIBRARIES "${OpenMP_${_OPENMP_IMPLICIT_LIB}_LIBRARY}")
|
||||
endforeach()
|
||||
|
||||
if(NOT TARGET OpenMP::OpenMP_${LANG})
|
||||
add_library(OpenMP::OpenMP_${LANG} INTERFACE IMPORTED)
|
||||
endif()
|
||||
if(OpenMP_${LANG}_FLAGS)
|
||||
separate_arguments(_OpenMP_${LANG}_OPTIONS NATIVE_COMMAND "${OpenMP_${LANG}_FLAGS}")
|
||||
set_property(TARGET OpenMP::OpenMP_${LANG} PROPERTY
|
||||
INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:${LANG}>:${_OpenMP_${LANG}_OPTIONS}>")
|
||||
unset(_OpenMP_${LANG}_OPTIONS)
|
||||
endif()
|
||||
if(OpenMP_${LANG}_LIBRARIES)
|
||||
set_property(TARGET OpenMP::OpenMP_${LANG} PROPERTY
|
||||
INTERFACE_LINK_LIBRARIES "${OpenMP_${LANG}_LIBRARIES}")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
unset(_OpenMP_REQ_VARS)
|
||||
foreach(LANG IN ITEMS C CXX Fortran)
|
||||
if((NOT OpenMP_FIND_COMPONENTS AND CMAKE_${LANG}_COMPILER_LOADED) OR LANG IN_LIST OpenMP_FIND_COMPONENTS)
|
||||
list(APPEND _OpenMP_REQ_VARS "OpenMP_${LANG}_FOUND")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
find_package_handle_standard_args(OpenMP
|
||||
REQUIRED_VARS ${_OpenMP_REQ_VARS}
|
||||
VERSION_VAR ${_OpenMP_MIN_VERSION}
|
||||
HANDLE_COMPONENTS)
|
||||
|
||||
set(OPENMP_FOUND ${OpenMP_FOUND})
|
||||
|
||||
if(CMAKE_Fortran_COMPILER_LOADED AND OpenMP_Fortran_FOUND)
|
||||
if(NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_MODULE)
|
||||
set(OpenMP_Fortran_HAVE_OMPLIB_MODULE FALSE CACHE BOOL INTERNAL "")
|
||||
endif()
|
||||
if(NOT DEFINED OpenMP_Fortran_HAVE_OMPLIB_HEADER)
|
||||
set(OpenMP_Fortran_HAVE_OMPLIB_HEADER FALSE CACHE BOOL INTERNAL "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT ( CMAKE_C_COMPILER_LOADED OR CMAKE_CXX_COMPILER_LOADED OR CMAKE_Fortran_COMPILER_LOADED ))
|
||||
message(SEND_ERROR "FindOpenMP requires the C, CXX or Fortran languages to be enabled")
|
||||
endif()
|
||||
|
||||
unset(OpenMP_C_CXX_TEST_SOURCE)
|
||||
unset(OpenMP_Fortran_TEST_SOURCE)
|
||||
unset(OpenMP_C_CXX_CHECK_VERSION_SOURCE)
|
||||
unset(OpenMP_Fortran_CHECK_VERSION_SOURCE)
|
||||
unset(OpenMP_Fortran_INCLUDE_LINE)
|
||||
|
||||
cmake_policy(POP)
|
@ -27,6 +27,9 @@
|
||||
# vtkm::tbb Target that contains tbb related link information
|
||||
# implicitly linked to by `vtkm_cont` if tbb is enabled
|
||||
#
|
||||
# vtkm::openmp Target that contains openmp related link information
|
||||
# implicitly linked to by `vtkm_cont` if openmp is enabled
|
||||
#
|
||||
# vtkm::cuda Target that contains cuda related link information
|
||||
# implicitly linked to by `vtkm_cont` if cuda is enabled
|
||||
#
|
||||
@ -41,6 +44,7 @@
|
||||
# VTKm_BUILD_SHARED_LIBS Will be enabled if VTK-m was built shared/dynamic
|
||||
# VTKm_ENABLE_CUDA Will be enabled if VTK-m was built with CUDA support
|
||||
# VTKm_ENABLE_TBB Will be enabled if VTK-m was built with TBB support
|
||||
# VTKm_ENABLE_OPENMP Will be enabled if VTK-m was built with OpenMP support
|
||||
# VTKm_ENABLE_MPI Will be enabled if VTK-m was built with MPI support
|
||||
# VTKm_ENABLE_RENDERING Will be enabled if VTK-m was built with rendering support
|
||||
# VTKm_ENABLE_GL_CONTEXT Will be enabled if VTK-m rendering was built with a GL context
|
||||
@ -63,6 +67,7 @@ set(VTKm_VERSION "@VTKm_VERSION@")
|
||||
set(VTKm_BUILD_SHARED_LIBS "@VTKm_BUILD_SHARED_LIBS@")
|
||||
set(VTKm_ENABLE_CUDA "@VTKm_ENABLE_CUDA@")
|
||||
set(VTKm_ENABLE_TBB "@VTKm_ENABLE_TBB@")
|
||||
set(VTKm_ENABLE_OPENMP "@VTKm_ENABLE_OPENMP@")
|
||||
set(VTKm_ENABLE_RENDERING "@VTKm_ENABLE_RENDERING@")
|
||||
set(VTKm_ENABLE_GL_CONTEXT "@VTKm_ENABLE_GL_CONTEXT@")
|
||||
set(VTKm_ENABLE_OSMESA_CONTEXT "@VTKm_ENABLE_OSMESA_CONTEXT@")
|
||||
|
@ -47,6 +47,19 @@ if(VTKm_ENABLE_TBB AND NOT TARGET vtkm::tbb)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(VTKm_ENABLE_OPENMP AND NOT TARGET vtkm::openmp)
|
||||
find_package(OpenMP 4.5 REQUIRED COMPONENTS CXX QUIET)
|
||||
|
||||
add_library(vtkm::openmp INTERFACE IMPORTED GLOBAL)
|
||||
if(OpenMP_CXX_FLAGS)
|
||||
set_target_properties(vtkm::openmp PROPERTIES
|
||||
INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CXX>:${OpenMP_CXX_FLAGS}>")
|
||||
endif()
|
||||
if(OpenMP_CXX_LIBRARIES)
|
||||
set_target_properties(vtkm::openmp PROPERTIES
|
||||
INTERFACE_LINK_LIBRARIES "${OpenMP_CXX_LIBRARIES}")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(VTKm_ENABLE_CUDA AND NOT TARGET vtkm::cuda)
|
||||
cmake_minimum_required(VERSION 3.9 FATAL_ERROR)
|
||||
|
@ -86,6 +86,7 @@ endmacro ()
|
||||
# Configurable Options
|
||||
vtkm_option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
|
||||
vtkm_option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
|
||||
vtkm_option(VTKm_ENABLE_OPENMP "Enable OpenMP support" OFF)
|
||||
vtkm_option(VTKm_ENABLE_RENDERING "Enable rendering library" ON)
|
||||
vtkm_option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
|
||||
vtkm_option(VTKm_ENABLE_BENCHMARKS "Enable VTKm Benchmarking" OFF)
|
||||
@ -266,6 +267,7 @@ if(NOT VTKm_INSTALL_ONLY_LIBRARIES)
|
||||
FILES
|
||||
${VTKm_SOURCE_DIR}/CMake/FindTBB.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/FindOpenGL.cmake
|
||||
${VTKm_SOURCE_DIR}/CMake/FindOpenMP.cmake
|
||||
DESTINATION ${VTKm_INSTALL_CMAKE_MODULE_DIR}
|
||||
)
|
||||
|
||||
|
@ -73,6 +73,8 @@ Optional dependencies are:
|
||||
+ [Cuda Toolkit 7+](https://developer.nvidia.com/cuda-toolkit)
|
||||
+ TBB Device Adapter
|
||||
+ [TBB](https://www.threadingbuildingblocks.org/)
|
||||
+ OpenMP Device Adapter
|
||||
+ Requires a compiler that supports OpenMP >= 4.5.
|
||||
+ OpenGL Rendering
|
||||
+ The rendering module contains multiple rendering implementations
|
||||
including standalone rendering code. The rendering module also
|
||||
|
@ -46,7 +46,9 @@
|
||||
|
||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
||||
#include <tbb/task_scheduler_init.h>
|
||||
#endif // TBB
|
||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
// This benchmark has a number of commandline options to customize its behavior.
|
||||
// See The BenchDevAlgoConfig documentations for details.
|
||||
@ -1193,6 +1195,8 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
||||
int numThreads = tbb::task_scheduler_init::automatic;
|
||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||
int numThreads = omp_get_max_threads();
|
||||
#endif // TBB
|
||||
|
||||
vtkm::benchmarking::BenchDevAlgoConfig& config = vtkm::benchmarking::Config;
|
||||
@ -1323,8 +1327,12 @@ int main(int argc, char* argv[])
|
||||
std::istringstream parse(argv[i]);
|
||||
parse >> numThreads;
|
||||
std::cout << "Selected " << numThreads << " TBB threads." << std::endl;
|
||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||
std::istringstream parse(argv[i]);
|
||||
parse >> numThreads;
|
||||
std::cout << "Selected " << numThreads << " OpenMP threads." << std::endl;
|
||||
#else
|
||||
std::cerr << "NumThreads valid only on TBB. Ignoring." << std::endl;
|
||||
std::cerr << "NumThreads not valid on this device. Ignoring." << std::endl;
|
||||
#endif // TBB
|
||||
}
|
||||
else
|
||||
@ -1337,6 +1345,8 @@ int main(int argc, char* argv[])
|
||||
#if VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
|
||||
// Must not be destroyed as long as benchmarks are running:
|
||||
tbb::task_scheduler_init init(numThreads);
|
||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||
omp_set_num_threads(numThreads);
|
||||
#endif // TBB
|
||||
|
||||
if (config.BenchmarkFlags == 0)
|
||||
|
@ -30,6 +30,12 @@ function(add_benchmark name files)
|
||||
target_compile_definitions(${name}_TBB PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_TBB")
|
||||
endif()
|
||||
|
||||
if (TARGET vtkm::openmp)
|
||||
add_executable(${name}_OPENMP ${files})
|
||||
list(APPEND benchmarks ${name}_OPENMP)
|
||||
target_compile_definitions(${name}_OPENMP PRIVATE "VTKM_DEVICE_ADAPTER=VTKM_DEVICE_ADAPTER_OPENMP")
|
||||
endif()
|
||||
|
||||
if (TARGET vtkm::cuda)
|
||||
get_filename_component(fname "${name}" NAME_WE)
|
||||
get_filename_component(fullpath "${name}.cxx" ABSOLUTE)
|
||||
@ -70,6 +76,9 @@ if(TARGET vtkm_rendering)
|
||||
if(TARGET BenchmarkRayTracing_TBB)
|
||||
target_link_libraries(BenchmarkRayTracing_TBB PRIVATE vtkm_rendering)
|
||||
endif()
|
||||
if(TARGET BenchmarkRayTracing_OPENMP)
|
||||
target_link_libraries(BenchmarkRayTracing_OPENMP PRIVATE vtkm_rendering)
|
||||
endif()
|
||||
if(TARGET BenchmarkRayTracing_CUDA)
|
||||
target_link_libraries(BenchmarkRayTracing_CUDA PRIVATE vtkm_rendering)
|
||||
endif()
|
||||
|
5
docs/changelog/openmp-backend.md
Normal file
5
docs/changelog/openmp-backend.md
Normal file
@ -0,0 +1,5 @@
|
||||
# OpenMP Device Adapter
|
||||
|
||||
A device adapter that leverages OpenMP 4.5 for threading is now available. The
|
||||
new adapter is enabled using the CMake option `VTKm_ENABLE_OPENMP` and its
|
||||
performance is comparable to the TBB device adapter.
|
@ -151,6 +151,7 @@ add_subdirectory(arg)
|
||||
add_subdirectory(diy)
|
||||
add_subdirectory(serial)
|
||||
add_subdirectory(tbb)
|
||||
add_subdirectory(openmp)
|
||||
add_subdirectory(cuda)
|
||||
|
||||
set(backends )
|
||||
@ -160,6 +161,9 @@ endif()
|
||||
if(TARGET vtkm::cuda)
|
||||
list(APPEND backends vtkm::cuda)
|
||||
endif()
|
||||
if(TARGET vtkm::openmp)
|
||||
list(APPEND backends vtkm::openmp)
|
||||
endif()
|
||||
target_link_libraries(vtkm_cont PUBLIC vtkm_compiler_flags ${backends})
|
||||
if(TARGET vtkm_diy)
|
||||
# This will become a required dependency eventually.
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <vtkm/ListTag.h>
|
||||
|
||||
#include <vtkm/cont/cuda/DeviceAdapterCuda.h>
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
|
||||
|
||||
@ -37,6 +38,7 @@ namespace cont
|
||||
|
||||
struct DeviceAdapterListTagCommon : vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagCuda,
|
||||
vtkm::cont::DeviceAdapterTagTBB,
|
||||
vtkm::cont::DeviceAdapterTagOpenMP,
|
||||
vtkm::cont::DeviceAdapterTagSerial>
|
||||
{
|
||||
};
|
||||
|
@ -36,7 +36,9 @@
|
||||
// Unfortunately, VTKM_ENABLE_TBB does not guarantee that TBB is (or isn't)
|
||||
// available, but there is no way to check for sure in a header library.
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_TBB
|
||||
#else // !VTKM_CUDA && !VTKM_ENABLE_TBB
|
||||
#elif defined(VTKM_ENABLE_OPENMP)
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_OPENMP
|
||||
#else // !VTKM_CUDA && !VTKM_ENABLE_TBB && !VTKM_ENABLE_OPENMP
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL
|
||||
#endif // !VTKM_CUDA && !VTKM_ENABLE_TBB
|
||||
#endif // VTKM_DEVICE_ADAPTER
|
||||
@ -68,6 +70,14 @@
|
||||
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
|
||||
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagTBB
|
||||
|
||||
// OpenMP:
|
||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
|
||||
|
||||
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagOpenMP
|
||||
|
||||
// Error:
|
||||
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
|
@ -32,6 +32,7 @@
|
||||
#define VTKM_DEVICE_ADAPTER_SERIAL 1
|
||||
#define VTKM_DEVICE_ADAPTER_CUDA 2
|
||||
#define VTKM_DEVICE_ADAPTER_TBB 3
|
||||
#define VTKM_DEVICE_ADAPTER_OPENMP 4
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
|
32
vtkm/cont/openmp/CMakeLists.txt
Normal file
32
vtkm/cont/openmp/CMakeLists.txt
Normal file
@ -0,0 +1,32 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
##
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##============================================================================
|
||||
|
||||
set(headers
|
||||
DeviceAdapterOpenMP.h
|
||||
)
|
||||
|
||||
add_subdirectory(internal)
|
||||
|
||||
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_OPENMP})
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
if (TARGET vtkm::openmp)
|
||||
add_subdirectory(testing)
|
||||
endif()
|
33
vtkm/cont/openmp/DeviceAdapterOpenMP.h
Normal file
33
vtkm/cont/openmp/DeviceAdapterOpenMP.h
Normal file
@ -0,0 +1,33 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_openmp_DeviceAdapterOpenMP_h
|
||||
#define vtk_m_cont_openmp_DeviceAdapterOpenMP_h
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterRuntimeDetectorOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
|
||||
#ifdef VTKM_ENABLE_OPENMP
|
||||
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h>
|
||||
#endif
|
||||
|
||||
#endif //vtk_m_cont_openmp_DeviceAdapterOpenMP_h
|
42
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.cxx
Normal file
42
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.cxx
Normal file
@ -0,0 +1,42 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
|
||||
|
||||
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
ExecutionArrayInterfaceBasic<DeviceAdapterTagOpenMP>::ExecutionArrayInterfaceBasic(
|
||||
StorageBasicBase& storage)
|
||||
: Superclass(storage)
|
||||
{
|
||||
}
|
||||
|
||||
} // end namespace internal
|
||||
|
||||
VTKM_INSTANTIATE_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagOpenMP)
|
||||
}
|
||||
} // end vtkm::cont
|
109
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h
Normal file
109
vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h
Normal file
@ -0,0 +1,109 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
|
||||
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
|
||||
#include <vtkm/cont/internal/ArrayExportMacros.h>
|
||||
#include <vtkm/cont/internal/ArrayManagerExecution.h>
|
||||
#include <vtkm/cont/internal/ArrayManagerExecutionShareWithControl.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
template <typename T, class StorageTag>
|
||||
class ArrayManagerExecution<T, StorageTag, vtkm::cont::DeviceAdapterTagOpenMP>
|
||||
: public vtkm::cont::internal::ArrayManagerExecutionShareWithControl<T, StorageTag>
|
||||
{
|
||||
public:
|
||||
using Superclass = vtkm::cont::internal::ArrayManagerExecutionShareWithControl<T, StorageTag>;
|
||||
using ValueType = typename Superclass::ValueType;
|
||||
using PortalType = typename Superclass::PortalType;
|
||||
using PortalConstType = typename Superclass::PortalConstType;
|
||||
using StorageType = typename Superclass::StorageType;
|
||||
|
||||
VTKM_CONT
|
||||
ArrayManagerExecution(StorageType* storage)
|
||||
: Superclass(storage)
|
||||
{
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
PortalConstType PrepareForInput(bool updateData)
|
||||
{
|
||||
return this->Superclass::PrepareForInput(updateData);
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
PortalType PrepareForInPlace(bool updateData)
|
||||
{
|
||||
return this->Superclass::PrepareForInPlace(updateData);
|
||||
}
|
||||
|
||||
VTKM_CONT
|
||||
PortalType PrepareForOutput(vtkm::Id numberOfValues)
|
||||
{
|
||||
return this->Superclass::PrepareForOutput(numberOfValues);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct ExecutionPortalFactoryBasic<T, DeviceAdapterTagOpenMP>
|
||||
: public ExecutionPortalFactoryBasicShareWithControl<T>
|
||||
{
|
||||
using Superclass = ExecutionPortalFactoryBasicShareWithControl<T>;
|
||||
|
||||
using typename Superclass::ValueType;
|
||||
using typename Superclass::PortalType;
|
||||
using typename Superclass::PortalConstType;
|
||||
using Superclass::CreatePortal;
|
||||
using Superclass::CreatePortalConst;
|
||||
};
|
||||
|
||||
template <>
|
||||
struct VTKM_CONT_EXPORT ExecutionArrayInterfaceBasic<DeviceAdapterTagOpenMP>
|
||||
: public ExecutionArrayInterfaceBasicShareWithControl
|
||||
{
|
||||
using Superclass = ExecutionArrayInterfaceBasicShareWithControl;
|
||||
|
||||
VTKM_CONT
|
||||
ExecutionArrayInterfaceBasic(StorageBasicBase& storage);
|
||||
|
||||
VTKM_CONT
|
||||
DeviceAdapterId GetDeviceId() const final { return VTKM_DEVICE_ADAPTER_OPENMP; }
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
#ifndef vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
|
||||
VTKM_EXPORT_ARRAYHANDLES_FOR_DEVICE_ADAPTER(DeviceAdapterTagOpenMP)
|
||||
#endif // !vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_cxx
|
||||
}
|
||||
} // namespace vtkm::cont
|
||||
|
||||
#endif // vtk_m_cont_openmp_internal_ArrayManagerExecutionOpenMP_h
|
49
vtkm/cont/openmp/internal/CMakeLists.txt
Normal file
49
vtkm/cont/openmp/internal/CMakeLists.txt
Normal file
@ -0,0 +1,49 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
##
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##============================================================================
|
||||
|
||||
set(headers
|
||||
ArrayManagerExecutionOpenMP.h
|
||||
DeviceAdapterAlgorithmOpenMP.h
|
||||
DeviceAdapterRuntimeDetectorOpenMP.h
|
||||
DeviceAdapterTagOpenMP.h
|
||||
FunctorsOpenMP.h
|
||||
ParallelQuickSortOpenMP.h
|
||||
ParallelRadixSortOpenMP.h
|
||||
ParallelScanOpenMP.h
|
||||
ParallelSortOpenMP.h
|
||||
VirtualObjectTransferOpenMP.h
|
||||
)
|
||||
|
||||
vtkm_declare_headers(${headers} TESTABLE ${VTKm_ENABLE_OPENMP})
|
||||
|
||||
target_sources(vtkm_cont PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterRuntimeDetectorOpenMP.cxx
|
||||
)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
if (NOT VTKm_ENABLE_OPENMP)
|
||||
return()
|
||||
endif()
|
||||
|
||||
target_sources(vtkm_cont PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ArrayManagerExecutionOpenMP.cxx
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/DeviceAdapterAlgorithmOpenMP.cxx
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/ParallelRadixSortOpenMP.cxx
|
||||
)
|
140
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.cxx
Normal file
140
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.cxx
Normal file
@ -0,0 +1,140 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||
|
||||
#include <vtkm/cont/ErrorExecution.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>::ScheduleTask(
|
||||
vtkm::exec::openmp::internal::TaskTiling1D& functor,
|
||||
vtkm::Id size)
|
||||
{
|
||||
static constexpr vtkm::Id MESSAGE_SIZE = 1024;
|
||||
char errorString[MESSAGE_SIZE];
|
||||
errorString[0] = '\0';
|
||||
vtkm::exec::internal::ErrorMessageBuffer errorMessage(errorString, MESSAGE_SIZE);
|
||||
functor.SetErrorMessageBuffer(errorMessage);
|
||||
|
||||
static constexpr vtkm::Id CHUNK_SIZE = 1024;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel for
|
||||
schedule(guided))
|
||||
for (vtkm::Id i = 0; i < size; i += CHUNK_SIZE)
|
||||
{
|
||||
const vtkm::Id end = std::min(i + CHUNK_SIZE, size);
|
||||
functor(i, end);
|
||||
}
|
||||
|
||||
if (errorMessage.IsErrorRaised())
|
||||
{
|
||||
throw vtkm::cont::ErrorExecution(errorString);
|
||||
}
|
||||
}
|
||||
|
||||
void DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>::ScheduleTask(
|
||||
vtkm::exec::openmp::internal::TaskTiling3D& functor,
|
||||
vtkm::Id3 size)
|
||||
{
|
||||
static constexpr vtkm::Id MESSAGE_SIZE = 1024;
|
||||
char errorString[MESSAGE_SIZE];
|
||||
errorString[0] = '\0';
|
||||
vtkm::exec::internal::ErrorMessageBuffer errorMessage(errorString, MESSAGE_SIZE);
|
||||
functor.SetErrorMessageBuffer(errorMessage);
|
||||
|
||||
vtkm::Id3 chunkDims;
|
||||
if (size[0] > 512)
|
||||
{
|
||||
chunkDims = { 1024, 4, 1 };
|
||||
}
|
||||
else if (size[0] > 256)
|
||||
{
|
||||
chunkDims = { 512, 4, 2 };
|
||||
}
|
||||
else if (size[0] > 128)
|
||||
{
|
||||
chunkDims = { 256, 4, 4 };
|
||||
}
|
||||
else if (size[0] > 64)
|
||||
{
|
||||
chunkDims = { 128, 8, 4 };
|
||||
}
|
||||
else if (size[0] > 32)
|
||||
{
|
||||
chunkDims = { 64, 8, 8 };
|
||||
}
|
||||
else if (size[0] > 16)
|
||||
{
|
||||
chunkDims = { 32, 16, 8 };
|
||||
}
|
||||
else
|
||||
{
|
||||
chunkDims = { 16, 16, 16 };
|
||||
}
|
||||
|
||||
const vtkm::Id3 numChunks{ openmp::CeilDivide(size[0], chunkDims[0]),
|
||||
openmp::CeilDivide(size[1], chunkDims[1]),
|
||||
openmp::CeilDivide(size[2], chunkDims[2]) };
|
||||
const vtkm::Id chunkCount = numChunks[0] * numChunks[1] * numChunks[2];
|
||||
|
||||
// Lambda to convert chunkIdx into a start/end {i, j, k}:
|
||||
auto computeIJK = [&](const vtkm::Id& chunkIdx, vtkm::Id3& start, vtkm::Id3& end) {
|
||||
start[0] = chunkIdx % numChunks[0];
|
||||
start[1] = (chunkIdx / numChunks[0]) % numChunks[1];
|
||||
start[2] = (chunkIdx / (numChunks[0] * numChunks[1]));
|
||||
start *= chunkDims; // c-wise mult
|
||||
|
||||
end[0] = std::min(start[0] + chunkDims[0], size[0]);
|
||||
end[1] = std::min(start[1] + chunkDims[1], size[1]);
|
||||
end[2] = std::min(start[2] + chunkDims[2], size[2]);
|
||||
};
|
||||
|
||||
// Iterate through each chunk, converting the chunkIdx into an ijk range:
|
||||
VTKM_OPENMP_DIRECTIVE(parallel for
|
||||
schedule(guided))
|
||||
for (vtkm::Id chunkIdx = 0; chunkIdx < chunkCount; ++chunkIdx)
|
||||
{
|
||||
vtkm::Id3 startIJK;
|
||||
vtkm::Id3 endIJK;
|
||||
computeIJK(chunkIdx, startIJK, endIJK);
|
||||
|
||||
for (vtkm::Id k = startIJK[2]; k < endIJK[2]; ++k)
|
||||
{
|
||||
for (vtkm::Id j = startIJK[1]; j < endIJK[1]; ++j)
|
||||
{
|
||||
functor(startIJK[0], endIJK[0], j, k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (errorMessage.IsErrorRaised())
|
||||
{
|
||||
throw vtkm::cont::ErrorExecution(errorString);
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont
|
385
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
Normal file
385
vtkm/cont/openmp/internal/DeviceAdapterAlgorithmOpenMP.h
Normal file
@ -0,0 +1,385 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
|
||||
|
||||
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
|
||||
#include <vtkm/cont/Error.h>
|
||||
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/ParallelScanOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/ParallelSortOpenMP.h>
|
||||
#include <vtkm/exec/openmp/internal/TaskTilingOpenMP.h>
|
||||
|
||||
// For serial fallback:
|
||||
#include <vtkm/cont/serial/DeviceAdapterSerial.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
template <>
|
||||
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>
|
||||
: vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
|
||||
DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagOpenMP>,
|
||||
vtkm::cont::DeviceAdapterTagOpenMP>
|
||||
{
|
||||
using DevTag = DeviceAdapterTagOpenMP;
|
||||
using SerialAlgo = DeviceAdapterAlgorithm<DeviceAdapterTagSerial>;
|
||||
|
||||
public:
|
||||
template <typename T, typename U, class CIn, class COut>
|
||||
VTKM_CONT static void Copy(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::cont::ArrayHandle<U, COut>& output)
|
||||
{
|
||||
using namespace vtkm::cont::openmp;
|
||||
|
||||
const vtkm::Id inSize = input.GetNumberOfValues();
|
||||
if (inSize == 0)
|
||||
{
|
||||
output.Allocate(0);
|
||||
return;
|
||||
}
|
||||
auto inputPortal = input.PrepareForInput(DevTag());
|
||||
auto outputPortal = output.PrepareForOutput(inSize, DevTag());
|
||||
CopyHelper(inputPortal, outputPortal, 0, 0, inSize);
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn, class CStencil, class COut>
|
||||
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
|
||||
vtkm::cont::ArrayHandle<T, COut>& output)
|
||||
{
|
||||
::vtkm::NotZeroInitialized unary_predicate;
|
||||
CopyIf(input, stencil, output, unary_predicate);
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn, class CStencil, class COut, class UnaryPredicate>
|
||||
VTKM_CONT static void CopyIf(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
const vtkm::cont::ArrayHandle<U, CStencil>& stencil,
|
||||
vtkm::cont::ArrayHandle<T, COut>& output,
|
||||
UnaryPredicate unary_predicate)
|
||||
{
|
||||
using namespace vtkm::cont::openmp;
|
||||
|
||||
vtkm::Id inSize = input.GetNumberOfValues();
|
||||
if (inSize == 0)
|
||||
{
|
||||
output.Allocate(0);
|
||||
return;
|
||||
}
|
||||
auto inputPortal = input.PrepareForInput(DevTag());
|
||||
auto stencilPortal = stencil.PrepareForInput(DevTag());
|
||||
auto outputPortal = output.PrepareForOutput(inSize, DevTag());
|
||||
|
||||
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inputPortal);
|
||||
auto stencilIter = vtkm::cont::ArrayPortalToIteratorBegin(stencilPortal);
|
||||
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outputPortal);
|
||||
|
||||
CopyIfHelper helper;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||
{
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(single)
|
||||
{
|
||||
// Calls omp_get_num_threads, thus must be used inside a parallel section.
|
||||
helper.Initialize(inSize, sizeof(T));
|
||||
}
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||
for (vtkm::Id i = 0; i < helper.NumChunks; ++i)
|
||||
{
|
||||
helper.CopyIf(inIter, stencilIter, outIter, unary_predicate, i);
|
||||
}
|
||||
}
|
||||
|
||||
vtkm::Id numValues = helper.Reduce(outIter);
|
||||
output.Shrink(numValues);
|
||||
}
|
||||
|
||||
|
||||
template <typename T, typename U, class CIn, class COut>
|
||||
VTKM_CONT static bool CopySubRange(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::Id inputStartIndex,
|
||||
vtkm::Id numberOfValuesToCopy,
|
||||
vtkm::cont::ArrayHandle<U, COut>& output,
|
||||
vtkm::Id outputIndex = 0)
|
||||
{
|
||||
using namespace vtkm::cont::openmp;
|
||||
|
||||
const vtkm::Id inSize = input.GetNumberOfValues();
|
||||
|
||||
// Check if the ranges overlap and fail if they do.
|
||||
if (input == output &&
|
||||
((outputIndex >= inputStartIndex && outputIndex < inputStartIndex + numberOfValuesToCopy) ||
|
||||
(inputStartIndex >= outputIndex && inputStartIndex < outputIndex + numberOfValuesToCopy)))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (inputStartIndex < 0 || numberOfValuesToCopy < 0 || outputIndex < 0 ||
|
||||
inputStartIndex >= inSize)
|
||||
{ //invalid parameters
|
||||
return false;
|
||||
}
|
||||
|
||||
//determine if the numberOfElementsToCopy needs to be reduced
|
||||
if (inSize < (inputStartIndex + numberOfValuesToCopy))
|
||||
{ //adjust the size
|
||||
numberOfValuesToCopy = (inSize - inputStartIndex);
|
||||
}
|
||||
|
||||
const vtkm::Id outSize = output.GetNumberOfValues();
|
||||
const vtkm::Id copyOutEnd = outputIndex + numberOfValuesToCopy;
|
||||
if (outSize < copyOutEnd)
|
||||
{ //output is not large enough
|
||||
if (outSize == 0)
|
||||
{ //since output has nothing, just need to allocate to correct length
|
||||
output.Allocate(copyOutEnd);
|
||||
}
|
||||
else
|
||||
{ //we currently have data in this array, so preserve it in the new
|
||||
//resized array
|
||||
vtkm::cont::ArrayHandle<U, COut> temp;
|
||||
temp.Allocate(copyOutEnd);
|
||||
CopySubRange(output, 0, outSize, temp);
|
||||
output = temp;
|
||||
}
|
||||
}
|
||||
|
||||
auto inputPortal = input.PrepareForInput(DevTag());
|
||||
auto outputPortal = output.PrepareForInPlace(DevTag());
|
||||
|
||||
CopyHelper(inputPortal, outputPortal, inputStartIndex, outputIndex, numberOfValuesToCopy);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn>
|
||||
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input, U initialValue)
|
||||
{
|
||||
return Reduce(input, initialValue, vtkm::Add());
|
||||
}
|
||||
|
||||
template <typename T, typename U, class CIn, class BinaryFunctor>
|
||||
VTKM_CONT static U Reduce(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
U initialValue,
|
||||
BinaryFunctor binary_functor)
|
||||
{
|
||||
using namespace vtkm::cont::openmp;
|
||||
|
||||
auto portal = input.PrepareForInput(DevTag());
|
||||
const OpenMPReductionSupported<typename std::decay<U>::type> fastPath;
|
||||
|
||||
return ReduceHelper::Execute(portal, initialValue, binary_functor, fastPath);
|
||||
}
|
||||
|
||||
template <typename T,
|
||||
typename U,
|
||||
class CKeyIn,
|
||||
class CValIn,
|
||||
class CKeyOut,
|
||||
class CValOut,
|
||||
class BinaryFunctor>
|
||||
VTKM_CONT static void ReduceByKey(const vtkm::cont::ArrayHandle<T, CKeyIn>& keys,
|
||||
const vtkm::cont::ArrayHandle<U, CValIn>& values,
|
||||
vtkm::cont::ArrayHandle<T, CKeyOut>& keys_output,
|
||||
vtkm::cont::ArrayHandle<U, CValOut>& values_output,
|
||||
BinaryFunctor func)
|
||||
{
|
||||
openmp::ReduceByKeyHelper(keys, values, keys_output, values_output, func);
|
||||
}
|
||||
|
||||
template <typename T, class CIn, class COut>
|
||||
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::cont::ArrayHandle<T, COut>& output)
|
||||
{
|
||||
return ScanInclusive(input, output, vtkm::Add());
|
||||
}
|
||||
|
||||
template <typename T, class CIn, class COut, class BinaryFunctor>
|
||||
VTKM_CONT static T ScanInclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::cont::ArrayHandle<T, COut>& output,
|
||||
BinaryFunctor binaryFunctor)
|
||||
{
|
||||
if (input.GetNumberOfValues() * sizeof(T) <= openmp::PAGE_SIZE)
|
||||
{
|
||||
return SerialAlgo::ScanInclusive(input, output, binaryFunctor);
|
||||
}
|
||||
|
||||
using InPortalT = decltype(input.PrepareForInput(DevTag()));
|
||||
using OutPortalT = decltype(output.PrepareForOutput(0, DevTag()));
|
||||
using Impl = openmp::ScanInclusiveHelper<InPortalT, OutPortalT, BinaryFunctor>;
|
||||
|
||||
vtkm::Id numVals = input.GetNumberOfValues();
|
||||
Impl impl(
|
||||
input.PrepareForInput(DevTag()), output.PrepareForOutput(numVals, DevTag()), binaryFunctor);
|
||||
|
||||
return impl.Execute(vtkm::Id2(0, numVals));
|
||||
}
|
||||
|
||||
template <typename T, class CIn, class COut>
|
||||
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::cont::ArrayHandle<T, COut>& output)
|
||||
{
|
||||
return ScanExclusive(input, output, vtkm::Add(), vtkm::TypeTraits<T>::ZeroInitialization());
|
||||
}
|
||||
|
||||
template <typename T, class CIn, class COut, class BinaryFunctor>
|
||||
VTKM_CONT static T ScanExclusive(const vtkm::cont::ArrayHandle<T, CIn>& input,
|
||||
vtkm::cont::ArrayHandle<T, COut>& output,
|
||||
BinaryFunctor binaryFunctor,
|
||||
const T& initialValue)
|
||||
{
|
||||
if (input.GetNumberOfValues() * sizeof(T) <= openmp::PAGE_SIZE)
|
||||
{
|
||||
return SerialAlgo::ScanExclusive(input, output, binaryFunctor, initialValue);
|
||||
}
|
||||
|
||||
using InPortalT = decltype(input.PrepareForInput(DevTag()));
|
||||
using OutPortalT = decltype(output.PrepareForOutput(0, DevTag()));
|
||||
using Impl = openmp::ScanExclusiveHelper<InPortalT, OutPortalT, BinaryFunctor>;
|
||||
|
||||
vtkm::Id numVals = input.GetNumberOfValues();
|
||||
Impl impl(input.PrepareForInput(DevTag()),
|
||||
output.PrepareForOutput(numVals, DevTag()),
|
||||
binaryFunctor,
|
||||
initialValue);
|
||||
|
||||
return impl.Execute(vtkm::Id2(0, numVals));
|
||||
}
|
||||
|
||||
/// \brief Unstable ascending sort of input array.
|
||||
///
|
||||
/// Sorts the contents of \c values so that they in ascending value. Doesn't
|
||||
/// guarantee stability
|
||||
///
|
||||
template <typename T, class Storage>
|
||||
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values)
|
||||
{
|
||||
Sort(values, vtkm::SortLess());
|
||||
}
|
||||
|
||||
template <typename T, class Storage, class BinaryCompare>
|
||||
VTKM_CONT static void Sort(vtkm::cont::ArrayHandle<T, Storage>& values,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
openmp::sort::parallel_sort(values, binary_compare);
|
||||
}
|
||||
|
||||
template <typename T, typename U, class StorageT, class StorageU>
|
||||
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||
vtkm::cont::ArrayHandle<U, StorageU>& values)
|
||||
{
|
||||
SortByKey(keys, values, std::less<T>());
|
||||
}
|
||||
|
||||
template <typename T, typename U, class StorageT, class StorageU, class BinaryCompare>
|
||||
VTKM_CONT static void SortByKey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
openmp::sort::parallel_sort_bykey(keys, values, binary_compare);
|
||||
}
|
||||
|
||||
template <typename T, class Storage>
|
||||
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values)
|
||||
{
|
||||
Unique(values, std::equal_to<T>());
|
||||
}
|
||||
|
||||
template <typename T, class Storage, class BinaryCompare>
|
||||
VTKM_CONT static void Unique(vtkm::cont::ArrayHandle<T, Storage>& values,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
auto portal = values.PrepareForInPlace(DevTag());
|
||||
auto iter = vtkm::cont::ArrayPortalToIteratorBegin(portal);
|
||||
|
||||
using IterT = typename std::decay<decltype(iter)>::type;
|
||||
using Uniqifier = openmp::UniqueHelper<IterT, BinaryCompare>;
|
||||
|
||||
Uniqifier uniquifier(iter, portal.GetNumberOfValues(), binary_compare);
|
||||
vtkm::Id outSize = uniquifier.Execute();
|
||||
values.Shrink(outSize);
|
||||
}
|
||||
|
||||
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling1D& functor,
|
||||
vtkm::Id size);
|
||||
VTKM_CONT_EXPORT static void ScheduleTask(vtkm::exec::openmp::internal::TaskTiling3D& functor,
|
||||
vtkm::Id3 size);
|
||||
|
||||
template <class FunctorType>
|
||||
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id numInstances)
|
||||
{
|
||||
vtkm::exec::openmp::internal::TaskTiling1D kernel(functor);
|
||||
ScheduleTask(kernel, numInstances);
|
||||
}
|
||||
|
||||
template <class FunctorType>
|
||||
VTKM_CONT static inline void Schedule(FunctorType functor, vtkm::Id3 rangeMax)
|
||||
{
|
||||
vtkm::exec::openmp::internal::TaskTiling3D kernel(functor);
|
||||
ScheduleTask(kernel, rangeMax);
|
||||
}
|
||||
|
||||
VTKM_CONT static void Synchronize()
|
||||
{
|
||||
// Nothing to do. This device schedules all of its operations using a
|
||||
// split/join paradigm. This means that the if the control thread is
|
||||
// calling this method, then nothing should be running in the execution
|
||||
// environment.
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class DeviceTaskTypes<vtkm::cont::DeviceAdapterTagOpenMP>
|
||||
{
|
||||
public:
|
||||
template <typename WorkletType, typename InvocationType>
|
||||
static vtkm::exec::serial::internal::TaskTiling1D MakeTask(const WorkletType& worklet,
|
||||
const InvocationType& invocation,
|
||||
vtkm::Id,
|
||||
vtkm::Id globalIndexOffset = 0)
|
||||
{
|
||||
return vtkm::exec::openmp::internal::TaskTiling1D(worklet, invocation, globalIndexOffset);
|
||||
}
|
||||
|
||||
template <typename WorkletType, typename InvocationType>
|
||||
static vtkm::exec::serial::internal::TaskTiling3D MakeTask(const WorkletType& worklet,
|
||||
const InvocationType& invocation,
|
||||
vtkm::Id3,
|
||||
vtkm::Id globalIndexOffset = 0)
|
||||
{
|
||||
return vtkm::exec::openmp::internal::TaskTiling3D(worklet, invocation, globalIndexOffset);
|
||||
}
|
||||
};
|
||||
}
|
||||
} // namespace vtkm::cont
|
||||
|
||||
#endif //vtk_m_cont_openmp_internal_DeviceAdapterAlgorithmOpenMP_h
|
@ -0,0 +1,32 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterRuntimeDetectorOpenMP.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
VTKM_CONT bool DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagOpenMP>::Exists() const
|
||||
{
|
||||
using DeviceAdapterTraits = vtkm::cont::DeviceAdapterTraits<vtkm::cont::DeviceAdapterTagOpenMP>;
|
||||
return DeviceAdapterTraits::Valid;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterRuntimeDetector_h
|
||||
#define vtk_m_cont_openmp_internal_DeviceAdapterRuntimeDetector_h
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
#include <vtkm/cont/vtkm_cont_export.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
|
||||
template <class DeviceAdapterTag>
|
||||
class DeviceAdapterRuntimeDetector;
|
||||
|
||||
/// Determine if this machine supports Serial backend
|
||||
///
|
||||
template <>
|
||||
class VTKM_CONT_EXPORT DeviceAdapterRuntimeDetector<vtkm::cont::DeviceAdapterTagOpenMP>
|
||||
{
|
||||
public:
|
||||
/// Returns true if the given device adapter is supported on the current
|
||||
/// machine.
|
||||
VTKM_CONT bool Exists() const;
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
32
vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h
Normal file
32
vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h
Normal file
@ -0,0 +1,32 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
|
||||
|
||||
#include <vtkm/cont/internal/DeviceAdapterTag.h>
|
||||
|
||||
#ifdef VTKM_ENABLE_OPENMP
|
||||
VTKM_VALID_DEVICE_ADAPTER(OpenMP, VTKM_DEVICE_ADAPTER_OPENMP)
|
||||
#else
|
||||
VTKM_INVALID_DEVICE_ADAPTER(OpenMP, VTKM_DEVICE_ADAPTER_OPENMP)
|
||||
#endif
|
||||
|
||||
#endif // vtk_m_cont_openmp_internal_DeviceAdapterTagOpenMP_h
|
674
vtkm/cont/openmp/internal/FunctorsOpenMP.h
Normal file
674
vtkm/cont/openmp/internal/FunctorsOpenMP.h
Normal file
@ -0,0 +1,674 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_cont_openmp_internal_FunctorsOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_FunctorsOpenMP_h
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
|
||||
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||
|
||||
#include <vtkm/BinaryOperators.h>
|
||||
#include <vtkm/BinaryPredicates.h>
|
||||
#include <vtkm/Pair.h>
|
||||
#include <vtkm/Types.h>
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/ErrorExecution.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
// Wrap all '#pragma omp ...' calls in this macro so we can disable them in
|
||||
// non-omp builds and avoid a multitude of 'ignoring pragma..." warnings.
|
||||
#ifdef _OPENMP
|
||||
#define _VTKM_OPENMP_DIRECTIVE_IMPL(fullDir) _Pragma(#fullDir)
|
||||
#define VTKM_OPENMP_DIRECTIVE(dir) _VTKM_OPENMP_DIRECTIVE_IMPL(omp dir)
|
||||
#else // _OPENMP
|
||||
#define VTKM_OPENMP_DIRECTIVE(directive)
|
||||
#endif // _OPENMP
|
||||
|
||||
// When defined, supported type / operator combinations will use the OpenMP
|
||||
// reduction(...) clause. Otherwise, all reductions use the general
|
||||
// implementation with a manual reduction once the threads complete.
|
||||
// I don't know how, but the benchmarks currently perform better without the
|
||||
// specializations.
|
||||
//#define VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
|
||||
constexpr static vtkm::Id CACHE_LINE_SIZE = 64;
|
||||
constexpr static vtkm::Id PAGE_SIZE = 4096;
|
||||
|
||||
// Returns ceil(num/den) for integral types
|
||||
template <typename T>
|
||||
static constexpr T CeilDivide(const T& numerator, const T& denominator)
|
||||
{
|
||||
return (numerator + denominator - 1) / denominator;
|
||||
}
|
||||
|
||||
// Computes the number of values per chunk. Note that numChunks + chunkSize may
|
||||
// exceed numVals, so be sure to check upper limits.
|
||||
static void ComputeChunkSize(const vtkm::Id numVals,
|
||||
const vtkm::Id numThreads,
|
||||
const vtkm::Id chunksPerThread,
|
||||
const vtkm::Id bytesPerValue,
|
||||
vtkm::Id& numChunks,
|
||||
vtkm::Id& valuesPerChunk)
|
||||
{
|
||||
// try to evenly distribute pages across chunks:
|
||||
const vtkm::Id bytesIn = numVals * bytesPerValue;
|
||||
const vtkm::Id pagesIn = CeilDivide(bytesIn, PAGE_SIZE);
|
||||
// If we don't have enough pages to honor chunksPerThread, ignore it:
|
||||
numChunks = (pagesIn > numThreads * chunksPerThread) ? numThreads * chunksPerThread : numThreads;
|
||||
const vtkm::Id pagesPerChunk = CeilDivide(pagesIn, numChunks);
|
||||
valuesPerChunk = CeilDivide(pagesPerChunk * PAGE_SIZE, bytesPerValue);
|
||||
}
|
||||
|
||||
template <typename T, typename U>
|
||||
VTKM_EXEC_CONT static void DoCopy(T src, U dst, vtkm::Id numVals, std::true_type)
|
||||
{
|
||||
if (numVals)
|
||||
{
|
||||
std::copy(src, src + numVals, dst);
|
||||
}
|
||||
}
|
||||
|
||||
// Don't use std::copy when type conversion is required because MSVC.
|
||||
template <typename InIterT, typename OutIterT>
|
||||
VTKM_EXEC_CONT static void DoCopy(InIterT inIter,
|
||||
OutIterT outIter,
|
||||
vtkm::Id numVals,
|
||||
std::false_type)
|
||||
{
|
||||
using ValueType = typename std::iterator_traits<OutIterT>::value_type;
|
||||
|
||||
for (vtkm::Id i = 0; i < numVals; ++i)
|
||||
{
|
||||
*(outIter++) = static_cast<ValueType>(*(inIter++));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename InIterT, typename OutIterT>
|
||||
VTKM_EXEC_CONT static void DoCopy(InIterT inIter, OutIterT outIter, vtkm::Id numVals)
|
||||
{
|
||||
using InValueType = typename std::iterator_traits<InIterT>::value_type;
|
||||
using OutValueType = typename std::iterator_traits<OutIterT>::value_type;
|
||||
|
||||
DoCopy(inIter, outIter, numVals, std::is_same<InValueType, OutValueType>());
|
||||
}
|
||||
|
||||
|
||||
template <typename InPortalT, typename OutPortalT>
|
||||
VTKM_EXEC_CONT static void CopyHelper(InPortalT inPortal,
|
||||
OutPortalT outPortal,
|
||||
vtkm::Id inStart,
|
||||
vtkm::Id outStart,
|
||||
vtkm::Id numVals)
|
||||
{
|
||||
using InValueT = typename InPortalT::ValueType;
|
||||
using OutValueT = typename OutPortalT::ValueType;
|
||||
constexpr auto isSame = std::is_same<InValueT, OutValueT>();
|
||||
|
||||
auto inIter = vtkm::cont::ArrayPortalToIteratorBegin(inPortal) + inStart;
|
||||
auto outIter = vtkm::cont::ArrayPortalToIteratorBegin(outPortal) + outStart;
|
||||
vtkm::Id valuesPerChunk;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(none) shared(inIter, outIter, valuesPerChunk, numVals))
|
||||
{
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(single)
|
||||
{
|
||||
// Evenly distribute full pages to all threads. We manually chunk the
|
||||
// data here so that we can exploit std::copy's memmove optimizations.
|
||||
vtkm::Id numChunks;
|
||||
ComputeChunkSize(
|
||||
numVals, omp_get_num_threads(), 8, sizeof(InValueT), numChunks, valuesPerChunk);
|
||||
}
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||
for (vtkm::Id i = 0; i < numVals; i += valuesPerChunk)
|
||||
{
|
||||
vtkm::Id chunkSize = std::min(numVals - i, valuesPerChunk);
|
||||
DoCopy(inIter + i, outIter + i, chunkSize, isSame);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct CopyIfHelper
|
||||
{
|
||||
vtkm::Id NumValues;
|
||||
vtkm::Id NumThreads;
|
||||
vtkm::Id ValueSize;
|
||||
|
||||
vtkm::Id NumChunks;
|
||||
vtkm::Id ChunkSize;
|
||||
std::vector<vtkm::Id> EndIds;
|
||||
|
||||
CopyIfHelper() = default;
|
||||
|
||||
void Initialize(vtkm::Id numValues, vtkm::Id valueSize)
|
||||
{
|
||||
this->NumValues = numValues;
|
||||
this->NumThreads = omp_get_num_threads();
|
||||
this->ValueSize = valueSize;
|
||||
|
||||
// Evenly distribute pages across the threads. We manually chunk the
|
||||
// data here so that we can exploit std::copy's memmove optimizations.
|
||||
ComputeChunkSize(
|
||||
this->NumValues, this->NumThreads, 8, valueSize, this->NumChunks, this->ChunkSize);
|
||||
|
||||
this->EndIds.resize(this->NumChunks);
|
||||
}
|
||||
|
||||
template <typename InIterT, typename StencilIterT, typename OutIterT, typename PredicateT>
|
||||
void CopyIf(InIterT inIter,
|
||||
StencilIterT stencilIter,
|
||||
OutIterT outIter,
|
||||
PredicateT pred,
|
||||
vtkm::Id chunk)
|
||||
{
|
||||
vtkm::Id startPos = std::min(chunk * this->ChunkSize, this->NumValues);
|
||||
vtkm::Id endPos = std::min((chunk + 1) * this->ChunkSize, this->NumValues);
|
||||
|
||||
vtkm::Id outPos = startPos;
|
||||
for (vtkm::Id inPos = startPos; inPos < endPos; ++inPos)
|
||||
{
|
||||
if (pred(stencilIter[inPos]))
|
||||
{
|
||||
outIter[outPos++] = inIter[inPos];
|
||||
}
|
||||
}
|
||||
|
||||
this->EndIds[chunk] = outPos;
|
||||
}
|
||||
|
||||
template <typename OutIterT>
|
||||
vtkm::Id Reduce(OutIterT data)
|
||||
{
|
||||
vtkm::Id endPos = this->EndIds.front();
|
||||
for (vtkm::Id i = 1; i < this->NumChunks; ++i)
|
||||
{
|
||||
vtkm::Id chunkStart = std::min(i * this->ChunkSize, this->NumValues);
|
||||
vtkm::Id chunkEnd = this->EndIds[i];
|
||||
vtkm::Id numValuesToCopy = chunkEnd - chunkStart;
|
||||
if (numValuesToCopy > 0 && chunkStart != endPos)
|
||||
{
|
||||
std::copy(data + chunkStart, data + chunkEnd, data + endPos);
|
||||
}
|
||||
endPos += numValuesToCopy;
|
||||
}
|
||||
return endPos;
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||
// OpenMP only declares reduction operations for primitive types. This utility
|
||||
// detects if a type T is supported.
|
||||
template <typename T>
|
||||
struct OpenMPReductionSupported : std::false_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<Int8> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<UInt8> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<Int16> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<UInt16> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<Int32> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<UInt32> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<Int64> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<UInt64> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<Float32> : std::true_type
|
||||
{
|
||||
};
|
||||
template <>
|
||||
struct OpenMPReductionSupported<Float64> : std::true_type
|
||||
{
|
||||
};
|
||||
#else
|
||||
template <typename T>
|
||||
using OpenMPReductionSupported = std::false_type;
|
||||
#endif // VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||
|
||||
struct ReduceHelper
|
||||
{
|
||||
// Generic implementation:
|
||||
template <typename PortalT, typename ReturnType, typename Functor>
|
||||
static ReturnType Execute(PortalT portal, ReturnType init, Functor functorIn, std::false_type)
|
||||
{
|
||||
internal::WrappedBinaryOperator<ReturnType, Functor> f(functorIn);
|
||||
|
||||
const vtkm::Id numVals = portal.GetNumberOfValues();
|
||||
auto data = vtkm::cont::ArrayPortalToIteratorBegin(portal);
|
||||
|
||||
bool doParallel = false;
|
||||
std::vector<ReturnType> threadData;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(f)
|
||||
shared(data, threadData, doParallel))
|
||||
{
|
||||
|
||||
int tid = omp_get_thread_num();
|
||||
int numThreads = omp_get_num_threads();
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(single)
|
||||
{
|
||||
if (numVals >= numThreads * 2)
|
||||
{
|
||||
doParallel = true;
|
||||
threadData.resize(numThreads);
|
||||
}
|
||||
}
|
||||
|
||||
if (doParallel)
|
||||
{
|
||||
// Use the first (numThreads*2) values for initializing:
|
||||
ReturnType accum;
|
||||
accum = f(data[2 * tid], data[2 * tid + 1]);
|
||||
|
||||
// Assign each thread chunks of the remaining values for local reduction
|
||||
VTKM_OPENMP_DIRECTIVE(for schedule(static))
|
||||
for (vtkm::Id i = numThreads * 2; i < numVals; i++)
|
||||
{
|
||||
accum = f(accum, data[i]);
|
||||
}
|
||||
|
||||
threadData[tid] = accum;
|
||||
}
|
||||
} // end parallel
|
||||
|
||||
if (doParallel)
|
||||
{
|
||||
// do the final reduction serially:
|
||||
for (size_t i = 0; i < threadData.size(); ++i)
|
||||
{
|
||||
init = f(init, threadData[i]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not enough threads. Do the entire reduction in serial:
|
||||
for (vtkm::Id i = 0; i < numVals; ++i)
|
||||
{
|
||||
init = f(init, data[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return init;
|
||||
}
|
||||
|
||||
#ifdef VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||
|
||||
// Specialize for vtkm functors with OpenMP special cases:
|
||||
#define VTKM_OPENMP_SPECIALIZE_REDUCE1(FunctorType, PragmaString) \
|
||||
template <typename PortalT, typename ReturnType> \
|
||||
static ReturnType Execute( \
|
||||
PortalT portal, ReturnType value, FunctorType functorIn, std::true_type) \
|
||||
{ \
|
||||
const vtkm::Id numValues = portal.GetNumberOfValues(); \
|
||||
internal::WrappedBinaryOperator<ReturnType, FunctorType> f(functorIn); \
|
||||
_Pragma(#PragmaString) for (vtkm::Id i = 0; i < numValues; ++i) \
|
||||
{ \
|
||||
value = f(value, portal.Get(i)); \
|
||||
} \
|
||||
return value; \
|
||||
}
|
||||
|
||||
// Constructing the pragma string inside the _Pragma call doesn't work so
|
||||
// we jump through a hoop:
|
||||
#define VTKM_OPENMP_SPECIALIZE_REDUCE(FunctorType, Operator) \
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE1(FunctorType, "omp parallel for reduction(" #Operator ":value)")
|
||||
|
||||
// + (Add, Sum)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Add, +)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Sum, +)
|
||||
// * (Multiply, Product)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Multiply, *)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Product, *)
|
||||
// - (Subtract)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Subtract, -)
|
||||
// & (BitwiseAnd)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseAnd, &)
|
||||
// | (BitwiseOr)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseOr, |)
|
||||
// ^ (BitwiseXor)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::BitwiseXor, ^)
|
||||
// && (LogicalAnd)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::LogicalAnd, &&)
|
||||
// || (LogicalOr)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::LogicalOr, ||)
|
||||
// min (Minimum)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Minimum, min)
|
||||
// max (Maximum)
|
||||
VTKM_OPENMP_SPECIALIZE_REDUCE(vtkm::Maximum, max)
|
||||
|
||||
#undef VTKM_OPENMP_SPECIALIZE_REDUCE
|
||||
#undef VTKM_OPENMP_SPECIALIZE_REDUCE1
|
||||
|
||||
#endif // VTKM_OPENMP_USE_NATIVE_REDUCTION
|
||||
};
|
||||
|
||||
template <typename KeysInArray,
|
||||
typename ValuesInArray,
|
||||
typename KeysOutArray,
|
||||
typename ValuesOutArray,
|
||||
typename BinaryFunctor>
|
||||
void ReduceByKeyHelper(KeysInArray keysInArray,
|
||||
ValuesInArray valuesInArray,
|
||||
KeysOutArray keysOutArray,
|
||||
ValuesOutArray valuesOutArray,
|
||||
BinaryFunctor functor)
|
||||
{
|
||||
using KeyType = typename KeysInArray::ValueType;
|
||||
using ValueType = typename ValuesInArray::ValueType;
|
||||
|
||||
const vtkm::Id numValues = keysInArray.GetNumberOfValues();
|
||||
auto keysInPortal = keysInArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto valuesInPortal = valuesInArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto keysIn = vtkm::cont::ArrayPortalToIteratorBegin(keysInPortal);
|
||||
auto valuesIn = vtkm::cont::ArrayPortalToIteratorBegin(valuesInPortal);
|
||||
|
||||
auto keysOutPortal = keysOutArray.PrepareForOutput(numValues, DeviceAdapterTagOpenMP());
|
||||
auto valuesOutPortal = valuesOutArray.PrepareForOutput(numValues, DeviceAdapterTagOpenMP());
|
||||
auto keysOut = vtkm::cont::ArrayPortalToIteratorBegin(keysOutPortal);
|
||||
auto valuesOut = vtkm::cont::ArrayPortalToIteratorBegin(valuesOutPortal);
|
||||
|
||||
internal::WrappedBinaryOperator<ValueType, BinaryFunctor> f(functor);
|
||||
vtkm::Id outIdx = 0;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(none) firstprivate(keysIn, valuesIn, keysOut, valuesOut, f)
|
||||
shared(outIdx))
|
||||
{
|
||||
int tid = omp_get_thread_num();
|
||||
int numThreads = omp_get_num_threads();
|
||||
|
||||
// Determine bounds for this thread's scan operation:
|
||||
vtkm::Id chunkSize = (numValues + numThreads - 1) / numThreads;
|
||||
vtkm::Id scanIdx = std::min(tid * chunkSize, numValues);
|
||||
vtkm::Id scanEnd = std::min(scanIdx + chunkSize, numValues);
|
||||
|
||||
auto threadKeysBegin = keysOut + scanIdx;
|
||||
auto threadValuesBegin = valuesOut + scanIdx;
|
||||
auto threadKey = threadKeysBegin;
|
||||
auto threadValue = threadValuesBegin;
|
||||
|
||||
// Reduce each thread's partition:
|
||||
KeyType rangeKey;
|
||||
ValueType rangeValue;
|
||||
for (;;)
|
||||
{
|
||||
if (scanIdx < scanEnd)
|
||||
{
|
||||
rangeKey = keysIn[scanIdx];
|
||||
rangeValue = valuesIn[scanIdx];
|
||||
++scanIdx;
|
||||
|
||||
// Locate end of current range:
|
||||
while (scanIdx < scanEnd && static_cast<KeyType>(keysIn[scanIdx]) == rangeKey)
|
||||
{
|
||||
rangeValue = f(rangeValue, valuesIn[scanIdx]);
|
||||
++scanIdx;
|
||||
}
|
||||
|
||||
*threadKey = rangeKey;
|
||||
*threadValue = rangeValue;
|
||||
++threadKey;
|
||||
++threadValue;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
outIdx = static_cast<vtkm::Id>(threadKey - threadKeysBegin);
|
||||
}
|
||||
|
||||
// Combine the reduction results. Skip tid == 0, since it's already in
|
||||
// the correct location:
|
||||
for (int i = 1; i < numThreads; ++i)
|
||||
{
|
||||
|
||||
// This barrier ensures that:
|
||||
// 1) Threads remain synchronized through this final reduction loop.
|
||||
// 2) The outIdx variable is initialized by thread 0.
|
||||
// 3) All threads have reduced their partitions.
|
||||
VTKM_OPENMP_DIRECTIVE(barrier)
|
||||
|
||||
if (tid == i)
|
||||
{
|
||||
// Check if the previous thread's last key matches our first:
|
||||
if (outIdx > 0 && threadKeysBegin < threadKey && keysOut[outIdx - 1] == *threadKeysBegin)
|
||||
{
|
||||
valuesOut[outIdx - 1] = f(valuesOut[outIdx - 1], *threadValuesBegin);
|
||||
++threadKeysBegin;
|
||||
++threadValuesBegin;
|
||||
}
|
||||
|
||||
// Copy reduced partition to final location (if needed)
|
||||
if (threadKeysBegin < threadKey && threadKeysBegin != keysOut + outIdx)
|
||||
{
|
||||
std::copy(threadKeysBegin, threadKey, keysOut + outIdx);
|
||||
std::copy(threadValuesBegin, threadValue, valuesOut + outIdx);
|
||||
}
|
||||
|
||||
outIdx += static_cast<vtkm::Id>(threadKey - threadKeysBegin);
|
||||
|
||||
} // end tid == i
|
||||
} // end combine reduction
|
||||
} // end parallel
|
||||
|
||||
keysOutArray.Shrink(outIdx);
|
||||
valuesOutArray.Shrink(outIdx);
|
||||
}
|
||||
|
||||
template <typename IterT, typename RawPredicateT>
|
||||
struct UniqueHelper
|
||||
{
|
||||
using ValueType = typename std::iterator_traits<IterT>::value_type;
|
||||
using PredicateT = internal::WrappedBinaryOperator<bool, RawPredicateT>;
|
||||
|
||||
struct Node
|
||||
{
|
||||
vtkm::Id2 InputRange{ -1, -1 };
|
||||
vtkm::Id2 OutputRange{ -1, -1 };
|
||||
|
||||
// Pad the node out to the size of a cache line to prevent false sharing:
|
||||
static constexpr size_t DataSize = 2 * sizeof(vtkm::Id2);
|
||||
static constexpr size_t NumCacheLines = CeilDivide<size_t>(DataSize, CACHE_LINE_SIZE);
|
||||
static constexpr size_t PaddingSize = NumCacheLines * CACHE_LINE_SIZE - DataSize;
|
||||
unsigned char Padding[PaddingSize];
|
||||
};
|
||||
|
||||
IterT Data;
|
||||
vtkm::Id NumValues;
|
||||
PredicateT Predicate;
|
||||
vtkm::Id LeafSize;
|
||||
std::vector<Node> Nodes;
|
||||
size_t NextNode;
|
||||
|
||||
UniqueHelper(IterT iter, vtkm::Id numValues, RawPredicateT pred)
|
||||
: Data(iter)
|
||||
, NumValues(numValues)
|
||||
, Predicate(pred)
|
||||
, LeafSize(0)
|
||||
, NextNode(0)
|
||||
{
|
||||
}
|
||||
|
||||
vtkm::Id Execute()
|
||||
{
|
||||
vtkm::Id outSize = 0;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||
{
|
||||
VTKM_OPENMP_DIRECTIVE(single)
|
||||
{
|
||||
this->Prepare();
|
||||
|
||||
// Kick off task-based divide-and-conquer uniquification:
|
||||
Node* rootNode = this->AllocNode();
|
||||
rootNode->InputRange = vtkm::Id2(0, this->NumValues);
|
||||
this->Uniquify(rootNode);
|
||||
outSize = rootNode->OutputRange[1] - rootNode->OutputRange[0];
|
||||
}
|
||||
}
|
||||
|
||||
return outSize;
|
||||
}
|
||||
|
||||
private:
|
||||
void Prepare()
|
||||
{
|
||||
// Figure out how many values each thread should handle:
|
||||
int numThreads = omp_get_num_threads();
|
||||
vtkm::Id chunksPerThread = 8;
|
||||
vtkm::Id numChunks;
|
||||
ComputeChunkSize(
|
||||
this->NumValues, numThreads, chunksPerThread, sizeof(ValueType), numChunks, this->LeafSize);
|
||||
|
||||
// Compute an upper-bound of the number of nodes in the tree:
|
||||
size_t numNodes = numChunks;
|
||||
while (numChunks > 1)
|
||||
{
|
||||
numChunks = (numChunks + 1) / 2;
|
||||
numNodes += numChunks;
|
||||
}
|
||||
this->Nodes.resize(numNodes);
|
||||
this->NextNode = 0;
|
||||
}
|
||||
|
||||
Node* AllocNode()
|
||||
{
|
||||
size_t nodeIdx;
|
||||
|
||||
// GCC emits a false positive "value computed but not used" for this block:
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-value"
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(atomic capture)
|
||||
{
|
||||
nodeIdx = this->NextNode;
|
||||
++this->NextNode;
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
VTKM_ASSERT(nodeIdx < this->Nodes.size());
|
||||
|
||||
return &this->Nodes[nodeIdx];
|
||||
}
|
||||
|
||||
bool IsLeaf(const vtkm::Id2& range) { return (range[1] - range[0]) <= this->LeafSize; }
|
||||
|
||||
// Not an strict midpoint, but ensures that the first range will always be
|
||||
// a multiple of the leaf size.
|
||||
vtkm::Id ComputeMidpoint(const vtkm::Id2& range)
|
||||
{
|
||||
const vtkm::Id n = range[1] - range[0];
|
||||
const vtkm::Id np = this->LeafSize;
|
||||
|
||||
return CeilDivide(n / 2, np) * np + range[0];
|
||||
}
|
||||
|
||||
void Uniquify(Node* node)
|
||||
{
|
||||
if (!this->IsLeaf(node->InputRange))
|
||||
{
|
||||
vtkm::Id midpoint = this->ComputeMidpoint(node->InputRange);
|
||||
|
||||
Node* right = this->AllocNode();
|
||||
Node* left = this->AllocNode();
|
||||
|
||||
right->InputRange = vtkm::Id2(midpoint, node->InputRange[1]);
|
||||
|
||||
// Intel compilers seem to have trouble following the 'this' pointer
|
||||
// when launching tasks, resulting in a corrupt task environment.
|
||||
// Explicitly copying the pointer into a local variable seems to fix this.
|
||||
auto explicitThis = this;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(taskgroup)
|
||||
{
|
||||
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->Uniquify(right); }
|
||||
|
||||
left->InputRange = vtkm::Id2(node->InputRange[0], midpoint);
|
||||
this->Uniquify(left);
|
||||
|
||||
} // end taskgroup. Both sides of the tree will be completed here.
|
||||
|
||||
// Combine the ranges in the left side:
|
||||
if (this->Predicate(this->Data[left->OutputRange[1] - 1], this->Data[right->OutputRange[0]]))
|
||||
{
|
||||
++right->OutputRange[0];
|
||||
}
|
||||
|
||||
vtkm::Id numVals = right->OutputRange[1] - right->OutputRange[0];
|
||||
DoCopy(this->Data + right->OutputRange[0], this->Data + left->OutputRange[1], numVals);
|
||||
|
||||
node->OutputRange[0] = left->OutputRange[0];
|
||||
node->OutputRange[1] = left->OutputRange[1] + numVals;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto start = this->Data + node->InputRange[0];
|
||||
auto end = this->Data + node->InputRange[1];
|
||||
end = std::unique(start, end, this->Predicate);
|
||||
node->OutputRange[0] = node->InputRange[0];
|
||||
node->OutputRange[1] = node->InputRange[0] + static_cast<vtkm::Id>(end - start);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::openmp
|
||||
|
||||
#endif // vtk_m_cont_openmp_internal_FunctorsOpenMP_h
|
271
vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h
Normal file
271
vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h
Normal file
@ -0,0 +1,271 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||
|
||||
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
#include <iterator>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
namespace sort
|
||||
{
|
||||
namespace quick
|
||||
{
|
||||
|
||||
template <typename IterType, typename RawBinaryCompare>
|
||||
struct QuickSorter
|
||||
{
|
||||
using BinaryCompare = vtkm::cont::internal::WrappedBinaryOperator<bool, RawBinaryCompare>;
|
||||
using ValueType = typename std::iterator_traits<IterType>::value_type;
|
||||
|
||||
IterType Data;
|
||||
BinaryCompare Compare;
|
||||
vtkm::Id SerialSize;
|
||||
|
||||
QuickSorter(IterType iter, RawBinaryCompare comp)
|
||||
: Data(iter)
|
||||
, Compare(comp)
|
||||
, SerialSize(0)
|
||||
{
|
||||
}
|
||||
|
||||
void Execute(const vtkm::Id2 range)
|
||||
{
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||
{
|
||||
VTKM_OPENMP_DIRECTIVE(single)
|
||||
{
|
||||
this->Prepare(range);
|
||||
this->Sort(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
void Prepare(const vtkm::Id2 /*range*/)
|
||||
{
|
||||
// Rough benchmarking on an 4-core+4HT processor shows that this sort is
|
||||
// most efficient (within 5% of TBB sort) when we switch to a serial
|
||||
// implementation once a partition is less than 32K keys
|
||||
this->SerialSize = 32768;
|
||||
}
|
||||
|
||||
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Pair<vtkm::Id, ValueType>& v1,
|
||||
const vtkm::Pair<vtkm::Id, ValueType>& v2,
|
||||
const vtkm::Pair<vtkm::Id, ValueType>& v3) const
|
||||
{
|
||||
if (this->Compare(v1.second, v2.second))
|
||||
{ // v1 < v2
|
||||
if (this->Compare(v1.second, v3.second))
|
||||
{ // v1 < v3
|
||||
if (this->Compare(v2.second, v3.second))
|
||||
{ // v1 < v2 < v3
|
||||
return v2;
|
||||
}
|
||||
else // v3 < v2
|
||||
{ // v1 < v3 < v2
|
||||
return v3;
|
||||
}
|
||||
}
|
||||
else // v3 < v1
|
||||
{ // v3 < v1 < v2
|
||||
return v1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // v2 < v1
|
||||
if (this->Compare(v2.second, v3.second))
|
||||
{ // v2 < v3
|
||||
if (this->Compare(v1.second, v3.second))
|
||||
{ // v2 < v1 < v3
|
||||
return v1;
|
||||
}
|
||||
else
|
||||
{ // v2 < v3 < v1
|
||||
return v3;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // v3 < v2 < v1
|
||||
return v2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
vtkm::Pair<vtkm::Id, ValueType> MedianOf3(const vtkm::Id ids[3]) const
|
||||
{
|
||||
return this->MedianOf3(vtkm::Pair<vtkm::Id, ValueType>(ids[0], this->Data[ids[0]]),
|
||||
vtkm::Pair<vtkm::Id, ValueType>(ids[1], this->Data[ids[1]]),
|
||||
vtkm::Pair<vtkm::Id, ValueType>(ids[2], this->Data[ids[2]]));
|
||||
}
|
||||
|
||||
vtkm::Pair<vtkm::Id, ValueType> PseudoMedianOf9(const vtkm::Id ids[9]) const
|
||||
{
|
||||
return this->MedianOf3(
|
||||
this->MedianOf3(ids), this->MedianOf3(ids + 3), this->MedianOf3(ids + 6));
|
||||
}
|
||||
|
||||
// Approximate the median of the range and return its index.
|
||||
vtkm::Pair<vtkm::Id, ValueType> SelectPivot(const vtkm::Id2 range) const
|
||||
{
|
||||
const vtkm::Id numVals = range[1] - range[0];
|
||||
assert(numVals >= 9);
|
||||
|
||||
// Pseudorandomize the pivot locations to avoid issues with periodic data
|
||||
// (evenly sampling inputs with periodic values tends to cause the same
|
||||
// value to be obtained for all samples)
|
||||
const vtkm::Id seed = range[0] * 3 / 2 + range[1] * 11 / 3 + numVals * 10 / 7;
|
||||
const vtkm::Id delta = (numVals / 9) * 4 / 3;
|
||||
|
||||
vtkm::Id sampleLocations[9] = {
|
||||
range[0] + ((seed + 0 * delta) % numVals), range[0] + ((seed + 1 * delta) % numVals),
|
||||
range[0] + ((seed + 2 * delta) % numVals), range[0] + ((seed + 3 * delta) % numVals),
|
||||
range[0] + ((seed + 4 * delta) % numVals), range[0] + ((seed + 5 * delta) % numVals),
|
||||
range[0] + ((seed + 6 * delta) % numVals), range[0] + ((seed + 7 * delta) % numVals),
|
||||
range[0] + ((seed + 8 * delta) % numVals)
|
||||
};
|
||||
|
||||
return this->PseudoMedianOf9(sampleLocations);
|
||||
}
|
||||
|
||||
// Select a pivot and partition data with it, returning the final location of
|
||||
// the pivot element(s). We use Bentley-McIlroy three-way partitioning to
|
||||
// improve handling of duplicate keys, so the pivot "location" is actually
|
||||
// a range of identical keys, hence the vtkm::Id2 return type, which mark
|
||||
// the [begin, end) of the pivot range.
|
||||
vtkm::Id2 PartitionData(const vtkm::Id2 range)
|
||||
{
|
||||
using namespace std; // For ADL swap
|
||||
|
||||
const vtkm::Pair<vtkm::Id, ValueType> pivotData = this->SelectPivot(range);
|
||||
const vtkm::Id& origPivotIdx = pivotData.first;
|
||||
const ValueType& pivotVal = pivotData.second;
|
||||
|
||||
// Move the pivot to the end of the block while we partition the rest:
|
||||
swap(this->Data[origPivotIdx], this->Data[range[1] - 1]);
|
||||
|
||||
// Indices of the last partitioned keys:
|
||||
vtkm::Id2 dataCursors(range[0] - 1, range[1] - 1);
|
||||
|
||||
// Indices of the start/end of the keys equal to the pivot:
|
||||
vtkm::Id2 pivotCursors(dataCursors);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
// Advance the data cursors past all keys that are already partitioned:
|
||||
while (this->Compare(this->Data[++dataCursors[0]], pivotVal))
|
||||
;
|
||||
while (this->Compare(pivotVal, this->Data[--dataCursors[1]]) && dataCursors[1] > range[0])
|
||||
;
|
||||
|
||||
// Range is partitioned the cursors have crossed:
|
||||
if (dataCursors[0] >= dataCursors[1])
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
// Both dataCursors are pointing at incorrectly partitioned keys. Swap
|
||||
// them to place them in the proper partitions:
|
||||
swap(this->Data[dataCursors[0]], this->Data[dataCursors[1]]);
|
||||
|
||||
// If the elements we just swapped are actually equivalent to the pivot
|
||||
// value, move them to the pivot storage locations:
|
||||
if (!this->Compare(this->Data[dataCursors[0]], pivotVal))
|
||||
{
|
||||
++pivotCursors[0];
|
||||
swap(this->Data[pivotCursors[0]], this->Data[dataCursors[0]]);
|
||||
}
|
||||
if (!this->Compare(pivotVal, this->Data[dataCursors[1]]))
|
||||
{
|
||||
--pivotCursors[1];
|
||||
swap(this->Data[pivotCursors[1]], this->Data[dataCursors[1]]);
|
||||
}
|
||||
}
|
||||
|
||||
// Data is now partitioned as:
|
||||
// | Equal | Less | Greater | Equal |
|
||||
// Move the equal keys to the middle for the final partitioning:
|
||||
// | Less | Equal | Greater |
|
||||
// First the original pivot value at the end:
|
||||
swap(this->Data[range[1] - 1], this->Data[dataCursors[0]]);
|
||||
|
||||
// Update the cursors to either side of the pivot:
|
||||
dataCursors = vtkm::Id2(dataCursors[0] - 1, dataCursors[0] + 1);
|
||||
|
||||
for (vtkm::Id i = range[0]; i < pivotCursors[0]; ++i, --dataCursors[0])
|
||||
{
|
||||
swap(this->Data[i], this->Data[dataCursors[0]]);
|
||||
}
|
||||
|
||||
for (vtkm::Id i = range[1] - 2; i > pivotCursors[1]; --i, ++dataCursors[1])
|
||||
{
|
||||
swap(this->Data[i], this->Data[dataCursors[1]]);
|
||||
}
|
||||
|
||||
// Adjust the cursor so we can use them to construct the regions for the
|
||||
// recursive call:
|
||||
++dataCursors[0];
|
||||
|
||||
return dataCursors;
|
||||
}
|
||||
|
||||
void Sort(const vtkm::Id2 range)
|
||||
{
|
||||
const vtkm::Id numVals = range[1] - range[0];
|
||||
if (numVals <= this->SerialSize)
|
||||
{
|
||||
std::sort(this->Data + range[0], this->Data + range[1], this->Compare);
|
||||
return;
|
||||
}
|
||||
|
||||
const vtkm::Id2 pivots = this->PartitionData(range);
|
||||
const vtkm::Id2 lhRange = vtkm::Id2(range[0], pivots[0]);
|
||||
const vtkm::Id2 rhRange = vtkm::Id2(pivots[1], range[1]);
|
||||
|
||||
// Intel compilers seem to have trouble following the 'this' pointer
|
||||
// when launching tasks, resulting in a corrupt task environment.
|
||||
// Explicitly copying the pointer into a local variable seems to fix this.
|
||||
auto explicitThis = this;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(task default(none) firstprivate(rhRange, explicitThis))
|
||||
{
|
||||
explicitThis->Sort(rhRange);
|
||||
}
|
||||
|
||||
this->Sort(lhRange);
|
||||
}
|
||||
};
|
||||
}
|
||||
} // end namespace sort::quick
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::openmp
|
88
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
Normal file
88
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.cxx
Normal file
@ -0,0 +1,88 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/internal/ParallelRadixSort.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
namespace sort
|
||||
{
|
||||
namespace radix
|
||||
{
|
||||
|
||||
struct RadixThreaderOpenMP
|
||||
{
|
||||
size_t GetAvailableCores() const
|
||||
{
|
||||
size_t result;
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
result = static_cast<size_t>(omp_get_num_threads());
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma omp parallel
|
||||
{
|
||||
result = static_cast<size_t>(omp_get_num_threads());
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename TaskType>
|
||||
void RunParentTask(TaskType task)
|
||||
{
|
||||
assert(!omp_in_parallel());
|
||||
#pragma omp parallel default(none) shared(task)
|
||||
{
|
||||
#pragma omp single
|
||||
{
|
||||
task();
|
||||
}
|
||||
} // Implied barrier ensures that child tasks will finish.
|
||||
}
|
||||
|
||||
template <typename TaskType, typename ThreadData>
|
||||
void RunChildTasks(ThreadData, TaskType left, TaskType right)
|
||||
{
|
||||
assert(omp_in_parallel());
|
||||
#pragma omp task default(none) firstprivate(right)
|
||||
{
|
||||
right();
|
||||
}
|
||||
|
||||
// Execute the left task in the existing thread.
|
||||
left();
|
||||
}
|
||||
};
|
||||
|
||||
VTKM_INSTANTIATE_RADIX_SORT_FOR_THREADER(RadixThreaderOpenMP)
|
||||
}
|
||||
} // end namespace sort::radix
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::openmp
|
44
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h
Normal file
44
vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h
Normal file
@ -0,0 +1,44 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
|
||||
|
||||
#include <vtkm/cont/internal/ParallelRadixSortInterface.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
namespace sort
|
||||
{
|
||||
namespace radix
|
||||
{
|
||||
|
||||
VTKM_DECLARE_RADIX_SORT()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::openmp::sort::radix
|
||||
|
||||
#endif // vtk_m_cont_openmp_internal_ParallelRadixSortOpenMP_h
|
489
vtkm/cont/openmp/internal/ParallelScanOpenMP.h
Normal file
489
vtkm/cont/openmp/internal/ParallelScanOpenMP.h
Normal file
@ -0,0 +1,489 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||
|
||||
#include <vtkm/cont/internal/FunctorsGeneral.h>
|
||||
|
||||
#include <vtkm/Types.h>
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
namespace scan
|
||||
{
|
||||
|
||||
enum class ChildType
|
||||
{
|
||||
Left,
|
||||
Right
|
||||
};
|
||||
|
||||
// Generic implementation of modified Ladner & Fischer 1977 "adder" algorithm
|
||||
// used for backbone of exclusive/inclusive scans. Language in comments is
|
||||
// specific to computing a sum, but the implementation should be generic enough
|
||||
// for any scan operation.
|
||||
//
|
||||
// The basic idea is that a tree structure is used to partition the input into
|
||||
// sets of LeafSize. Each leaf of the tree is processed in two stages: First,
|
||||
// the sum of each leaf is computed, and this information is pushed up the tree
|
||||
// to compute the sum of each node's child leaves. Then the partial sum at the
|
||||
// start of each node is computed and pushed down the tree (the "carry"
|
||||
// values). In the second pass through each leaf's data, these partial sums are
|
||||
// used to compute the final output from the carry value and the input data.
|
||||
//
|
||||
// The passes will likely overlap due to the "leftEdge" optimizations, which
|
||||
// allow each leaf to start the second pass as soon as the first pass of all
|
||||
// previous leaves is completed. Additionally, the first leaf in the data will
|
||||
// combine both passes into one, computing the final output data while
|
||||
// generating its sum for the communication stage.
|
||||
template <typename ScanBody>
|
||||
struct Adder : public ScanBody
|
||||
{
|
||||
template <typename NodeImpl>
|
||||
struct NodeWrapper : public NodeImpl
|
||||
{
|
||||
// Range of IDs this node represents
|
||||
vtkm::Id2 Range{ -1, -1 };
|
||||
|
||||
// Connections:
|
||||
NodeWrapper* Parent{ nullptr };
|
||||
NodeWrapper* Left{ nullptr };
|
||||
NodeWrapper* Right{ nullptr };
|
||||
|
||||
// Special flag to mark nodes on the far left edge of the tree. This allows
|
||||
// various optimization that start the second pass sooner on some ranges.
|
||||
bool LeftEdge{ false };
|
||||
|
||||
// Pad the node out to the size of a cache line to prevent false sharing:
|
||||
static constexpr size_t DataSize =
|
||||
sizeof(NodeImpl) + sizeof(vtkm::Id2) + 3 * sizeof(NodeWrapper*) + sizeof(bool);
|
||||
static constexpr size_t NumCacheLines = CeilDivide<size_t>(DataSize, CACHE_LINE_SIZE);
|
||||
static constexpr size_t PaddingSize = NumCacheLines * CACHE_LINE_SIZE - DataSize;
|
||||
unsigned char Padding[PaddingSize];
|
||||
};
|
||||
|
||||
using Node = NodeWrapper<typename ScanBody::Node>;
|
||||
using ValueType = typename ScanBody::ValueType;
|
||||
|
||||
vtkm::Id LeafSize;
|
||||
std::vector<Node> Nodes;
|
||||
size_t NextNode;
|
||||
|
||||
// Use ScanBody's ctor:
|
||||
using ScanBody::ScanBody;
|
||||
|
||||
// Returns the total array sum:
|
||||
ValueType Execute(const vtkm::Id2& range)
|
||||
{
|
||||
Node* rootNode = nullptr;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel default(shared))
|
||||
{
|
||||
VTKM_OPENMP_DIRECTIVE(single)
|
||||
{
|
||||
// Allocate nodes, prep metadata:
|
||||
this->Prepare(range);
|
||||
|
||||
// Compute the partition and node sums:
|
||||
rootNode = this->AllocNode();
|
||||
rootNode->Range = range;
|
||||
rootNode->LeftEdge = true;
|
||||
ScanBody::InitializeRootNode(rootNode);
|
||||
|
||||
this->Scan(rootNode);
|
||||
} // end single
|
||||
} // end parallel
|
||||
|
||||
return rootNode ? ScanBody::GetFinalResult(rootNode)
|
||||
: vtkm::TypeTraits<ValueType>::ZeroInitialization();
|
||||
}
|
||||
|
||||
private:
|
||||
// Returns the next available node in a thread-safe manner.
|
||||
Node* AllocNode()
|
||||
{
|
||||
size_t nodeIdx;
|
||||
|
||||
// GCC emits a false positive "value computed but not used" for this block:
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunused-value"
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(atomic capture)
|
||||
{
|
||||
nodeIdx = this->NextNode;
|
||||
++this->NextNode;
|
||||
}
|
||||
|
||||
#pragma GCC diagnostic pop
|
||||
|
||||
VTKM_ASSERT(nodeIdx < this->Nodes.size());
|
||||
|
||||
return &this->Nodes[nodeIdx];
|
||||
}
|
||||
|
||||
// Does the range represent a leave node?
|
||||
bool IsLeaf(const vtkm::Id2& range) const { return (range[1] - range[0]) <= this->LeafSize; }
|
||||
|
||||
// Use to split ranges. Ensures that the first range is always a multiple of
|
||||
// LeafSize, when possible.
|
||||
vtkm::Id ComputeMidpoint(const vtkm::Id2& range) const
|
||||
{
|
||||
const vtkm::Id n = range[1] - range[0];
|
||||
const vtkm::Id np = this->LeafSize;
|
||||
|
||||
return (((n / 2) + (np - 1)) / np) * np + range[0];
|
||||
}
|
||||
|
||||
void Prepare(const vtkm::Id2& range)
|
||||
{
|
||||
// Figure out how many values each thread should handle:
|
||||
vtkm::Id numVals = range[1] - range[0];
|
||||
int numThreads = omp_get_num_threads();
|
||||
vtkm::Id chunksPerThread = 8;
|
||||
vtkm::Id numChunks;
|
||||
ComputeChunkSize(
|
||||
numVals, numThreads, chunksPerThread, sizeof(ValueType), numChunks, this->LeafSize);
|
||||
|
||||
// Compute an upper-bound of the number of nodes in the tree:
|
||||
size_t numNodes = numChunks;
|
||||
while (numChunks > 1)
|
||||
{
|
||||
numChunks = (numChunks + 1) / 2;
|
||||
numNodes += numChunks;
|
||||
}
|
||||
this->Nodes.resize(numNodes);
|
||||
this->NextNode = 0;
|
||||
}
|
||||
|
||||
// Build the tree and compute the sums:
|
||||
void Scan(Node* node)
|
||||
{
|
||||
if (!this->IsLeaf(node->Range))
|
||||
{ // split range:
|
||||
vtkm::Id midpoint = this->ComputeMidpoint(node->Range);
|
||||
|
||||
Node* right = this->AllocNode();
|
||||
right->Parent = node;
|
||||
node->Right = right;
|
||||
right->Range = vtkm::Id2(midpoint, node->Range[1]);
|
||||
ScanBody::InitializeChildNode(right, node, ChildType::Right, false);
|
||||
|
||||
// Intel compilers seem to have trouble following the 'this' pointer
|
||||
// when launching tasks, resulting in a corrupt task environment.
|
||||
// Explicitly copying the pointer into a local variable seems to fix this.
|
||||
auto explicitThis = this;
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(taskgroup)
|
||||
{
|
||||
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->Scan(right); } // end right task
|
||||
|
||||
Node* left = this->AllocNode();
|
||||
left->Parent = node;
|
||||
node->Left = left;
|
||||
left->Range = vtkm::Id2(node->Range[0], midpoint);
|
||||
left->LeftEdge = node->LeftEdge;
|
||||
ScanBody::InitializeChildNode(left, node, ChildType::Left, left->LeftEdge);
|
||||
this->Scan(left);
|
||||
|
||||
} // end task group. Both l/r sums will be finished here.
|
||||
|
||||
ScanBody::CombineSummaries(node, node->Left, node->Right);
|
||||
if (node->LeftEdge)
|
||||
{
|
||||
this->UpdateOutput(node);
|
||||
}
|
||||
}
|
||||
else
|
||||
{ // Compute sums:
|
||||
ScanBody::ComputeSummary(node, node->Range, node->LeftEdge);
|
||||
}
|
||||
}
|
||||
|
||||
void UpdateOutput(Node* node)
|
||||
{
|
||||
if (node->Left != nullptr)
|
||||
{
|
||||
assert(node->Right != nullptr);
|
||||
ScanBody::PropagateSummaries(node, node->Left, node->Right, node->LeftEdge);
|
||||
|
||||
// if this node is on the left edge, we know that the left child's
|
||||
// output is already updated, so only descend to the right:
|
||||
if (node->LeftEdge)
|
||||
{
|
||||
this->UpdateOutput(node->Right);
|
||||
}
|
||||
else // Otherwise descent into both:
|
||||
{
|
||||
// Intel compilers seem to have trouble following the 'this' pointer
|
||||
// when launching tasks, resulting in a corrupt task environment.
|
||||
// Explicitly copying the pointer into a local variable seems to fix
|
||||
// this.
|
||||
auto explicitThis = this;
|
||||
|
||||
// no taskgroup/sync needed other than the final barrier of the parallel
|
||||
// section.
|
||||
VTKM_OPENMP_DIRECTIVE(task) { explicitThis->UpdateOutput(node->Right); } // end task
|
||||
this->UpdateOutput(node->Left);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ScanBody::UpdateOutput(node, node->Range, node->LeftEdge);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template <typename InPortalT, typename OutPortalT, typename RawFunctorT>
|
||||
struct ScanExclusiveBody
|
||||
{
|
||||
using ValueType = typename InPortalT::ValueType;
|
||||
using FunctorType = internal::WrappedBinaryOperator<ValueType, RawFunctorT>;
|
||||
|
||||
InPortalT InPortal;
|
||||
OutPortalT OutPortal;
|
||||
FunctorType Functor;
|
||||
ValueType InitialValue;
|
||||
|
||||
struct Node
|
||||
{
|
||||
// Sum of all values in range
|
||||
ValueType Sum{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||
|
||||
// The sum of all elements prior to this node's range
|
||||
ValueType Carry{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||
};
|
||||
|
||||
ScanExclusiveBody(const InPortalT& inPortal,
|
||||
const OutPortalT& outPortal,
|
||||
const RawFunctorT& functor,
|
||||
const ValueType& init)
|
||||
: InPortal(inPortal)
|
||||
, OutPortal(outPortal)
|
||||
, Functor(functor)
|
||||
, InitialValue(init)
|
||||
{
|
||||
}
|
||||
|
||||
// Initialize the root of the node tree
|
||||
void InitializeRootNode(Node* /*root*/) {}
|
||||
|
||||
void InitializeChildNode(Node* /*node*/,
|
||||
const Node* /*parent*/,
|
||||
ChildType /*type*/,
|
||||
bool /*leftEdge*/)
|
||||
{
|
||||
}
|
||||
|
||||
void ComputeSummary(Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||
{
|
||||
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||
node->Sum = input[range[0]];
|
||||
|
||||
// If this block is on the left edge, we can update the output while we
|
||||
// compute the sum:
|
||||
if (leftEdge)
|
||||
{
|
||||
// Set leftEdge arg to false to force the update:
|
||||
node->Sum = UpdateOutputImpl(node, range, false, true);
|
||||
}
|
||||
else // Otherwise, only compute the sum and update the output in pass 2.
|
||||
{
|
||||
for (vtkm::Id i = range[0] + 1; i < range[1]; ++i)
|
||||
{
|
||||
node->Sum = this->Functor(node->Sum, input[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CombineSummaries(Node* parent, const Node* left, const Node* right)
|
||||
{
|
||||
parent->Sum = this->Functor(left->Sum, right->Sum);
|
||||
}
|
||||
|
||||
void PropagateSummaries(const Node* parent, Node* left, Node* right, bool leftEdge)
|
||||
{
|
||||
left->Carry = parent->Carry;
|
||||
right->Carry = leftEdge ? left->Sum : this->Functor(parent->Carry, left->Sum);
|
||||
}
|
||||
|
||||
void UpdateOutput(const Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||
{
|
||||
this->UpdateOutputImpl(node, range, leftEdge, false);
|
||||
}
|
||||
|
||||
ValueType UpdateOutputImpl(const Node* node, const vtkm::Id2& range, bool skip, bool useInit)
|
||||
{
|
||||
if (skip)
|
||||
{
|
||||
// Do nothing; this was already done in ComputeSummary.
|
||||
return vtkm::TypeTraits<ValueType>::ZeroInitialization();
|
||||
}
|
||||
|
||||
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||
auto output = vtkm::cont::ArrayPortalToIteratorBegin(this->OutPortal);
|
||||
|
||||
// Be careful with the order input/output are modified. They might be
|
||||
// pointing at the same data:
|
||||
ValueType carry = useInit ? this->InitialValue : node->Carry;
|
||||
vtkm::Id end = range[1];
|
||||
|
||||
for (vtkm::Id i = range[0]; i < end; ++i)
|
||||
{
|
||||
output[i] = this->Functor(carry, input[i]);
|
||||
|
||||
using std::swap; // Enable ADL
|
||||
swap(output[i], carry);
|
||||
}
|
||||
|
||||
return carry;
|
||||
}
|
||||
|
||||
// Compute the final sum from the node's metadata:
|
||||
ValueType GetFinalResult(const Node* node) const { return this->Functor(node->Sum, node->Carry); }
|
||||
};
|
||||
|
||||
template <typename InPortalT, typename OutPortalT, typename RawFunctorT>
|
||||
struct ScanInclusiveBody
|
||||
{
|
||||
using ValueType = typename InPortalT::ValueType;
|
||||
using FunctorType = internal::WrappedBinaryOperator<ValueType, RawFunctorT>;
|
||||
|
||||
InPortalT InPortal;
|
||||
OutPortalT OutPortal;
|
||||
FunctorType Functor;
|
||||
|
||||
struct Node
|
||||
{
|
||||
// Sum of all values in range
|
||||
ValueType Sum{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||
|
||||
// The sum of all elements prior to this node's range
|
||||
ValueType Carry{ vtkm::TypeTraits<ValueType>::ZeroInitialization() };
|
||||
};
|
||||
|
||||
ScanInclusiveBody(const InPortalT& inPortal,
|
||||
const OutPortalT& outPortal,
|
||||
const RawFunctorT& functor)
|
||||
: InPortal(inPortal)
|
||||
, OutPortal(outPortal)
|
||||
, Functor(functor)
|
||||
{
|
||||
}
|
||||
|
||||
// Initialize the root of the node tree
|
||||
void InitializeRootNode(Node*)
|
||||
{
|
||||
// no-op
|
||||
}
|
||||
|
||||
void InitializeChildNode(Node*, const Node*, ChildType, bool)
|
||||
{
|
||||
// no-op
|
||||
}
|
||||
|
||||
void ComputeSummary(Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||
{
|
||||
// If this block is on the left edge, we can update the output while we
|
||||
// compute the sum:
|
||||
if (leftEdge)
|
||||
{
|
||||
node->Sum = UpdateOutputImpl(node, range, false, false);
|
||||
}
|
||||
else // Otherwise, only compute the sum and update the output in pass 2.
|
||||
{
|
||||
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||
node->Sum = input[range[0]];
|
||||
for (vtkm::Id i = range[0] + 1; i < range[1]; ++i)
|
||||
{
|
||||
node->Sum = this->Functor(node->Sum, input[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CombineSummaries(Node* parent, const Node* left, const Node* right)
|
||||
{
|
||||
parent->Sum = this->Functor(left->Sum, right->Sum);
|
||||
}
|
||||
|
||||
void PropagateSummaries(const Node* parent, Node* left, Node* right, bool leftEdge)
|
||||
{
|
||||
left->Carry = parent->Carry;
|
||||
right->Carry = leftEdge ? left->Sum : this->Functor(parent->Carry, left->Sum);
|
||||
}
|
||||
|
||||
void UpdateOutput(const Node* node, const vtkm::Id2& range, bool leftEdge)
|
||||
{
|
||||
UpdateOutputImpl(node, range, leftEdge, true);
|
||||
}
|
||||
|
||||
ValueType UpdateOutputImpl(const Node* node, const vtkm::Id2& range, bool skip, bool useCarry)
|
||||
{
|
||||
if (skip)
|
||||
{
|
||||
// Do nothing; this was already done in ComputeSummary.
|
||||
return vtkm::TypeTraits<ValueType>::ZeroInitialization();
|
||||
}
|
||||
|
||||
auto input = vtkm::cont::ArrayPortalToIteratorBegin(this->InPortal);
|
||||
auto output = vtkm::cont::ArrayPortalToIteratorBegin(this->OutPortal);
|
||||
|
||||
vtkm::Id start = range[0];
|
||||
vtkm::Id end = range[1];
|
||||
ValueType carry = node->Carry;
|
||||
|
||||
// Initialize with the first value if this is the first range:
|
||||
if (!useCarry && start < end)
|
||||
{
|
||||
carry = input[start];
|
||||
output[start] = carry;
|
||||
++start;
|
||||
}
|
||||
|
||||
for (vtkm::Id i = start; i < end; ++i)
|
||||
{
|
||||
output[i] = this->Functor(carry, input[i]);
|
||||
carry = output[i];
|
||||
}
|
||||
|
||||
return output[end - 1];
|
||||
}
|
||||
|
||||
// Compute the final sum from the node's metadata:
|
||||
ValueType GetFinalResult(const Node* node) const { return node->Sum; }
|
||||
};
|
||||
|
||||
} // end namespace scan
|
||||
|
||||
template <typename InPortalT, typename OutPortalT, typename FunctorT>
|
||||
using ScanExclusiveHelper = scan::Adder<scan::ScanExclusiveBody<InPortalT, OutPortalT, FunctorT>>;
|
||||
|
||||
template <typename InPortalT, typename OutPortalT, typename FunctorT>
|
||||
using ScanInclusiveHelper = scan::Adder<scan::ScanInclusiveBody<InPortalT, OutPortalT, FunctorT>>;
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::openmp
|
251
vtkm/cont/openmp/internal/ParallelSortOpenMP.h
Normal file
251
vtkm/cont/openmp/internal/ParallelSortOpenMP.h
Normal file
@ -0,0 +1,251 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#include <vtkm/cont/openmp/internal/ArrayManagerExecutionOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/FunctorsOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/ParallelQuickSortOpenMP.h>
|
||||
#include <vtkm/cont/openmp/internal/ParallelRadixSortOpenMP.h>
|
||||
|
||||
#include <vtkm/BinaryPredicates.h>
|
||||
#include <vtkm/cont/ArrayHandle.h>
|
||||
#include <vtkm/cont/ArrayHandleIndex.h>
|
||||
#include <vtkm/cont/ArrayHandleZip.h>
|
||||
|
||||
#include <omp.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
namespace sort
|
||||
{
|
||||
|
||||
// Forward declare entry points (See stack overflow discussion 7255281 --
|
||||
// templated overloads of template functions are not specialization, and will
|
||||
// be resolved during the first phase of two part lookup).
|
||||
template <typename T, typename Container, class BinaryCompare>
|
||||
void parallel_sort(vtkm::cont::ArrayHandle<T, Container>&, BinaryCompare);
|
||||
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>&,
|
||||
vtkm::cont::ArrayHandle<U, StorageU>&,
|
||||
BinaryCompare);
|
||||
|
||||
// Quicksort values:
|
||||
template <typename HandleType, class BinaryCompare>
|
||||
void parallel_sort(HandleType& values,
|
||||
BinaryCompare binary_compare,
|
||||
vtkm::cont::internal::radix::PSortTag)
|
||||
{
|
||||
auto portal = values.PrepareForInPlace(DeviceAdapterTagOpenMP());
|
||||
auto iter = vtkm::cont::ArrayPortalToIteratorBegin(portal);
|
||||
vtkm::Id2 range(0, values.GetNumberOfValues());
|
||||
|
||||
using IterType = typename std::decay<decltype(iter)>::type;
|
||||
using Sorter = quick::QuickSorter<IterType, BinaryCompare>;
|
||||
|
||||
Sorter sorter(iter, binary_compare);
|
||||
sorter.Execute(range);
|
||||
}
|
||||
|
||||
// Radix sort values:
|
||||
template <typename T, typename StorageT, class BinaryCompare>
|
||||
void parallel_sort(vtkm::cont::ArrayHandle<T, StorageT>& values,
|
||||
BinaryCompare binary_compare,
|
||||
vtkm::cont::internal::radix::RadixSortTag)
|
||||
{
|
||||
auto c = vtkm::cont::internal::radix::get_std_compare(binary_compare, T{});
|
||||
radix::parallel_radix_sort(
|
||||
values.GetStorage().GetArray(), static_cast<std::size_t>(values.GetNumberOfValues()), c);
|
||||
}
|
||||
|
||||
// Value sort -- static switch between quicksort & radix sort
|
||||
template <typename T, typename Container, class BinaryCompare>
|
||||
void parallel_sort(vtkm::cont::ArrayHandle<T, Container>& values, BinaryCompare binary_compare)
|
||||
{
|
||||
using namespace vtkm::cont::internal::radix;
|
||||
using SortAlgorithmTag = typename sort_tag_type<T, Container, BinaryCompare>::type;
|
||||
|
||||
parallel_sort(values, binary_compare, SortAlgorithmTag{});
|
||||
}
|
||||
|
||||
// Quicksort by key:
|
||||
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||
BinaryCompare binary_compare,
|
||||
vtkm::cont::internal::radix::PSortTag)
|
||||
{
|
||||
using KeyType = vtkm::cont::ArrayHandle<T, StorageT>;
|
||||
constexpr bool larger_than_64bits = sizeof(U) > sizeof(vtkm::Int64);
|
||||
if (larger_than_64bits)
|
||||
{
|
||||
/// More efficient sort:
|
||||
/// Move value indexes when sorting and reorder the value array at last
|
||||
|
||||
using ValueType = vtkm::cont::ArrayHandle<U, StorageU>;
|
||||
using IndexType = vtkm::cont::ArrayHandle<vtkm::Id>;
|
||||
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, IndexType>;
|
||||
|
||||
IndexType indexArray;
|
||||
ValueType valuesScattered;
|
||||
const vtkm::Id size = values.GetNumberOfValues();
|
||||
|
||||
// Generate an in-memory index array:
|
||||
{
|
||||
auto handle = ArrayHandleIndex(keys.GetNumberOfValues());
|
||||
auto inputPortal = handle.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto outputPortal =
|
||||
indexArray.PrepareForOutput(keys.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, keys.GetNumberOfValues());
|
||||
}
|
||||
|
||||
// Sort the keys and indicies:
|
||||
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, indexArray);
|
||||
parallel_sort(zipHandle,
|
||||
vtkm::cont::internal::KeyCompare<T, vtkm::Id, BinaryCompare>(binary_compare),
|
||||
vtkm::cont::internal::radix::PSortTag());
|
||||
|
||||
// Permute the values to their sorted locations:
|
||||
{
|
||||
auto valuesInPortal = values.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto indexPortal = indexArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto valuesOutPortal = valuesScattered.PrepareForOutput(size, DeviceAdapterTagOpenMP());
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel for
|
||||
default(none)
|
||||
firstprivate(valuesInPortal, indexPortal, valuesOutPortal)
|
||||
schedule(static))
|
||||
for (vtkm::Id i = 0; i < size; ++i)
|
||||
{
|
||||
valuesOutPortal.Set(i, valuesInPortal.Get(indexPortal.Get(i)));
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the values back into the input array:
|
||||
{
|
||||
auto inputPortal = valuesScattered.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto outputPortal =
|
||||
values.PrepareForOutput(valuesScattered.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, size);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
using ValueType = vtkm::cont::ArrayHandle<U, StorageU>;
|
||||
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, ValueType>;
|
||||
|
||||
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, values);
|
||||
parallel_sort(zipHandle,
|
||||
vtkm::cont::internal::KeyCompare<T, U, BinaryCompare>(binary_compare),
|
||||
vtkm::cont::internal::radix::PSortTag{});
|
||||
}
|
||||
}
|
||||
|
||||
// Radix sort by key:
|
||||
template <typename T, typename StorageT, typename StorageU, class BinaryCompare>
|
||||
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||
vtkm::cont::ArrayHandle<vtkm::Id, StorageU>& values,
|
||||
BinaryCompare binary_compare,
|
||||
vtkm::cont::internal::radix::RadixSortTag)
|
||||
{
|
||||
using namespace vtkm::cont::internal::radix;
|
||||
auto c = get_std_compare(binary_compare, T{});
|
||||
radix::parallel_radix_sort_key_values(keys.GetStorage().GetArray(),
|
||||
values.GetStorage().GetArray(),
|
||||
static_cast<std::size_t>(keys.GetNumberOfValues()),
|
||||
c);
|
||||
}
|
||||
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||
BinaryCompare binary_compare,
|
||||
vtkm::cont::internal::radix::RadixSortTag)
|
||||
{
|
||||
using KeyType = vtkm::cont::ArrayHandle<T, vtkm::cont::StorageTagBasic>;
|
||||
using ValueType = vtkm::cont::ArrayHandle<U, vtkm::cont::StorageTagBasic>;
|
||||
using IndexType = vtkm::cont::ArrayHandle<vtkm::Id, vtkm::cont::StorageTagBasic>;
|
||||
using ZipHandleType = vtkm::cont::ArrayHandleZip<KeyType, IndexType>;
|
||||
|
||||
IndexType indexArray;
|
||||
ValueType valuesScattered;
|
||||
const vtkm::Id size = values.GetNumberOfValues();
|
||||
|
||||
{
|
||||
auto handle = ArrayHandleIndex(keys.GetNumberOfValues());
|
||||
auto inputPortal = handle.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto outputPortal =
|
||||
indexArray.PrepareForOutput(keys.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, keys.GetNumberOfValues());
|
||||
}
|
||||
|
||||
const vtkm::Id valuesBytes = static_cast<vtkm::Id>(sizeof(T)) * keys.GetNumberOfValues();
|
||||
if (valuesBytes > static_cast<vtkm::Id>(vtkm::cont::internal::radix::MIN_BYTES_FOR_PARALLEL))
|
||||
{
|
||||
parallel_sort_bykey(keys, indexArray, binary_compare);
|
||||
}
|
||||
else
|
||||
{
|
||||
ZipHandleType zipHandle = vtkm::cont::make_ArrayHandleZip(keys, indexArray);
|
||||
parallel_sort(zipHandle,
|
||||
vtkm::cont::internal::KeyCompare<T, vtkm::Id, BinaryCompare>(binary_compare),
|
||||
vtkm::cont::internal::radix::PSortTag());
|
||||
}
|
||||
|
||||
// Permute the values to their sorted locations:
|
||||
{
|
||||
auto valuesInPortal = values.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto indexPortal = indexArray.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto valuesOutPortal = valuesScattered.PrepareForOutput(size, DeviceAdapterTagOpenMP());
|
||||
|
||||
VTKM_OPENMP_DIRECTIVE(parallel for
|
||||
default(none)
|
||||
firstprivate(valuesInPortal, indexPortal, valuesOutPortal)
|
||||
schedule(static))
|
||||
for (vtkm::Id i = 0; i < size; ++i)
|
||||
{
|
||||
valuesOutPortal.Set(i, valuesInPortal.Get(indexPortal.Get(i)));
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto inputPortal = valuesScattered.PrepareForInput(DeviceAdapterTagOpenMP());
|
||||
auto outputPortal =
|
||||
values.PrepareForOutput(valuesScattered.GetNumberOfValues(), DeviceAdapterTagOpenMP());
|
||||
openmp::CopyHelper(inputPortal, outputPortal, 0, 0, valuesScattered.GetNumberOfValues());
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by key -- static switch between radix and quick sort:
|
||||
template <typename T, typename StorageT, typename U, typename StorageU, class BinaryCompare>
|
||||
void parallel_sort_bykey(vtkm::cont::ArrayHandle<T, StorageT>& keys,
|
||||
vtkm::cont::ArrayHandle<U, StorageU>& values,
|
||||
BinaryCompare binary_compare)
|
||||
{
|
||||
using namespace vtkm::cont::internal::radix;
|
||||
using SortAlgorithmTag =
|
||||
typename sortbykey_tag_type<T, U, StorageT, StorageU, BinaryCompare>::type;
|
||||
parallel_sort_bykey(keys, values, binary_compare, SortAlgorithmTag{});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // end namespace vtkm::cont::openmp::sort
|
50
vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h
Normal file
50
vtkm/cont/openmp/internal/VirtualObjectTransferOpenMP.h
Normal file
@ -0,0 +1,50 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#ifndef vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
|
||||
#define vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
|
||||
|
||||
#include <vtkm/cont/internal/VirtualObjectTransfer.h>
|
||||
#include <vtkm/cont/internal/VirtualObjectTransferShareWithControl.h>
|
||||
#include <vtkm/cont/openmp/internal/DeviceAdapterTagOpenMP.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace cont
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
template <typename VirtualDerivedType>
|
||||
struct VirtualObjectTransfer<VirtualDerivedType, vtkm::cont::DeviceAdapterTagOpenMP>
|
||||
: VirtualObjectTransferShareWithControl<VirtualDerivedType>
|
||||
{
|
||||
VTKM_CONT VirtualObjectTransfer(const VirtualDerivedType* virtualObject)
|
||||
: VirtualObjectTransferShareWithControl<VirtualDerivedType>(virtualObject)
|
||||
{
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
} // vtkm::cont::internal
|
||||
|
||||
|
||||
|
||||
#endif // vtk_m_cont_openmp_internal_VirtualObjectTransferOpenMP_h
|
33
vtkm/cont/openmp/testing/CMakeLists.txt
Normal file
33
vtkm/cont/openmp/testing/CMakeLists.txt
Normal file
@ -0,0 +1,33 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
##
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##============================================================================
|
||||
|
||||
set(unit_tests
|
||||
UnitTestOpenMPArrayHandle.cxx
|
||||
UnitTestOpenMPArrayHandleFancy.cxx
|
||||
UnitTestOpenMPCellLocatorTwoLevelUniformGrid.cxx
|
||||
UnitTestOpenMPComputeRange.cxx
|
||||
UnitTestOpenMPDataSetExplicit.cxx
|
||||
UnitTestOpenMPDataSetSingleType.cxx
|
||||
UnitTestOpenMPDeviceAdapter.cxx
|
||||
UnitTestOpenMPImplicitFunction.cxx
|
||||
UnitTestOpenMPPointLocatorUniformGrid.cxx
|
||||
UnitTestOpenMPVirtualObjectHandle.cxx
|
||||
)
|
||||
vtkm_unit_tests(OpenMP SOURCES ${unit_tests})
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandle.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandle.cxx
Normal file
@ -0,0 +1,31 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingArrayHandles.h>
|
||||
|
||||
int UnitTestOpenMPArrayHandle(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingArrayHandles<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandleFancy.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPArrayHandleFancy.cxx
Normal file
@ -0,0 +1,31 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingFancyArrayHandles.h>
|
||||
|
||||
int UnitTestOpenMPArrayHandleFancy(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingFancyArrayHandles<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingCellLocatorTwoLevelUniformGrid.h>
|
||||
|
||||
int UnitTestOpenMPCellLocatorTwoLevelUniformGrid(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::Testing::Run(
|
||||
TestingCellLocatorTwoLevelUniformGrid<vtkm::cont::DeviceAdapterTagOpenMP>);
|
||||
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPComputeRange.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPComputeRange.cxx
Normal file
@ -0,0 +1,31 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingComputeRange.h>
|
||||
|
||||
int UnitTestOpenMPComputeRange(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingComputeRange<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetExplicit.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetExplicit.cxx
Normal file
@ -0,0 +1,31 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingDataSetExplicit.h>
|
||||
|
||||
int UnitTestOpenMPDataSetExplicit(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingDataSetExplicit<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||
}
|
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetSingleType.cxx
Normal file
31
vtkm/cont/openmp/testing/UnitTestOpenMPDataSetSingleType.cxx
Normal file
@ -0,0 +1,31 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingDataSetSingleType.h>
|
||||
|
||||
int UnitTestOpenMPDataSetSingleType(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingDataSetSingleType<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||
}
|
32
vtkm/cont/openmp/testing/UnitTestOpenMPDeviceAdapter.cxx
Normal file
32
vtkm/cont/openmp/testing/UnitTestOpenMPDeviceAdapter.cxx
Normal file
@ -0,0 +1,32 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/RuntimeDeviceTracker.h>
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingDeviceAdapter.h>
|
||||
|
||||
int UnitTestOpenMPDeviceAdapter(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
return vtkm::cont::testing::TestingDeviceAdapter<vtkm::cont::DeviceAdapterTagOpenMP>::Run();
|
||||
}
|
43
vtkm/cont/openmp/testing/UnitTestOpenMPImplicitFunction.cxx
Normal file
43
vtkm/cont/openmp/testing/UnitTestOpenMPImplicitFunction.cxx
Normal file
@ -0,0 +1,43 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingImplicitFunction.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
void TestImplicitFunctions()
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
|
||||
vtkm::cont::testing::TestingImplicitFunction testing;
|
||||
testing.Run(vtkm::cont::DeviceAdapterTagOpenMP());
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int UnitTestOpenMPImplicitFunction(int, char* [])
|
||||
{
|
||||
return vtkm::cont::testing::Testing::Run(TestImplicitFunctions);
|
||||
}
|
@ -0,0 +1,33 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingPointLocatorUniformGrid.h>
|
||||
|
||||
int UnitTestOpenMPPointLocatorUniformGrid(int, char* [])
|
||||
{
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
|
||||
return vtkm::cont::testing::Testing::Run(
|
||||
TestingPointLocatorUniformGrid<vtkm::cont::DeviceAdapterTagOpenMP>());
|
||||
}
|
@ -0,0 +1,49 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/cont/testing/TestingVirtualObjectHandle.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
void TestVirtualObjectHandle()
|
||||
{
|
||||
using DeviceAdapterList = vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagOpenMP>;
|
||||
using DeviceAdapterList2 =
|
||||
vtkm::ListTagBase<vtkm::cont::DeviceAdapterTagSerial, vtkm::cont::DeviceAdapterTagOpenMP>;
|
||||
|
||||
auto tracker = vtkm::cont::GetGlobalRuntimeDeviceTracker();
|
||||
|
||||
tracker.ForceDevice(vtkm::cont::DeviceAdapterTagOpenMP{});
|
||||
vtkm::cont::testing::TestingVirtualObjectHandle<DeviceAdapterList>::Run();
|
||||
|
||||
tracker.Reset();
|
||||
vtkm::cont::testing::TestingVirtualObjectHandle<DeviceAdapterList2>::Run();
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
int UnitTestOpenMPVirtualObjectHandle(int, char* [])
|
||||
{
|
||||
return vtkm::cont::testing::Testing::Run(TestVirtualObjectHandle);
|
||||
}
|
@ -52,6 +52,7 @@ vtkm_declare_headers(${header_impls} TESTABLE OFF)
|
||||
#-----------------------------------------------------------------------------
|
||||
add_subdirectory(serial)
|
||||
add_subdirectory(tbb)
|
||||
add_subdirectory(openmp)
|
||||
add_subdirectory(cuda)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
22
vtkm/exec/openmp/CMakeLists.txt
Normal file
22
vtkm/exec/openmp/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
##
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##============================================================================
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
add_subdirectory(internal)
|
30
vtkm/exec/openmp/internal/CMakeLists.txt
Normal file
30
vtkm/exec/openmp/internal/CMakeLists.txt
Normal file
@ -0,0 +1,30 @@
|
||||
##============================================================================
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
##
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##============================================================================
|
||||
|
||||
set(headers
|
||||
TaskTilingOpenMP.h
|
||||
)
|
||||
|
||||
vtkm_declare_headers(${headers})
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
if (VTKm_ENABLE_OPENMP)
|
||||
add_subdirectory(testing)
|
||||
endif()
|
41
vtkm/exec/openmp/internal/TaskTilingOpenMP.h
Normal file
41
vtkm/exec/openmp/internal/TaskTilingOpenMP.h
Normal file
@ -0,0 +1,41 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
#ifndef vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
|
||||
#define vtk_m_exec_openmp_internal_TaskTilingOpenMP_h
|
||||
|
||||
#include <vtkm/exec/serial/internal/TaskTiling.h>
|
||||
|
||||
namespace vtkm
|
||||
{
|
||||
namespace exec
|
||||
{
|
||||
namespace openmp
|
||||
{
|
||||
namespace internal
|
||||
{
|
||||
|
||||
using TaskTiling1D = vtkm::exec::serial::internal::TaskTiling1D;
|
||||
using TaskTiling3D = vtkm::exec::serial::internal::TaskTiling3D;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace vtkm::exec::tbb::internal
|
||||
|
||||
#endif //vtk_m_exec_tbb_internal_TaskTiling_h
|
27
vtkm/exec/openmp/internal/testing/CMakeLists.txt
Normal file
27
vtkm/exec/openmp/internal/testing/CMakeLists.txt
Normal file
@ -0,0 +1,27 @@
|
||||
##=============================================================================
|
||||
##
|
||||
## Copyright (c) Kitware, Inc.
|
||||
## All rights reserved.
|
||||
## See LICENSE.txt for details.
|
||||
##
|
||||
## This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
## PURPOSE. See the above copyright notice for more information.
|
||||
##
|
||||
## Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
## Copyright 2018 UT-Battelle, LLC.
|
||||
## Copyright 2018 Los Alamos National Security.
|
||||
##
|
||||
## Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
## the U.S. Government retains certain rights in this software.
|
||||
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
## Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
## this software.
|
||||
##
|
||||
##=============================================================================
|
||||
|
||||
set(unit_tests
|
||||
UnitTestTaskTilingOpenMP.cxx
|
||||
)
|
||||
|
||||
vtkm_unit_tests(SOURCES ${unit_tests})
|
@ -0,0 +1,30 @@
|
||||
//============================================================================
|
||||
// Copyright (c) Kitware, Inc.
|
||||
// All rights reserved.
|
||||
// See LICENSE.txt for details.
|
||||
// This software is distributed WITHOUT ANY WARRANTY; without even
|
||||
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
// PURPOSE. See the above copyright notice for more information.
|
||||
//
|
||||
// Copyright 2018 National Technology & Engineering Solutions of Sandia, LLC (NTESS).
|
||||
// Copyright 2018 UT-Battelle, LLC.
|
||||
// Copyright 2018 Los Alamos National Security.
|
||||
//
|
||||
// Under the terms of Contract DE-NA0003525 with NTESS,
|
||||
// the U.S. Government retains certain rights in this software.
|
||||
//
|
||||
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
|
||||
// Laboratory (LANL), the U.S. Government retains certain rights in
|
||||
// this software.
|
||||
//============================================================================
|
||||
|
||||
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
|
||||
|
||||
#include <vtkm/cont/openmp/DeviceAdapterOpenMP.h>
|
||||
#include <vtkm/exec/internal/testing/TestingTaskTiling.h>
|
||||
|
||||
int UnitTestTaskTilingOpenMP(int, char* [])
|
||||
{
|
||||
return vtkm::cont::testing::Testing::Run(
|
||||
vtkm::exec::internal::testing::TestTaskTiling<vtkm::cont::DeviceAdapterTagOpenMP>);
|
||||
}
|
@ -29,6 +29,7 @@ set(VTKM_USE_64BIT_IDS ${VTKm_USE_64BIT_IDS})
|
||||
|
||||
set(VTKM_ENABLE_CUDA ${VTKm_ENABLE_CUDA})
|
||||
set(VTKM_ENABLE_TBB ${VTKm_ENABLE_TBB})
|
||||
set(VTKM_ENABLE_OPENMP ${VTKm_ENABLE_OPENMP})
|
||||
set(VTKM_ENABLE_MPI ${VTKm_ENABLE_MPI})
|
||||
|
||||
if(VTKM_ENABLE_CUDA)
|
||||
|
@ -229,6 +229,10 @@
|
||||
#ifndef VTKM_ENABLE_TBB
|
||||
#cmakedefine VTKM_ENABLE_TBB
|
||||
#endif
|
||||
//Mark if we are building with OpenMP enabled
|
||||
#ifndef VTKM_ENABLE_OPENMP
|
||||
#cmakedefine VTKM_ENABLE_OPENMP
|
||||
#endif
|
||||
|
||||
//Mark if we are building with MPI enabled.
|
||||
#cmakedefine VTKM_ENABLE_MPI
|
||||
|
@ -905,6 +905,10 @@ template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<
|
||||
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagTBB>(
|
||||
vtkm::cont::DeviceAdapterTagTBB);
|
||||
#endif
|
||||
#ifdef VTKM_ENABLE_OPENMP
|
||||
template VTKM_CONT_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagOpenMP>(
|
||||
vtkm::cont::DeviceAdapterTagOpenMP);
|
||||
#endif
|
||||
#ifdef VTKM_ENABLE_CUDA
|
||||
template VTKM_RENDERING_EXPORT void LinearBVH::ConstructOnDevice<vtkm::cont::DeviceAdapterTagCuda>(
|
||||
vtkm::cont::DeviceAdapterTagCuda);
|
||||
|
@ -87,6 +87,13 @@ inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagTBB>(vtkm::cont::
|
||||
return "tbb";
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagOpenMP>(
|
||||
vtkm::cont::DeviceAdapterTagOpenMP)
|
||||
{
|
||||
return "openmp";
|
||||
}
|
||||
|
||||
template <>
|
||||
inline std::string GetDeviceString<vtkm::cont::DeviceAdapterTagCuda>(
|
||||
vtkm::cont::DeviceAdapterTagCuda)
|
||||
|
Loading…
Reference in New Issue
Block a user