Add TBB backend.

This commit is contained in:
Sujin Philip 2015-05-29 10:38:28 -04:00
parent e09b936b23
commit 08f88b1cb9
22 changed files with 1546 additions and 65 deletions

345
CMake/FindTBB.cmake Normal file

@ -0,0 +1,345 @@
# - Find ThreadingBuildingBlocks include dirs and libraries
# Use this module by invoking find_package with the form:
# find_package(TBB
# [REQUIRED] # Fail with error if TBB is not found
# ) #
# Once done, this will define
#
# TBB_FOUND - system has TBB
# TBB_INCLUDE_DIRS - the TBB include directories
# TBB_LIBRARIES - TBB libraries to be linked, doesn't include malloc or
# malloc proxy
#
# TBB_VERSION_MAJOR - Major Product Version Number
# TBB_VERSION_MINOR - Minor Product Version Number
# TBB_INTERFACE_VERSION - Engineering Focused Version Number
# TBB_COMPATIBLE_INTERFACE_VERSION - The oldest major interface version
# still supported. This uses the engineering
# focused interface version numbers.
#
# TBB_MALLOC_FOUND - system has TBB malloc library
# TBB_MALLOC_INCLUDE_DIRS - the TBB malloc include directories
# TBB_MALLOC_LIBRARIES - The TBB malloc libraries to be linked
#
# TBB_MALLOC_PROXY_FOUND - system has TBB malloc proxy library
# TBB_MALLOC_PROXY_INCLUDE_DIRS - the TBB malloc proxy include directories
# TBB_MALLOC_PROXY_LIBRARIES - The TBB malloc proxy libraries to be linked
#
#
# This module reads hints about search locations from variables:
# ENV TBB_ARCH_PLATFORM - for eg. set it to "mic" for Xeon Phi builds
# ENV TBB_ROOT or just TBB_ROOT - root directory of tbb installation
# ENV TBB_BUILD_PREFIX - specifies the build prefix for user built tbb
# libraries. Should be specified with ENV TBB_ROOT
# and optionally...
# ENV TBB_BUILD_DIR - if build directory is different than ${TBB_ROOT}/build
#
#
# Modified by Robert Maynard from the original OGRE source
#
#-------------------------------------------------------------------
# This file is part of the CMake build system for OGRE
# (Object-oriented Graphics Rendering Engine)
# For the latest info, see http://www.ogre3d.org/
#
# The contents of this file are placed in the public domain. Feel
# free to make use of it in any way you like.
#-------------------------------------------------------------------
#
#=============================================================================
# Copyright 2010-2012 Kitware, Inc.
# Copyright 2012 Rolf Eike Beer <eike@sf-mail.de>
#
# Distributed under the OSI-approved BSD License (the "License");
# see accompanying file Copyright.txt for details.
#
# This software is distributed WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the License for more information.
#=============================================================================
# (To distribute this file outside of CMake, substitute the full
# License text for the above reference.)
#=============================================================================
# FindTBB helper functions and macros
#
#===============================================
# Do the final processing for the package find.
#===============================================
# findpkg_finish(PREFIX)
#
# Final bookkeeping for one searched component.
#
# Reads:
#   ${PREFIX}_INCLUDE_DIR, ${PREFIX}_LIBRARY - results of the preceding search
#   ${PREFIX}_FIND_REQUIRED                  - set by find_package(... REQUIRED)
#
# When both the include directory and the library are set it defines
#   ${PREFIX}_FOUND        - TRUE
#   ${PREFIX}_INCLUDE_DIRS - copy of ${PREFIX}_INCLUDE_DIR
#   ${PREFIX}_LIBRARIES    - copy of ${PREFIX}_LIBRARY
# Otherwise, if the component was REQUIRED, configuration stops with a
# FATAL_ERROR.
#
# This is a macro, so all variables are set in the caller's scope.
macro(findpkg_finish PREFIX)
  # skip if already processed during this run
  if (NOT ${PREFIX}_FOUND)
    if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY)
      set(${PREFIX}_FOUND TRUE)
      set(${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIR})
      set(${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARY})
    elseif (${PREFIX}_FIND_REQUIRED)
      # QUIET only silences informational output; a REQUIRED package that is
      # missing must abort even when QUIET was also requested.
      message(FATAL_ERROR "Required library ${PREFIX} not found.")
    endif ()
    # mark the following variables as internal variables
    mark_as_advanced(${PREFIX}_INCLUDE_DIR
                     ${PREFIX}_LIBRARY
                     ${PREFIX}_LIBRARY_DEBUG
                     ${PREFIX}_LIBRARY_RELEASE)
  endif ()
endmacro()
#===============================================
# Generate debug names from given release names
#===============================================
# get_debug_names(PREFIX)
#
# For every entry of the list ${PREFIX}, appends candidate debug-build
# library names to the list ${PREFIX}_DEBUG: the entry with the suffixes
# d, D, _d, _D and _debug, followed by the unsuffixed entry itself.
# The result is written in the caller's scope (this is a macro).
macro(get_debug_names PREFIX)
  foreach (i ${${PREFIX}})
    set(${PREFIX}_DEBUG
        ${${PREFIX}_DEBUG}
        ${i}d
        ${i}D
        ${i}_d
        ${i}_D
        ${i}_debug
        ${i})
  endforeach ()
endmacro()
#===============================================
# See if we have env vars to help us find tbb
#===============================================
# getenv_path(VAR)
#
# Copies the environment variable named VAR into the CMake variable
# ENV_${VAR} (caller's scope) and converts every backslash in it to a
# forward slash. When the environment variable is empty, no replacement is
# performed.
macro(getenv_path VAR)
  set(ENV_${VAR} $ENV{${VAR}})
  # nothing to convert if the variable is blank
  if (ENV_${VAR})
    # The input is quoted so that a value containing semicolons is passed as
    # one argument; unquoted, string() would concatenate the list elements
    # and drop the separators.
    string(REGEX REPLACE "\\\\" "/" ENV_${VAR} "${ENV_${VAR}}")
  endif ()
endmacro()
#===============================================
# Couple a set of release and debug libraries
#===============================================
# make_library_set(PREFIX)
#
# Combines ${PREFIX}_RELEASE and ${PREFIX}_DEBUG into ${PREFIX}:
#   both set     -> "optimized <release> debug <debug>"
#   only release -> the release value
#   only debug   -> the debug value
#   neither      -> ${PREFIX} is left untouched
# The result is written in the caller's scope (this is a macro).
macro(make_library_set PREFIX)
  if (${PREFIX}_RELEASE)
    if (${PREFIX}_DEBUG)
      set(${PREFIX}
          optimized ${${PREFIX}_RELEASE}
          debug ${${PREFIX}_DEBUG})
    else ()
      set(${PREFIX} ${${PREFIX}_RELEASE})
    endif ()
  elseif (${PREFIX}_DEBUG)
    set(${PREFIX} ${${PREFIX}_DEBUG})
  endif ()
endmacro()
#=============================================================================
# Now to actually find TBB
#
# Get path, convert backslashes as ${ENV_${var}}
# Fetch TBB_ROOT from the environment into ENV_TBB_ROOT.
getenv_path(TBB_ROOT)

# Candidate installation roots: the CMake variable TBB_ROOT first, then the
# environment's TBB_ROOT. Both search lists start out empty.
set(TBB_PREFIX_PATH ${TBB_ROOT} ${ENV_TBB_ROOT})
set(TBB_INC_SEARCH_PATH "")
set(TBB_LIB_SEARCH_PATH "")

# TBB built from sources by the user: the libraries live in
# <build dir>/<build prefix>_release and <build dir>/<build prefix>_debug.
# This is only considered when both ENV TBB_BUILD_PREFIX and ENV TBB_ROOT
# are given.
set(TBB_BUILD_PREFIX $ENV{TBB_BUILD_PREFIX})
if (ENV_TBB_ROOT AND TBB_BUILD_PREFIX)
  getenv_path(TBB_BUILD_DIR)
  # default build directory is ${ENV_TBB_ROOT}/build
  if (NOT ENV_TBB_BUILD_DIR)
    set(ENV_TBB_BUILD_DIR ${ENV_TBB_ROOT}/build)
  endif ()
  list(APPEND TBB_LIB_SEARCH_PATH
       ${ENV_TBB_BUILD_DIR}/${TBB_BUILD_PREFIX}_release)
  list(APPEND TBB_LIB_SEARCH_PATH
       ${ENV_TBB_BUILD_DIR}/${TBB_BUILD_PREFIX}_debug)
endif ()
# For Windows, let's assume that the user might be using the precompiled
# TBB packages from the main website. These use a rather awkward directory
# structure (at least for automatically finding the right files) depending
# on platform and compiler, but we'll do our best to accommodate it.
# Not adding the same effort for the precompiled linux builds, though. Those
# have different versions for CC compiler versions and linux kernels which
# will never adequately match the user's setup, so there is no feasible way
# to detect the "best" version to use. The user will have to manually
# select the right files. (Chances are the distributions are shipping their
# custom version of tbb, anyway, so the problem is probably nonexistent.)
# Windows/MSVC: add the library directories used by the precompiled TBB
# packages, which are keyed by architecture and Visual C++ toolset name.
if (WIN32 AND MSVC)
  # Translate MSVC_VERSION into the toolset directory name. Versions that
  # match none of the entries below keep the vc7.1 default.
  set(COMPILER_PREFIX "vc7.1")
  if (MSVC_VERSION EQUAL 1400)
    set(COMPILER_PREFIX "vc8")
  elseif (MSVC_VERSION EQUAL 1500)
    set(COMPILER_PREFIX "vc9")
  elseif (MSVC_VERSION EQUAL 1600)
    set(COMPILER_PREFIX "vc10")
  elseif (MSVC_VERSION EQUAL 1700)
    set(COMPILER_PREFIX "vc11")
  elseif (MSVC_VERSION EQUAL 1800)
    set(COMPILER_PREFIX "vc12")
  elseif (NOT MSVC_VERSION LESS 1900)
    # Visual Studio 2015 (1900) and every later toolset (191x, 192x, ...)
    # map to vc14; an exact match on 1900 would send newer compilers to the
    # vc7.1 fallback above.
    set(COMPILER_PREFIX "vc14")
  endif ()

  # For each prefix path, add the <arch>/${COMPILER_PREFIX}/lib and
  # lib/<arch>/${COMPILER_PREFIX} directories to the library search path.
  foreach (dir ${TBB_PREFIX_PATH})
    if (CMAKE_CL_64)
      list(APPEND TBB_LIB_SEARCH_PATH
           ${dir}/ia64/${COMPILER_PREFIX}/lib
           ${dir}/lib/ia64/${COMPILER_PREFIX}
           ${dir}/intel64/${COMPILER_PREFIX}/lib
           ${dir}/lib/intel64/${COMPILER_PREFIX})
    else ()
      list(APPEND TBB_LIB_SEARCH_PATH
           ${dir}/ia32/${COMPILER_PREFIX}/lib
           ${dir}/lib/ia32/${COMPILER_PREFIX})
    endif ()
  endforeach ()
endif ()
# For OS X binary distribution, choose libc++ based libraries for Mavericks and
# above and AppleClang
# Darwin 13.0 or newer: when the C++ compiler is Apple's Clang, prefer the
# libc++ flavoured library directories of the binary distribution.
#
# Variable names are passed to if() unexpanded so that an empty or undefined
# value cannot produce a malformed condition, and the system version is
# compared with VERSION_LESS because it is a dotted version string.
if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND
    NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0)
  set (USE_LIBCXX OFF)

  # Policy CMP0025 decides whether Apple's compiler is reported as
  # "AppleClang" (NEW) or "Clang" (otherwise). Only query the policy when
  # the running CMake knows it.
  set (POLICY_VAR "")
  if (POLICY CMP0025)
    cmake_policy(GET CMP0025 POLICY_VAR)
  endif ()

  if ("${POLICY_VAR}" STREQUAL "NEW")
    if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
      set (USE_LIBCXX ON)
    endif ()
  else ()
    if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
      set (USE_LIBCXX ON)
    endif ()
  endif ()

  if (USE_LIBCXX)
    foreach (dir ${TBB_PREFIX_PATH})
      list (APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/libc++ ${dir}/libc++/lib)
    endforeach ()
  endif ()
endif ()
# check compiler ABI
# Pick the compiler ABI directory name: gcc4.1 for GNU compilers older than
# 4.4, gcc4.4 for everything else (other compilers are assumed compatible
# with 4.4).
set(COMPILER_PREFIX "gcc4.4")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
    CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4)
  set(COMPILER_PREFIX "gcc4.1")
endif ()

# An explicitly requested platform architecture (ENV TBB_ARCH_PLATFORM) is
# searched before the pointer-size based directories below.
set(TBB_ARCH_PLATFORM $ENV{TBB_ARCH_PLATFORM})
if (TBB_ARCH_PLATFORM)
  foreach (dir ${TBB_PREFIX_PATH})
    list(APPEND TBB_LIB_SEARCH_PATH
         ${dir}/${TBB_ARCH_PLATFORM}/lib
         ${dir}/lib/${TBB_ARCH_PLATFORM})
  endforeach ()
endif ()

# Architecture directories chosen from the pointer size: intel64 when
# pointers are 8 bytes wide, ia32 otherwise.
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
  foreach (dir ${TBB_PREFIX_PATH})
    list(APPEND TBB_LIB_SEARCH_PATH
         ${dir}/lib/intel64
         ${dir}/lib/intel64/${COMPILER_PREFIX}
         ${dir}/intel64/lib
         ${dir}/intel64/${COMPILER_PREFIX}/lib)
  endforeach ()
else ()
  foreach (dir ${TBB_PREFIX_PATH})
    list(APPEND TBB_LIB_SEARCH_PATH
         ${dir}/lib/ia32
         ${dir}/lib/ia32/${COMPILER_PREFIX}
         ${dir}/ia32/lib
         ${dir}/ia32/${COMPILER_PREFIX}/lib)
  endforeach ()
endif ()

# General library and header locations below each prefix come last.
foreach (dir ${TBB_PREFIX_PATH})
  list(APPEND TBB_LIB_SEARCH_PATH
       ${dir}/lib
       ${dir}/Lib
       ${dir}/lib/tbb
       ${dir}/Libs)
  list(APPEND TBB_INC_SEARCH_PATH
       ${dir}/include
       ${dir}/Include
       ${dir}/include/tbb)
endforeach ()
# Locate the core TBB library: header tbb/tbb.h plus the release and debug
# variants of the "tbb" library, looked up in the search lists built above.
set(TBB_LIBRARY_NAMES tbb)
get_debug_names(TBB_LIBRARY_NAMES)

find_path(TBB_INCLUDE_DIR NAMES tbb/tbb.h PATHS ${TBB_INC_SEARCH_PATH})
find_library(TBB_LIBRARY_RELEASE NAMES ${TBB_LIBRARY_NAMES} PATHS ${TBB_LIB_SEARCH_PATH})
find_library(TBB_LIBRARY_DEBUG NAMES ${TBB_LIBRARY_NAMES_DEBUG} PATHS ${TBB_LIB_SEARCH_PATH})

# Fold the two variants into TBB_LIBRARY, then publish TBB_FOUND,
# TBB_INCLUDE_DIRS and TBB_LIBRARIES.
make_library_set(TBB_LIBRARY)
findpkg_finish(TBB)

# Without the core library there is no point in going any further.
if (NOT TBB_FOUND)
  return()
endif ()
#=============================================================================
# Look for TBB's malloc package
# Locate the TBB malloc library ("tbbmalloc") in release and debug variants.
# The include directory is the one that contains tbb/tbb.h.
set(TBB_MALLOC_LIBRARY_NAMES tbbmalloc)
get_debug_names(TBB_MALLOC_LIBRARY_NAMES)

find_path(TBB_MALLOC_INCLUDE_DIR NAMES tbb/tbb.h PATHS ${TBB_INC_SEARCH_PATH})
find_library(TBB_MALLOC_LIBRARY_RELEASE NAMES ${TBB_MALLOC_LIBRARY_NAMES} PATHS ${TBB_LIB_SEARCH_PATH})
find_library(TBB_MALLOC_LIBRARY_DEBUG NAMES ${TBB_MALLOC_LIBRARY_NAMES_DEBUG} PATHS ${TBB_LIB_SEARCH_PATH})

# Publish TBB_MALLOC_FOUND, TBB_MALLOC_INCLUDE_DIRS and TBB_MALLOC_LIBRARIES.
make_library_set(TBB_MALLOC_LIBRARY)
findpkg_finish(TBB_MALLOC)
#=============================================================================
# Look for TBB's malloc proxy package
# Locate the TBB malloc proxy library ("tbbmalloc_proxy") in release and
# debug variants, together with its header tbb/tbbmalloc_proxy.h.
set(TBB_MALLOC_PROXY_LIBRARY_NAMES tbbmalloc_proxy)
get_debug_names(TBB_MALLOC_PROXY_LIBRARY_NAMES)

find_path(TBB_MALLOC_PROXY_INCLUDE_DIR NAMES tbb/tbbmalloc_proxy.h PATHS ${TBB_INC_SEARCH_PATH})
find_library(TBB_MALLOC_PROXY_LIBRARY_RELEASE NAMES ${TBB_MALLOC_PROXY_LIBRARY_NAMES} PATHS ${TBB_LIB_SEARCH_PATH})
find_library(TBB_MALLOC_PROXY_LIBRARY_DEBUG NAMES ${TBB_MALLOC_PROXY_LIBRARY_NAMES_DEBUG} PATHS ${TBB_LIB_SEARCH_PATH})

# Publish TBB_MALLOC_PROXY_FOUND, TBB_MALLOC_PROXY_INCLUDE_DIRS and
# TBB_MALLOC_PROXY_LIBRARIES.
make_library_set(TBB_MALLOC_PROXY_LIBRARY)
findpkg_finish(TBB_MALLOC_PROXY)
#=============================================================================
#parse all the version numbers from tbb
# Parse the version numbers out of tbb/tbb_stddef.h and publish them as
# TBB_VERSION_MAJOR, TBB_VERSION_MINOR, TBB_INTERFACE_VERSION and
# TBB_COMPATIBLE_INTERFACE_VERSION.
#
# TBB_VERSION is not set by this module; the guard lets a caller that has
# already defined it skip the parsing.
if (NOT TBB_VERSION)
  if (EXISTS "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h")
    # only read the start of the file
    file(READ
         "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h"
         TBB_VERSION_CONTENTS
         LIMIT 2048)

    # Extract each "#define TBB_<field> <number>". A field that is not
    # present is left undefined instead of receiving the raw file text.
    foreach (tbb_version_field
             VERSION_MAJOR
             VERSION_MINOR
             INTERFACE_VERSION
             COMPATIBLE_INTERFACE_VERSION)
      string(REGEX MATCH
             "#define TBB_${tbb_version_field} ([0-9]+)"
             tbb_version_match "${TBB_VERSION_CONTENTS}")
      if (NOT "${tbb_version_match}" STREQUAL "")
        set(TBB_${tbb_version_field} "${CMAKE_MATCH_1}")
      else ()
        unset(TBB_${tbb_version_field})
      endif ()
    endforeach ()
  elseif (NOT TBB_FIND_QUIETLY)
    # A missing header must not abort configuration of an otherwise usable
    # TBB; the version variables are simply not provided.
    message(STATUS
            "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h not found; "
            "TBB version variables are not set.")
  endif ()
endif ()

57
CMake/UseVTKmTBB.cmake Normal file

@ -0,0 +1,57 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2014 Sandia Corporation.
## Copyright 2014 UT-Battelle, LLC.
## Copyright 2014 Los Alamos National Security.
##
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
# Nothing to do when an earlier inclusion already completed the setup.
if (VTKm_TBB_initialize_complete)
  return()
endif ()

#-----------------------------------------------------------------------------
# Find TBB. VTKm_TBB_FOUND mirrors TBB_FOUND.
#-----------------------------------------------------------------------------
if (NOT VTKm_TBB_FOUND)
  find_package(TBB REQUIRED)
  set (VTKm_TBB_FOUND ${TBB_FOUND})
endif ()

#-----------------------------------------------------------------------------
# Find the Boost library. The TBB device is disabled (VTKm_TBB_FOUND set to
# FALSE) when Boost is not available.
#-----------------------------------------------------------------------------
if (VTKm_TBB_FOUND AND NOT Boost_FOUND)
  find_package(BoostHeaders ${VTKm_REQUIRED_BOOST_VERSION})
  if (NOT Boost_FOUND)
    message(STATUS "Boost not found")
    set(VTKm_TBB_FOUND FALSE)
  endif ()
endif ()

#-----------------------------------------------------------------------------
# Set up all these dependent packages (if they were all found).
#-----------------------------------------------------------------------------
if (VTKm_TBB_FOUND)
  include_directories(
    ${Boost_INCLUDE_DIRS}
    ${VTKm_INCLUDE_DIRS}
    ${TBB_INCLUDE_DIRS}
    )
  set(VTKm_TBB_initialize_complete TRUE)
endif ()

@ -340,6 +340,9 @@ function(vtkm_worklet_unit_tests device_adapter)
cuda_add_executable(${test_prog} ${unit_test_drivers} ${unit_test_srcs}) cuda_add_executable(${test_prog} ${unit_test_drivers} ${unit_test_srcs})
else() else()
add_executable(${test_prog} ${unit_test_drivers} ${unit_test_srcs}) add_executable(${test_prog} ${unit_test_drivers} ${unit_test_srcs})
if("${device_adapter}" STREQUAL "VTKM_DEVICE_ADAPTER_TBB")
target_link_libraries(${test_prog} ${TBB_LIBRARIES})
endif()
endif() endif()
#add a test for each worklet test file. We will inject the device #add a test for each worklet test file. We will inject the device

@ -54,6 +54,7 @@ include(CMake/VTKmCompilerExtras.cmake)
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
# Configurable Options # Configurable Options
option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF) option(VTKm_ENABLE_CUDA "Enable Cuda support" OFF)
option(VTKm_ENABLE_TBB "Enable TBB support" OFF)
option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON) option(VTKm_ENABLE_TESTING "Enable VTKm Testing" ON)
option(VTKm_USE_DOUBLE_PRECISION option(VTKm_USE_DOUBLE_PRECISION
@ -73,6 +74,9 @@ vtkm_configure_device(Serial)
if (VTKm_ENABLE_CUDA) if (VTKm_ENABLE_CUDA)
vtkm_configure_device(Cuda) vtkm_configure_device(Cuda)
endif (VTKm_ENABLE_CUDA) endif (VTKm_ENABLE_CUDA)
if (VTKm_ENABLE_TBB)
vtkm_configure_device(TBB)
endif (VTKm_ENABLE_TBB)
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------

@ -72,4 +72,6 @@ licenses.
- - - - - - - - - - - - - - - - - - - - - - - - do not remove this line - - - - - - - - - - - - - - - - - - - - - - - - do not remove this line
CMake/CheckCXX11Features.cmake CMake/CheckCXX11Features.cmake
CMake/FindBoostHeaders.cmake CMake/FindBoostHeaders.cmake
CMake/FindTBB.cmake
vtkm/cont/tbb/internal/parallel_sort.h
vtkm/testing/OptionParser.h vtkm/testing/OptionParser.h

@ -65,6 +65,9 @@ vtkm_declare_headers(${impl_headers} ${headers})
if (VTKm_ENABLE_CUDA) if (VTKm_ENABLE_CUDA)
add_subdirectory(cuda) add_subdirectory(cuda)
endif () endif ()
if (VTKm_ENABLE_TBB)
add_subdirectory(tbb)
endif ()
#----------------------------------------------------------------------------- #-----------------------------------------------------------------------------
add_subdirectory(testing) add_subdirectory(testing)

@ -486,8 +486,8 @@ public:
#include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h> #include <vtkm/cont/cuda/internal/DeviceAdapterAlgorithmCuda.h>
// #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP // #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
// #include <vtkm/openmp/cont/internal/DeviceAdapterAlgorithmOpenMP.h> // #include <vtkm/openmp/cont/internal/DeviceAdapterAlgorithmOpenMP.h>
// #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
// #include <vtkm/tbb/cont/internal/DeviceAdapterAlgorithmTBB.h> #include <vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h>
#endif #endif
#endif //vtk_m_cont_DeviceAdapterAlgorithm_h #endif //vtk_m_cont_DeviceAdapterAlgorithm_h

@ -162,8 +162,8 @@ public:
#include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h> #include <vtkm/cont/cuda/internal/ArrayManagerExecutionCuda.h>
// #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP // #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_OPENMP
// #include <vtkm/openmp/cont/internal/ArrayManagerExecutionOpenMP.h> // #include <vtkm/openmp/cont/internal/ArrayManagerExecutionOpenMP.h>
// #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
// #include <vtkm/tbb/cont/internal/ArrayManagerExecutionTBB.h> #include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#endif #endif
#endif //vtk_m_cont_internal_ArrayManagerExecution_h #endif //vtk_m_cont_internal_ArrayManagerExecution_h

@ -37,6 +37,60 @@ namespace vtkm {
namespace cont { namespace cont {
namespace internal { namespace internal {
// Adapter around a binary function object. It stores a copy of the functor
// and forwards both operands to it, returning the result as ResultType.
// Operands that arrive as detail::IteratorFromArrayPortalValue<...> (which
// happens when the input array is implicit) are first cast to that type's
// ValueType, so the wrapped functor only ever sees plain values.
template<typename ResultType, typename Function>
struct WrappedBinaryOperator
{
  // The wrapped functor, stored by value.
  Function m_f;

  VTKM_CONT_EXPORT
  WrappedBinaryOperator(const Function &f)
    : m_f(f)
  {}

  // Neither operand is a portal value: forward both unchanged.
  template<typename Lhs, typename Rhs>
  VTKM_CONT_EXPORT ResultType operator()(const Lhs &lhs, const Rhs &rhs) const
  {
    return m_f(lhs, rhs);
  }

  // Both operands are portal values: cast each to its ValueType.
  template<typename Lhs, typename Rhs>
  VTKM_CONT_EXPORT ResultType operator()(
    const detail::IteratorFromArrayPortalValue<Lhs> &lhs,
    const detail::IteratorFromArrayPortalValue<Rhs> &rhs) const
  {
    typedef typename detail::IteratorFromArrayPortalValue<Lhs>::ValueType
      LhsValueType;
    typedef typename detail::IteratorFromArrayPortalValue<Rhs>::ValueType
      RhsValueType;
    return m_f( (LhsValueType)lhs, (RhsValueType)rhs );
  }

  // Only the right operand is a portal value.
  template<typename Lhs, typename Rhs>
  VTKM_CONT_EXPORT ResultType operator()(
    const Lhs &lhs,
    const detail::IteratorFromArrayPortalValue<Rhs> &rhs) const
  {
    typedef typename detail::IteratorFromArrayPortalValue<Rhs>::ValueType
      RhsValueType;
    return m_f( lhs, (RhsValueType)rhs );
  }

  // Only the left operand is a portal value.
  template<typename Lhs, typename Rhs>
  VTKM_CONT_EXPORT ResultType operator()(
    const detail::IteratorFromArrayPortalValue<Lhs> &lhs,
    const Rhs &rhs) const
  {
    typedef typename detail::IteratorFromArrayPortalValue<Lhs>::ValueType
      LhsValueType;
    return m_f( (LhsValueType)lhs, rhs );
  }
};
/// \brief /// \brief
/// ///
/// This struct provides algorithms that implement "general" device adapter /// This struct provides algorithms that implement "general" device adapter

@ -38,60 +38,6 @@
namespace vtkm { namespace vtkm {
namespace cont { namespace cont {
namespace internal
{
template<typename ResultType, typename Function>
struct WrappedBinaryOperator
{
Function m_f;
VTKM_CONT_EXPORT
WrappedBinaryOperator(const Function &f)
: m_f(f)
{}
template<typename Argument1, typename Argument2>
VTKM_CONT_EXPORT ResultType operator()(const Argument1 &x, const Argument2 &y) const
{
return m_f(x, y);
}
template<typename Argument1, typename Argument2>
VTKM_CONT_EXPORT ResultType operator()(
const detail::IteratorFromArrayPortalValue<Argument1> &x,
const detail::IteratorFromArrayPortalValue<Argument2> &y) const
{
typedef typename detail::IteratorFromArrayPortalValue<Argument1>::ValueType
ValueTypeX;
typedef typename detail::IteratorFromArrayPortalValue<Argument2>::ValueType
ValueTypeY;
return m_f( (ValueTypeX)x, (ValueTypeY)y );
}
template<typename Argument1, typename Argument2>
VTKM_CONT_EXPORT ResultType operator()(
const Argument1 &x,
const detail::IteratorFromArrayPortalValue<Argument2> &y) const
{
typedef typename detail::IteratorFromArrayPortalValue<Argument2>::ValueType
ValueTypeY;
return m_f( x, (ValueTypeY)y );
}
template<typename Argument1, typename Argument2>
VTKM_CONT_EXPORT ResultType operator()(
const detail::IteratorFromArrayPortalValue<Argument1> &x,
const Argument2 &y) const
{
typedef typename detail::IteratorFromArrayPortalValue<Argument1>::ValueType
ValueTypeX;
return m_f( (ValueTypeX)x, y );
}
};
}
template<> template<>
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagSerial> : struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagSerial> :
@ -204,7 +150,7 @@ public:
PortalIn inputPortal = input.PrepareForInput(Device()); PortalIn inputPortal = input.PrepareForInput(Device());
PortalOut outputPortal = output.PrepareForOutput(numberOfValues, Device()); PortalOut outputPortal = output.PrepareForOutput(numberOfValues, Device());
if (numberOfValues <= 0) { return T(0); } if (numberOfValues <= 0) { return T(); }
std::partial_sum(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal), std::partial_sum(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal),
vtkm::cont::ArrayPortalToIteratorEnd(inputPortal), vtkm::cont::ArrayPortalToIteratorEnd(inputPortal),
@ -225,10 +171,6 @@ public:
typedef typename vtkm::cont::ArrayHandle<T,CIn> typedef typename vtkm::cont::ArrayHandle<T,CIn>
::template ExecutionTypes<Device>::PortalConst PortalIn; ::template ExecutionTypes<Device>::PortalConst PortalIn;
//We need to wrap the operator in a WrappedBinaryOperator struct
//which can detect and handle calling the binary operator with complex
//value types such as IteratorFromArrayPortalValue which happen
//when passed an input array that is implicit.
internal::WrappedBinaryOperator<T,BinaryOperation> wrappedBinaryOp( internal::WrappedBinaryOperator<T,BinaryOperation> wrappedBinaryOp(
binaryOp); binaryOp);
@ -237,7 +179,7 @@ public:
PortalIn inputPortal = input.PrepareForInput(Device()); PortalIn inputPortal = input.PrepareForInput(Device());
PortalOut outputPortal = output.PrepareForOutput(numberOfValues, Device()); PortalOut outputPortal = output.PrepareForOutput(numberOfValues, Device());
if (numberOfValues <= 0) { return T(0); } if (numberOfValues <= 0) { return T(); }
std::partial_sum(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal), std::partial_sum(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal),
vtkm::cont::ArrayPortalToIteratorEnd(inputPortal), vtkm::cont::ArrayPortalToIteratorEnd(inputPortal),
@ -263,7 +205,7 @@ public:
PortalIn inputPortal = input.PrepareForInput(Device()); PortalIn inputPortal = input.PrepareForInput(Device());
PortalOut outputPortal = output.PrepareForOutput(numberOfValues, Device()); PortalOut outputPortal = output.PrepareForOutput(numberOfValues, Device());
if (numberOfValues <= 0) { return 0; } if (numberOfValues <= 0) { return T(); }
std::partial_sum(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal), std::partial_sum(vtkm::cont::ArrayPortalToIteratorBegin(inputPortal),
vtkm::cont::ArrayPortalToIteratorEnd(inputPortal), vtkm::cont::ArrayPortalToIteratorEnd(inputPortal),

@ -31,6 +31,7 @@
#define VTKM_DEVICE_ADAPTER_UNDEFINED -1 #define VTKM_DEVICE_ADAPTER_UNDEFINED -1
#define VTKM_DEVICE_ADAPTER_SERIAL 1 #define VTKM_DEVICE_ADAPTER_SERIAL 1
#define VTKM_DEVICE_ADAPTER_CUDA 2 #define VTKM_DEVICE_ADAPTER_CUDA 2
#define VTKM_DEVICE_ADAPTER_TBB 3
#ifndef VTKM_DEVICE_ADAPTER #ifndef VTKM_DEVICE_ADAPTER
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL #define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_SERIAL
@ -98,6 +99,11 @@ struct DeviceAdapterTagCheck
#include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h> #include <vtkm/cont/cuda/internal/DeviceAdapterTagCuda.h>
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagCuda #define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagCuda
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_TBB
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#define VTKM_DEFAULT_DEVICE_ADAPTER_TAG ::vtkm::cont::DeviceAdapterTagTBB
#elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_ERROR #elif VTKM_DEVICE_ADAPTER == VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/internal/DeviceAdapterError.h> #include <vtkm/cont/internal/DeviceAdapterError.h>

@ -0,0 +1,29 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2014 Sandia Corporation.
## Copyright 2014 UT-Battelle, LLC.
## Copyright 2014 Los Alamos National Security.
##
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
# Public headers of the TBB device adapter that live in this directory.
set(headers
DeviceAdapterTBB.h
)
# Process the internal implementation headers before declaring this
# directory's own headers.
add_subdirectory(internal)
# NOTE(review): vtkm_declare_headers is defined elsewhere in the project;
# presumably it registers the listed headers with the build -- confirm.
vtkm_declare_headers(${headers})
# Unit tests for the TBB device adapter.
add_subdirectory(testing)

@ -0,0 +1,27 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_DeviceAdapterTBB_h
#define vtk_m_cont_tbb_DeviceAdapterTBB_h
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterAlgorithmTBB.h>
#endif //vtk_m_cont_tbb_DeviceAdapterTBB_h

@ -0,0 +1,77 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_ArrayManagerExecutionTBB_h
#define vtk_m_cont_tbb_internal_ArrayManagerExecutionTBB_h
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/internal/ArrayManagerExecution.h>
#include <vtkm/cont/internal/ArrayManagerExecutionShareWithControl.h>
// These must be placed in the vtkm::cont::internal namespace so that
// the template can be found.
namespace vtkm {
namespace cont {
namespace internal {
/// Specialization of ArrayManagerExecution for the TBB device adapter tag.
/// It derives from ArrayManagerExecutionShareWithControl and every Prepare*
/// method simply delegates to that superclass.
template <typename T, class StorageTag>
class ArrayManagerExecution<T, StorageTag, vtkm::cont::DeviceAdapterTagTBB>
  : public vtkm::cont::internal::ArrayManagerExecutionShareWithControl<
      T, StorageTag>
{
public:
  typedef vtkm::cont::internal::ArrayManagerExecutionShareWithControl<
    T, StorageTag> Superclass;
  typedef typename Superclass::ValueType ValueType;
  typedef typename Superclass::PortalType PortalType;
  typedef typename Superclass::PortalConstType PortalConstType;
  typedef typename Superclass::StorageType StorageType;

  /// Hands the storage pointer on to the superclass.
  VTKM_CONT_EXPORT
  ArrayManagerExecution(StorageType *storage)
    : Superclass(storage)
  {
  }

  /// Delegates to Superclass::PrepareForInput.
  VTKM_CONT_EXPORT
  PortalConstType PrepareForInput(bool updateData)
  {
    return Superclass::PrepareForInput(updateData);
  }

  /// Delegates to Superclass::PrepareForInPlace.
  VTKM_CONT_EXPORT
  PortalType PrepareForInPlace(bool updateData)
  {
    return Superclass::PrepareForInPlace(updateData);
  }

  /// Delegates to Superclass::PrepareForOutput.
  VTKM_CONT_EXPORT
  PortalType PrepareForOutput(vtkm::Id numberOfValues)
  {
    return Superclass::PrepareForOutput(numberOfValues);
  }
};
}
}
} // namespace vtkm::cont::internal
#endif //vtk_m_cont_tbb_internal_ArrayManagerExecutionTBB_h

@ -0,0 +1,28 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2014 Sandia Corporation.
## Copyright 2014 UT-Battelle, LLC.
## Copyright 2014 Los Alamos National Security.
##
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
# Implementation headers of the TBB device adapter contained in this
# directory.
set(headers
ArrayManagerExecutionTBB.h
DeviceAdapterAlgorithmTBB.h
DeviceAdapterTagTBB.h
parallel_sort.h
)
# NOTE(review): vtkm_declare_headers is defined elsewhere in the project;
# presumably it registers the listed headers with the build -- confirm.
vtkm_declare_headers(${headers})

@ -0,0 +1,554 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h
#define vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h
#include <vtkm/cont/internal/IteratorFromArrayPortal.h>
#include <vtkm/cont/tbb/internal/DeviceAdapterTagTBB.h>
#include <vtkm/cont/tbb/internal/ArrayManagerExecutionTBB.h>
#include <vtkm/exec/internal/ErrorMessageBuffer.h>
#include <vtkm/Extent.h>
#include <vtkm/cont/ArrayHandle.h>
#include <vtkm/cont/DeviceAdapterAlgorithm.h>
#include <vtkm/cont/ErrorExecution.h>
#include <vtkm/cont/internal/DeviceAdapterAlgorithmGeneral.h>
#include <boost/type_traits/remove_reference.hpp>
//We provide a patched implementation of tbb parallel_sort
//that fixes ADL for std::swap. This patch has been submitted to Intel
//and should be included in a future version of TBB.
#include <vtkm/cont/tbb/internal/parallel_sort.h>
#include <tbb/blocked_range.h>
#include <tbb/blocked_range3d.h>
#include <tbb/parallel_for.h>
#include <tbb/parallel_scan.h>
#include <tbb/partitioner.h>
#include <tbb/tick_count.h>
namespace vtkm {
namespace cont {
template<>
struct DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagTBB> :
vtkm::cont::internal::DeviceAdapterAlgorithmGeneral<
DeviceAdapterAlgorithm<vtkm::cont::DeviceAdapterTagTBB>,
vtkm::cont::DeviceAdapterTagTBB>
{
private:
// The "grain size" of scheduling with TBB. Not a lot of thought has gone
// into picking this size.
static const vtkm::Id TBB_GRAIN_SIZE = 128;
/// Body object handed to ::tbb::parallel_scan to compute an inclusive scan.
///
/// It provides the members parallel_scan calls on its body: a splitting
/// constructor, a pre-scan and a final-scan call operator, reverse_join and
/// assign. Sum holds the running total of the ranges processed so far and
/// starts out value-initialized.
/// NOTE(review): this presumably relies on ValueType() acting as an identity
/// for BinaryOperation -- confirm for non-additive operators.
template<class InputPortalType, class OutputPortalType,
         class BinaryOperationType>
struct ScanInclusiveBody
{
  typedef typename boost::remove_reference<
      typename OutputPortalType::ValueType>::type ValueType;

  ValueType Sum;
  InputPortalType InputPortal;
  OutputPortalType OutputPortal;
  BinaryOperationType BinaryOperation;

  /// Builds the initial body with a value-initialized running total.
  VTKM_CONT_EXPORT
  ScanInclusiveBody(const InputPortalType &inputPortal,
                    const OutputPortalType &outputPortal,
                    BinaryOperationType binaryOperation)
    : Sum(),
      InputPortal(inputPortal),
      OutputPortal(outputPortal),
      BinaryOperation(binaryOperation)
  { }

  /// Splitting constructor: shares the portals and the operator of \c body
  /// but starts with a fresh, value-initialized running total.
  VTKM_EXEC_CONT_EXPORT
  ScanInclusiveBody(const ScanInclusiveBody &body, ::tbb::split)
    : Sum(),
      InputPortal(body.InputPortal),
      OutputPortal(body.OutputPortal),
      BinaryOperation(body.BinaryOperation)
  { }

  /// Pre-scan pass: folds the inputs of \c range into Sum without writing
  /// any output.
  VTKM_EXEC_EXPORT
  void operator()(const ::tbb::blocked_range<vtkm::Id> &range, ::tbb::pre_scan_tag)
  {
    typedef vtkm::cont::ArrayPortalToIterators<InputPortalType>
        InputIteratorsType;
    typedef typename InputIteratorsType::IteratorType InputIterator;

    InputIteratorsType inputIterators(this->InputPortal);
    InputIterator source = inputIterators.GetBegin() + range.begin();

    // Accumulate in a local and store once at the end, instead of updating
    // the member on every iteration, to reduce false sharing.
    ValueType runningTotal = this->Sum;
    const vtkm::Id stop = range.end();
    for (vtkm::Id index = range.begin(); index != stop; ++index)
    {
      runningTotal = this->BinaryOperation(runningTotal, *source);
      ++source;
    }
    this->Sum = runningTotal;
  }

  /// Final-scan pass: folds the inputs of \c range into Sum and writes each
  /// intermediate total to the matching output position.
  VTKM_EXEC_EXPORT
  void operator()(const ::tbb::blocked_range<vtkm::Id> &range, ::tbb::final_scan_tag)
  {
    typedef vtkm::cont::ArrayPortalToIterators<InputPortalType>
        InputIteratorsType;
    typedef vtkm::cont::ArrayPortalToIterators<OutputPortalType>
        OutputIteratorsType;
    typedef typename InputIteratorsType::IteratorType InputIterator;
    typedef typename OutputIteratorsType::IteratorType OutputIterator;

    InputIteratorsType inputIterators(this->InputPortal);
    OutputIteratorsType outputIterators(this->OutputPortal);
    InputIterator source = inputIterators.GetBegin() + range.begin();
    OutputIterator destination = outputIterators.GetBegin() + range.begin();

    // Accumulate in a local and store once at the end, instead of updating
    // the member on every iteration, to reduce false sharing.
    ValueType runningTotal = this->Sum;
    const vtkm::Id stop = range.end();
    for (vtkm::Id index = range.begin(); index != stop; ++index)
    {
      runningTotal = this->BinaryOperation(runningTotal, *source);
      *destination = runningTotal;
      ++source;
      ++destination;
    }
    this->Sum = runningTotal;
  }

  /// Prepends the total of the body to the left: Sum = op(left.Sum, Sum).
  VTKM_EXEC_CONT_EXPORT
  void reverse_join(const ScanInclusiveBody &left)
  {
    this->Sum = this->BinaryOperation(left.Sum, this->Sum);
  }

  /// Copies the running total of \c src into this body.
  VTKM_EXEC_CONT_EXPORT
  void assign(const ScanInclusiveBody &src)
  {
    this->Sum = src.Sum;
  }
};
/// Runs an inclusive scan from \c inputPortal into \c outputPortal with
/// ::tbb::parallel_scan and returns the total left in the scan body.
///
/// The user operator is wrapped in internal::WrappedBinaryOperator before it
/// is handed to the body. The scanned range is [0, number of input values).
template<class InputPortalType, class OutputPortalType,
         class BinaryOperationType>
VTKM_CONT_EXPORT static
typename boost::remove_reference<typename OutputPortalType::ValueType>::type
ScanInclusivePortals(InputPortalType inputPortal,
                     OutputPortalType outputPortal,
                     BinaryOperationType binaryOperation)
{
  typedef typename
      boost::remove_reference<typename OutputPortalType::ValueType>::type
      ValueType;
  typedef internal::WrappedBinaryOperator<ValueType, BinaryOperationType>
      WrappedBinaryOp;
  typedef ScanInclusiveBody<InputPortalType, OutputPortalType, WrappedBinaryOp>
      BodyType;

  WrappedBinaryOp wrappedOperation(binaryOperation);
  BodyType scanBody(inputPortal, outputPortal, wrappedOperation);

  const vtkm::Id numberOfValues = inputPortal.GetNumberOfValues();
  const ::tbb::blocked_range<vtkm::Id> wholeRange(0, numberOfValues);
  ::tbb::parallel_scan(wholeRange, scanBody);

  return scanBody.Sum;
}
/// Body object handed to ::tbb::parallel_scan to compute an exclusive scan.
///
/// Same layout as the inclusive body, but the final pass writes the running
/// total *before* the current input is folded in. Sum starts out
/// value-initialized.
/// NOTE(review): this presumably relies on ValueType() acting as an identity
/// for BinaryOperation -- confirm for non-additive operators.
template<class InputPortalType, class OutputPortalType,
         class BinaryOperationType>
struct ScanExclusiveBody
{
  typedef typename boost::remove_reference<
      typename OutputPortalType::ValueType>::type ValueType;

  ValueType Sum;
  InputPortalType InputPortal;
  OutputPortalType OutputPortal;
  BinaryOperationType BinaryOperation;

  /// Builds the initial body with a value-initialized running total.
  VTKM_CONT_EXPORT
  ScanExclusiveBody(const InputPortalType &inputPortal,
                    const OutputPortalType &outputPortal,
                    BinaryOperationType binaryOperation)
    : Sum(),
      InputPortal(inputPortal),
      OutputPortal(outputPortal),
      BinaryOperation(binaryOperation)
  { }

  /// Splitting constructor: shares the portals and the operator of \c body
  /// but starts with a fresh, value-initialized running total.
  VTKM_EXEC_CONT_EXPORT
  ScanExclusiveBody(const ScanExclusiveBody &body, ::tbb::split)
    : Sum(),
      InputPortal(body.InputPortal),
      OutputPortal(body.OutputPortal),
      BinaryOperation(body.BinaryOperation)
  { }

  /// Pre-scan pass: folds the inputs of \c range into Sum without writing
  /// any output.
  VTKM_EXEC_EXPORT
  void operator()(const ::tbb::blocked_range<vtkm::Id> &range, ::tbb::pre_scan_tag)
  {
    typedef vtkm::cont::ArrayPortalToIterators<InputPortalType>
        InputIteratorsType;
    typedef typename InputIteratorsType::IteratorType InputIterator;

    InputIteratorsType inputIterators(this->InputPortal);
    // Position the iterator on the first item of this range.
    InputIterator source = inputIterators.GetBegin() + range.begin();

    ValueType runningTotal = this->Sum;
    const vtkm::Id stop = range.end();
    for (vtkm::Id index = range.begin(); index != stop; ++index)
    {
      runningTotal = this->BinaryOperation(runningTotal, *source);
      ++source;
    }
    this->Sum = runningTotal;
  }

  /// Final-scan pass: writes the total accumulated so far to each output
  /// position, then folds the matching input into the total.
  VTKM_EXEC_EXPORT
  void operator()(const ::tbb::blocked_range<vtkm::Id> &range, ::tbb::final_scan_tag)
  {
    typedef vtkm::cont::ArrayPortalToIterators<InputPortalType>
        InputIteratorsType;
    typedef vtkm::cont::ArrayPortalToIterators<OutputPortalType>
        OutputIteratorsType;
    typedef typename InputIteratorsType::IteratorType InputIterator;
    typedef typename OutputIteratorsType::IteratorType OutputIterator;

    InputIteratorsType inputIterators(this->InputPortal);
    OutputIteratorsType outputIterators(this->OutputPortal);
    // Position both iterators on the first item of this range.
    InputIterator source = inputIterators.GetBegin() + range.begin();
    OutputIterator destination = outputIterators.GetBegin() + range.begin();

    ValueType runningTotal = this->Sum;
    const vtkm::Id stop = range.end();
    for (vtkm::Id index = range.begin(); index != stop; ++index)
    {
      // Read the input into a local before writing the output: the input and
      // output portals could point to the same memory location.
      ValueType inputValue = *source;
      *destination = runningTotal;
      runningTotal = this->BinaryOperation(runningTotal, inputValue);
      ++source;
      ++destination;
    }
    this->Sum = runningTotal;
  }

  /// Prepends the total of the body to the left: Sum = op(left.Sum, Sum).
  VTKM_EXEC_CONT_EXPORT
  void reverse_join(const ScanExclusiveBody &left)
  {
    this->Sum = this->BinaryOperation(left.Sum, this->Sum);
  }

  /// Copies the running total of \c src into this body.
  VTKM_EXEC_CONT_EXPORT
  void assign(const ScanExclusiveBody &src)
  {
    this->Sum = src.Sum;
  }
};
/// Runs an exclusive scan from \c inputPortal into \c outputPortal with
/// ::tbb::parallel_scan and returns the total left in the scan body.
///
/// The user operator is wrapped in internal::WrappedBinaryOperator before it
/// is handed to the body. The scanned range is [0, number of input values).
template<class InputPortalType, class OutputPortalType,
         class BinaryOperationType>
VTKM_CONT_EXPORT static
typename boost::remove_reference<typename OutputPortalType::ValueType>::type
ScanExclusivePortals(InputPortalType inputPortal,
                     OutputPortalType outputPortal,
                     BinaryOperationType binaryOperation)
{
  typedef typename
      boost::remove_reference<typename OutputPortalType::ValueType>::type
      ValueType;
  typedef internal::WrappedBinaryOperator<ValueType, BinaryOperationType>
      WrappedBinaryOp;
  typedef ScanExclusiveBody<InputPortalType, OutputPortalType, WrappedBinaryOp>
      BodyType;

  WrappedBinaryOp wrappedOperation(binaryOperation);
  BodyType scanBody(inputPortal, outputPortal, wrappedOperation);

  const vtkm::Id numberOfValues = inputPortal.GetNumberOfValues();
  const ::tbb::blocked_range<vtkm::Id> wholeRange(0, numberOfValues);
  ::tbb::parallel_scan(wholeRange, scanBody);

  // The value handed back is the total accumulated by the body (which is not
  // the last value written to the output of an exclusive scan); that is how
  // the function is specified.
  return scanBody.Sum;
}
public:
/// Inclusive scan of \c input into \c output using vtkm::internal::Add.
/// \c output is prepared with as many values as \c input holds. Returns the
/// total reported by ScanInclusivePortals.
template<typename T, class CIn, class COut>
VTKM_CONT_EXPORT static T ScanInclusive(
    const vtkm::cont::ArrayHandle<T,CIn> &input,
    vtkm::cont::ArrayHandle<T,COut> &output)
{
  typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
  return ScanInclusivePortals(
        input.PrepareForInput(DeviceTag()),
        output.PrepareForOutput(input.GetNumberOfValues(), DeviceTag()),
        vtkm::internal::Add());
}

/// Inclusive scan of \c input into \c output using the caller-supplied
/// \c binaryOp. \c output is prepared with as many values as \c input holds.
/// Returns the total reported by ScanInclusivePortals.
template<typename T, class CIn, class COut, class BinaryOperation>
VTKM_CONT_EXPORT static T ScanInclusive(
    const vtkm::cont::ArrayHandle<T,CIn> &input,
    vtkm::cont::ArrayHandle<T,COut> &output,
    BinaryOperation binaryOp)
{
  typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
  return ScanInclusivePortals(
        input.PrepareForInput(DeviceTag()),
        output.PrepareForOutput(input.GetNumberOfValues(), DeviceTag()),
        binaryOp);
}
/// Exclusive scan of \c input into \c output using vtkm::internal::Add.
/// \c output is prepared with as many values as \c input holds. Returns the
/// total reported by ScanExclusivePortals.
template<typename T, class CIn, class COut>
VTKM_CONT_EXPORT static T ScanExclusive(
    const vtkm::cont::ArrayHandle<T,CIn> &input,
    vtkm::cont::ArrayHandle<T,COut> &output)
{
  typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
  return ScanExclusivePortals(
        input.PrepareForInput(DeviceTag()),
        output.PrepareForOutput(input.GetNumberOfValues(), DeviceTag()),
        vtkm::internal::Add());
}

/// Exclusive scan of \c input into \c output using the caller-supplied
/// \c binaryOp. \c output is prepared with as many values as \c input holds.
/// Returns the total reported by ScanExclusivePortals.
template<typename T, class CIn, class COut, class BinaryOperation>
VTKM_CONT_EXPORT static T ScanExclusive(
    const vtkm::cont::ArrayHandle<T,CIn> &input,
    vtkm::cont::ArrayHandle<T,COut> &output,
    BinaryOperation binaryOp)
{
  typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
  return ScanExclusivePortals(
        input.PrepareForInput(DeviceTag()),
        output.PrepareForOutput(input.GetNumberOfValues(), DeviceTag()),
        binaryOp);
}
private:
/// Adapts a functor that takes a flat vtkm::Id index so that it can be run by
/// ::tbb::parallel_for over a ::tbb::blocked_range<vtkm::Id>.
///
/// Exceptions raised while running the functor are caught inside the call
/// operator and recorded in the error message buffer instead of propagating.
template<class FunctorType>
class ScheduleKernel
{
public:
  /// Stores a copy of \c functor.
  VTKM_CONT_EXPORT ScheduleKernel(const FunctorType &functor)
    : Functor(functor)
  { }

  /// Installs the buffer used to report errors, both on this kernel and on
  /// the wrapped functor.
  VTKM_CONT_EXPORT void SetErrorMessageBuffer(
      const vtkm::exec::internal::ErrorMessageBuffer &errorMessage)
  {
    this->ErrorMessage = errorMessage;
    this->Functor.SetErrorMessageBuffer(errorMessage);
  }

  /// Invokes the functor once for every index in \c range.
  VTKM_EXEC_EXPORT
  void operator()(const ::tbb::blocked_range<vtkm::Id> &range) const {
    // The TBB device adapter causes array classes to be shared between
    // control and execution environment. This means that it is possible for
    // an exception to be thrown even though this is typically not allowed.
    // Throwing an exception from here is bad because there are several
    // simultaneous threads running. Get around the problem by catching the
    // error and setting the message buffer as expected.
    try
    {
      for (vtkm::Id index = range.begin(); index < range.end(); index++)
      {
        this->Functor(index);
      }
    }
    // Catch by const reference: catching by value would copy the exception
    // and slice any class derived from vtkm::cont::Error.
    catch (const vtkm::cont::Error &error)
    {
      this->ErrorMessage.RaiseError(error.GetMessage().c_str());
    }
    catch (...)
    {
      this->ErrorMessage.RaiseError(
          "Unexpected error in execution environment.");
    }
  }

private:
  FunctorType Functor;
  vtkm::exec::internal::ErrorMessageBuffer ErrorMessage;
};
public:
/// Runs \c functor once for every index in [0, numInstances) using
/// ::tbb::parallel_for with a grain size of TBB_GRAIN_SIZE.
///
/// Throws vtkm::cont::ErrorExecution carrying the recorded message if an
/// error was raised in the error message buffer during the run.
template<class FunctorType>
VTKM_CONT_EXPORT
static void Schedule(FunctorType functor, vtkm::Id numInstances)
{
  // Local character buffer that backs the error message buffer.
  const vtkm::Id MESSAGE_SIZE = 1024;
  char errorString[MESSAGE_SIZE];
  errorString[0] = '\0';
  vtkm::exec::internal::ErrorMessageBuffer
      errorMessage(errorString, MESSAGE_SIZE);

  ScheduleKernel<FunctorType> kernel(functor);
  kernel.SetErrorMessageBuffer(errorMessage);

  ::tbb::parallel_for(
        ::tbb::blocked_range<vtkm::Id>(0, numInstances, TBB_GRAIN_SIZE),
        kernel);

  if (!errorMessage.IsErrorRaised())
  {
    return;
  }
  throw vtkm::cont::ErrorExecution(errorString);
}
private:
/// Adapts a functor that takes a flat vtkm::Id index so that it can be run by
/// ::tbb::parallel_for over a ::tbb::blocked_range3d<vtkm::Id>.
///
/// Each (i, j, k) triple of the range is converted to the flat index
/// i + Dims[0] * (j + Dims[1] * k), where cols() supplies i, rows() supplies
/// j and pages() supplies k. Exceptions raised while running the functor are
/// caught and recorded in the error message buffer instead of propagating.
template<class FunctorType>
class ScheduleKernelId3
{
public:
  /// Stores a copy of \c functor and of the full dimensions \c dims used to
  /// flatten 3D indices.
  VTKM_CONT_EXPORT ScheduleKernelId3(const FunctorType &functor,
                                     const vtkm::Id3& dims)
    : Functor(functor),
      Dims(dims)
  { }

  /// Installs the buffer used to report errors, both on this kernel and on
  /// the wrapped functor.
  VTKM_CONT_EXPORT void SetErrorMessageBuffer(
      const vtkm::exec::internal::ErrorMessageBuffer &errorMessage)
  {
    this->ErrorMessage = errorMessage;
    this->Functor.SetErrorMessageBuffer(errorMessage);
  }

  /// Invokes the functor once for every (i, j, k) in \c range.
  VTKM_EXEC_EXPORT
  void operator()(const ::tbb::blocked_range3d<vtkm::Id> &range) const {
    try
    {
      for( vtkm::Id k=range.pages().begin(); k!=range.pages().end(); ++k)
      {
        const vtkm::Id pageOffset = k * this->Dims[1] * this->Dims[0];
        for( vtkm::Id j=range.rows().begin(); j!=range.rows().end(); ++j)
        {
          // Compute the row offset from scratch for every row. Accumulating
          // j * Dims[0] into a per-page index would add the offsets of all
          // previously visited rows again and yield wrong flat indices.
          const vtkm::Id rowOffset = pageOffset + j * this->Dims[0];
          for( vtkm::Id i=range.cols().begin(); i!=range.cols().end(); ++i)
          {
            this->Functor(rowOffset + i);
          }
        }
      }
    }
    // Catch by const reference: catching by value would copy the exception
    // and slice any class derived from vtkm::cont::Error.
    catch (const vtkm::cont::Error &error)
    {
      this->ErrorMessage.RaiseError(error.GetMessage().c_str());
    }
    catch (...)
    {
      this->ErrorMessage.RaiseError(
          "Unexpected error in execution environment.");
    }
  }

private:
  FunctorType Functor;
  vtkm::Id3 Dims;
  vtkm::exec::internal::ErrorMessageBuffer ErrorMessage;
};
public:
/// Runs \c functor over the 3D index space [0, rangeMax[0]) x
/// [0, rangeMax[1]) x [0, rangeMax[2]) using ::tbb::parallel_for on a
/// ::tbb::blocked_range3d. The functor itself is called with flat indices
/// computed by ScheduleKernelId3.
///
/// Throws vtkm::cont::ErrorExecution carrying the recorded message if an
/// error was raised in the error message buffer during the run.
template<class FunctorType>
VTKM_CONT_EXPORT
static void Schedule(FunctorType functor,
                     vtkm::Id3 rangeMax)
{
  // Local character buffer that backs the error message buffer.
  const vtkm::Id MESSAGE_SIZE = 1024;
  char errorString[MESSAGE_SIZE];
  errorString[0] = '\0';
  vtkm::exec::internal::ErrorMessageBuffer
      errorMessage(errorString, MESSAGE_SIZE);

  ScheduleKernelId3<FunctorType> kernel(functor,rangeMax);
  kernel.SetErrorMessageBuffer(errorMessage);

  // Memory is generally set up so that iterating the first dimension in the
  // tightest loop has the best cache coherence; hence pages map to
  // rangeMax[2], rows to rangeMax[1] and cols to rangeMax[0].
  ::tbb::parallel_for(
        ::tbb::blocked_range3d<vtkm::Id>(0, rangeMax[2],
                                         0, rangeMax[1],
                                         0, rangeMax[0]),
        kernel);

  if (!errorMessage.IsErrorRaised())
  {
    return;
  }
  throw vtkm::cont::ErrorExecution(errorString);
}
/// Sorts \c values in place with ::tbb::parallel_sort and its default
/// comparator.
template<typename T, class Container>
VTKM_CONT_EXPORT static void Sort(
    vtkm::cont::ArrayHandle<T,Container> &values)
{
  typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
  typedef typename vtkm::cont::ArrayHandle<T,Container>::template
      ExecutionTypes<DeviceTag>::Portal PortalType;
  typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;

  PortalType arrayPortal = values.PrepareForInPlace(DeviceTag());
  IteratorsType iterators(arrayPortal);
  ::tbb::parallel_sort(iterators.GetBegin(), iterators.GetEnd());
}

/// Sorts \c values in place with ::tbb::parallel_sort, ordering elements
/// with the caller-supplied \c comp.
template<typename T, class Container, class Compare>
VTKM_CONT_EXPORT static void Sort(
    vtkm::cont::ArrayHandle<T,Container> &values, Compare comp)
{
  typedef vtkm::cont::DeviceAdapterTagTBB DeviceTag;
  typedef typename vtkm::cont::ArrayHandle<T,Container>::template
      ExecutionTypes<DeviceTag>::Portal PortalType;
  typedef vtkm::cont::ArrayPortalToIterators<PortalType> IteratorsType;

  PortalType arrayPortal = values.PrepareForInPlace(DeviceTag());
  IteratorsType iterators(arrayPortal);
  ::tbb::parallel_sort(iterators.GetBegin(), iterators.GetEnd(), comp);
}
VTKM_CONT_EXPORT static void Synchronize()
{
// Nothing to do. This device schedules all of its operations using a
// split/join paradigm. This means that if the control thread is
// calling this method, then nothing should be running in the execution
// environment.
}
};
/// TBB contains its own high resolution timer.
///
template<>
class DeviceAdapterTimerImplementation<vtkm::cont::DeviceAdapterTagTBB>
{
public:
VTKM_CONT_EXPORT DeviceAdapterTimerImplementation()
{
this->Reset();
}
VTKM_CONT_EXPORT void Reset()
{
vtkm::cont::DeviceAdapterAlgorithm<
vtkm::cont::DeviceAdapterTagTBB>::Synchronize();
this->StartTime = ::tbb::tick_count::now();
}
VTKM_CONT_EXPORT vtkm::Float64 GetElapsedTime()
{
vtkm::cont::DeviceAdapterAlgorithm<
vtkm::cont::DeviceAdapterTagTBB>::Synchronize();
::tbb::tick_count currentTime = ::tbb::tick_count::now();
::tbb::tick_count::interval_t elapsedTime = currentTime - this->StartTime;
return static_cast<vtkm::Float64>(elapsedTime.seconds());
}
private:
::tbb::tick_count StartTime;
};
}
} // namespace vtkm::cont
#endif //vtk_m_cont_tbb_internal_DeviceAdapterAlgorithmTBB_h

@ -0,0 +1,28 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#ifndef vtk_m_cont_tbb_internal_DeviceAdapterTagTBB_h
#define vtk_m_cont_tbb_internal_DeviceAdapterTagTBB_h
#include <vtkm/cont/internal/DeviceAdapterTag.h>
VTKM_CREATE_DEVICE_ADAPTER(TBB);
#endif //vtk_m_cont_tbb_internal_DeviceAdapterTagTBB_h

@ -0,0 +1,233 @@
/*
Copyright 2005-2013 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Threading Building Blocks; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by
the GNU General Public License.
*/
#ifndef __TBB_parallel_sort_H
#define __TBB_parallel_sort_H
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#include <algorithm>
#include <iterator>
#include <functional>
namespace tbb {
//! @cond INTERNAL
namespace internal {
//! Range used in quicksort to split elements into subranges based on a value.
/** The split operation selects a splitter and places all elements less than or equal
to the value in the first range and the remaining elements in the second range.
@ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
class quick_sort_range: private no_assign {
// Returns whichever of the indices l, m, r refers to the median of the three
// elements array[l], array[m], array[r] under comp.
inline size_t median_of_three(const RandomAccessIterator &array, size_t l, size_t m, size_t r) const {
return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp( array[l], array[r]) ? r : l ) )
: ( comp(array[r], array[m]) ? m : ( comp( array[r], array[l] ) ? r : l ) );
}
// Chooses a pivot index as the median of three medians, each taken over
// three samples spaced range.size/8 apart (the last sample is the final
// element of the range).
inline size_t pseudo_median_of_nine( const RandomAccessIterator &array, const quick_sort_range &range ) const {
size_t offset = range.size/8u;
return median_of_three(array,
median_of_three(array, 0, offset, offset*2),
median_of_three(array, offset*3, offset*4, offset*5),
median_of_three(array, offset*6, offset*7, range.size - 1) );
}
public:
// Ranges with fewer elements than this are reported as not divisible.
static const size_t grainsize = 500;
const Compare &comp;
RandomAccessIterator begin;
size_t size;
// Describes the `size_` elements starting at `begin_`, ordered by `comp_`
// (held by reference).
quick_sort_range( RandomAccessIterator begin_, size_t size_, const Compare &comp_ ) :
comp(comp_), begin(begin_), size(size_) {}
bool empty() const {return size==0;}
bool is_divisible() const {return size>=grainsize;}
// Splitting constructor: partitions `range` in place around a pivot. On
// return `range` is shrunk to the part before the pivot and this object
// describes the part after the pivot.
quick_sort_range( quick_sort_range& range, split ) : comp(range.comp) {
// Unqualified swap calls below pick up a swap found by argument-dependent
// lookup for the element type, with std::swap as the fallback.
using std::swap;
RandomAccessIterator array = range.begin;
RandomAccessIterator key0 = range.begin;
size_t m = pseudo_median_of_nine(array, range);
// Move the chosen pivot to the front so that *key0 is the partition key.
if (m) swap ( array[0], array[m] );
size_t i=0;
size_t j=range.size;
// Partition interval [i+1,j-1] with key *key0.
for(;;) {
__TBB_ASSERT( i<j, NULL );
// Loop must terminate since array[l]==*key0.
do {
--j;
__TBB_ASSERT( i<=j, "bad ordering relation?" );
} while( comp( *key0, array[j] ));
do {
__TBB_ASSERT( i<=j, NULL );
if( i==j ) goto partition;
++i;
} while( comp( array[i],*key0 ));
if( i==j ) goto partition;
swap( array[i], array[j] );
}
partition:
// Put the partition key where it belongs
swap( array[j], *key0 );
// array[l..j) is less or equal to key.
// array(j..r) is greater or equal to key.
// array[j] is equal to key
i=j+1;
// This object takes the elements after the key; `range` keeps those before.
begin = array+i;
size = range.size-i;
range.size = j;
}
};
#if __TBB_TASK_GROUP_CONTEXT
//! Body class used to test if elements in a range are presorted
/** Compares each element of the range with its predecessor; on the first pair
that is out of order under comp it cancels the task group execution, which the
caller uses as the signal that the data is not presorted.
@ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
class quick_sort_pretest_body : internal::no_assign {
const Compare &comp;
public:
quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {}
void operator()( const blocked_range<RandomAccessIterator>& range ) const {
task &my_task = task::self();
RandomAccessIterator my_end = range.end();
int i = 0;
for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i) {
// Poll for cancellation only on every 64th element.
if ( i%64 == 0 && my_task.is_cancelled() ) break;
// The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
if ( comp( *(k), *(k-1) ) ) {
my_task.cancel_group_execution();
break;
}
}
}
};
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Body class used to sort elements in a range that is smaller than the grainsize.
/** Sorts the elements described by the range serially with std::sort, using the
    comparator carried by the range.
    @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
struct quick_sort_body {
    void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const {
        RandomAccessIterator first = range.begin;
        RandomAccessIterator last = range.begin + range.size;
        std::sort( first, last, range.comp );
    }
};
//! Wrapper method to initiate the sort by calling parallel_for.
/** When task group contexts are available, the input is first tested for being
already sorted: the leading serial_cutoff pairs are checked serially and the
remainder in parallel. The quicksort is only run if an out-of-order pair is
found. Without task group contexts the quicksort always runs.
@ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) {
#if __TBB_TASK_GROUP_CONTEXT
task_group_context my_context;
const int serial_cutoff = 9;
__TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" );
RandomAccessIterator k;
// Serial check of the first serial_cutoff adjacent pairs; any pair out of
// order means a sort is required, so skip the parallel pretest.
for (k = begin; k != begin + serial_cutoff; ++k ) {
if ( comp( *(k+1), *k ) ) {
goto do_parallel_quick_sort;
}
}
// Parallel pretest of the remaining elements; the body cancels my_context
// when it finds a pair that is out of order.
parallel_for( blocked_range<RandomAccessIterator>(k+1, end),
quick_sort_pretest_body<RandomAccessIterator,Compare>(comp),
auto_partitioner(),
my_context);
// The label below is attached to the statement controlled by this `if`, so
// the goto above reaches the sort directly, while falling through runs the
// sort only when the pretest was cancelled.
if (my_context.is_group_execution_cancelled())
do_parallel_quick_sort:
#endif /* __TBB_TASK_GROUP_CONTEXT */
parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ),
quick_sort_body<RandomAccessIterator,Compare>(),
auto_partitioner() );
}
} // namespace internal
//! @endcond
/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
- \code void swap( T& x, T& y ) \endcode Swaps \c x and \c y
- \code bool Compare::operator()( const T& x, const T& y ) \endcode
True if x comes before y;
**/
/** \name parallel_sort
See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
//@{
//! Sorts the data in [begin,end) using the given comparator
/** The compare function object is used for all comparisons between elements during sorting.
    The compare object must define a bool operator() function.
    Nothing is done unless end > begin; inputs with fewer than min_parallel_size
    elements are sorted serially with std::sort, larger ones in parallel.
    @ingroup algorithms **/
template<typename RandomAccessIterator, typename Compare>
void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp) {
    const int min_parallel_size = 500;
    if( !(end > begin) ) {
        return;
    }
    if( end - begin < min_parallel_size ) {
        std::sort(begin, end, comp);
        return;
    }
    internal::parallel_quick_sort(begin, end, comp);
}
//! Sorts the data in [begin,end) with a default comparator \c std::less on the iterator's value type
/** Forwards to the comparator overload of parallel_sort.
    @ingroup algorithms **/
template<typename RandomAccessIterator>
inline void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) {
    typedef typename std::iterator_traits<RandomAccessIterator>::value_type value_type;
    parallel_sort( begin, end, std::less< value_type >() );
}
//! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
/** Pointer overload; forwards to the comparator overload of parallel_sort.
    @ingroup algorithms **/
template<typename T>
inline void parallel_sort( T * begin, T * end ) {
    const std::less< T > ascending = std::less< T >();
    parallel_sort( begin, end, ascending );
}
//@}
} // namespace tbb
#endif

@ -0,0 +1,25 @@
##============================================================================
## Copyright (c) Kitware, Inc.
## All rights reserved.
## See LICENSE.txt for details.
## This software is distributed WITHOUT ANY WARRANTY; without even
## the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
## PURPOSE. See the above copyright notice for more information.
##
## Copyright 2014 Sandia Corporation.
## Copyright 2014 UT-Battelle, LLC.
## Copyright 2014 Los Alamos National Security.
##
## Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
## the U.S. Government retains certain rights in this software.
##
## Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
## Laboratory (LANL), the U.S. Government retains certain rights in
## this software.
##============================================================================
# Unit test sources for the TBB device adapter. They are registered through
# vtkm_unit_tests(), a project helper defined elsewhere, under the name TBB.
set(unit_tests
UnitTestDeviceAdapterTBB.cxx
UnitTestTBBArrayHandleFancy.cxx
)
vtkm_unit_tests(TBB SOURCES ${unit_tests})

@ -0,0 +1,31 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/cont/testing/TestingDeviceAdapter.h>
int UnitTestDeviceAdapterTBB(int, char *[])
{
return vtkm::cont::testing::TestingDeviceAdapter
<vtkm::cont::DeviceAdapterTagTBB>::Run();
}

@ -0,0 +1,30 @@
//============================================================================
// Copyright (c) Kitware, Inc.
// All rights reserved.
// See LICENSE.txt for details.
// This software is distributed WITHOUT ANY WARRANTY; without even
// the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
// PURPOSE. See the above copyright notice for more information.
//
// Copyright 2014 Sandia Corporation.
// Copyright 2014 UT-Battelle, LLC.
// Copyright 2014 Los Alamos National Security.
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Under the terms of Contract DE-AC52-06NA25396 with Los Alamos National
// Laboratory (LANL), the U.S. Government retains certain rights in
// this software.
//============================================================================
#define VTKM_DEVICE_ADAPTER VTKM_DEVICE_ADAPTER_ERROR
#include <vtkm/cont/tbb/DeviceAdapterTBB.h>
#include <vtkm/cont/testing/TestingFancyArrayHandles.h>
int UnitTestTBBArrayHandleFancy(int, char *[])
{
return vtkm::cont::testing::TestingFancyArrayHandles
<vtkm::cont::DeviceAdapterTagTBB>::Run();
}

@ -29,3 +29,6 @@ vtkm_worklet_unit_tests( VTKM_DEVICE_ADAPTER_SERIAL )
# Register the worklet unit tests for each optional device adapter that is
# enabled in this build.
if (VTKm_ENABLE_CUDA)
  vtkm_worklet_unit_tests( VTKM_DEVICE_ADAPTER_CUDA )
endif()
if (VTKm_ENABLE_TBB)
  vtkm_worklet_unit_tests( VTKM_DEVICE_ADAPTER_TBB )
endif()