Merge topic 'latest-tbb'
3be3529ff Export tbb interface as vtkm::tbb a8825db59 Disable loading the TBBConfig.cmake file 5eb688da2 Update parallel radix sort for OpenMP 1eea0bee1 Use TBB task_group for radix sort 904e784e8 Remove TBB parallel_sort patch 0390c8b07 Pull FindTBB.cmake from VTK Acked-by: Kitware Robot <kwrobot@kitware.com> Acked-by: Vicente Bolea <vicente.bolea@kitware.com> Merge-request: !2509
This commit is contained in:
commit
2589e5b740
@ -68,6 +68,35 @@
|
||||
# FindTBB helper functions and macros
|
||||
#
|
||||
|
||||
# Use TBBConfig.cmake if possible.
|
||||
|
||||
# Disabling this because running the TBBConfig.cmake on dragnipur is
|
||||
# causing a CMake error. I don't know if this is an install problem
|
||||
# or an issue with version 2018.0.
|
||||
# set(_tbb_find_quiet)
|
||||
# if (TBB_FIND_QUIETLY)
|
||||
# set(_tbb_find_quiet QUIET)
|
||||
# endif ()
|
||||
# set(_tbb_find_components)
|
||||
# set(_tbb_find_optional_components)
|
||||
# foreach (_tbb_find_component IN LISTS TBB_FIND_COMPONENTS)
|
||||
# if (TBB_FIND_REQUIRED_${_tbb_find_component})
|
||||
# list(APPEND _tbb_find_components "${_tbb_find_component}")
|
||||
# else ()
|
||||
# list(APPEND _tbb_find_optional_components "${_tbb_find_component}")
|
||||
# endif ()
|
||||
# endforeach ()
|
||||
# unset(_tbb_find_component)
|
||||
# find_package(TBB CONFIG ${_tbb_find_quiet}
|
||||
# COMPONENTS ${_tbb_find_components}
|
||||
# OPTIONAL_COMPONENTS ${_tbb_find_optional_components})
|
||||
# unset(_tbb_find_quiet)
|
||||
# unset(_tbb_find_components)
|
||||
# unset(_tbb_find_optional_components)
|
||||
# if (TBB_FOUND)
|
||||
# return ()
|
||||
# endif ()
|
||||
|
||||
#====================================================
|
||||
# Fix the library path in case it is a linker script
|
||||
#====================================================
|
||||
@ -232,7 +261,7 @@ if (WIN32 AND MSVC)
|
||||
set(COMPILER_PREFIX "vc11")
|
||||
elseif(MSVC_VERSION EQUAL 1800)
|
||||
set(COMPILER_PREFIX "vc12")
|
||||
elseif(MSVC_VERSION EQUAL 1900)
|
||||
elseif(MSVC_VERSION GREATER_EQUAL 1900)
|
||||
set(COMPILER_PREFIX "vc14")
|
||||
endif ()
|
||||
|
||||
@ -277,6 +306,9 @@ endif ()
|
||||
# check compiler ABI
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set(COMPILER_PREFIX)
|
||||
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8)
|
||||
list(APPEND COMPILER_PREFIX "gcc4.8")
|
||||
endif()
|
||||
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7)
|
||||
list(APPEND COMPILER_PREFIX "gcc4.7")
|
||||
endif()
|
||||
@ -286,6 +318,9 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
list(APPEND COMPILER_PREFIX "gcc4.1")
|
||||
elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
set(COMPILER_PREFIX)
|
||||
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0) # Complete guess
|
||||
list(APPEND COMPILER_PREFIX "gcc4.8")
|
||||
endif()
|
||||
if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.6)
|
||||
list(APPEND COMPILER_PREFIX "gcc4.7")
|
||||
endif()
|
||||
@ -392,12 +427,18 @@ findpkg_finish(TBB_MALLOC_PROXY tbbmalloc_proxy)
|
||||
#=============================================================================
|
||||
#parse all the version numbers from tbb
|
||||
if(NOT TBB_VERSION)
|
||||
|
||||
#only read the start of the file
|
||||
file(STRINGS
|
||||
if (EXISTS "${TBB_INCLUDE_DIR}/oneapi/tbb/version.h")
|
||||
file(STRINGS
|
||||
"${TBB_INCLUDE_DIR}/oneapi/tbb/version.h"
|
||||
TBB_VERSION_CONTENTS
|
||||
REGEX "VERSION")
|
||||
else()
|
||||
#only read the start of the file
|
||||
file(STRINGS
|
||||
"${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h"
|
||||
TBB_VERSION_CONTENTS
|
||||
REGEX "VERSION")
|
||||
endif()
|
||||
|
||||
string(REGEX REPLACE
|
||||
".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1"
|
||||
|
@ -46,7 +46,7 @@ if(VTKm_ENABLE_TBB AND NOT TARGET vtkm::tbb)
|
||||
add_library(vtkmTBB INTERFACE)
|
||||
add_library(vtkm::tbb ALIAS vtkmTBB)
|
||||
target_link_libraries(vtkmTBB INTERFACE TBB::tbb)
|
||||
set_target_properties(vtkmTBB PROPERTIES EXPORT_NAME tbb)
|
||||
set_target_properties(vtkmTBB PROPERTIES EXPORT_NAME vtkm::tbb)
|
||||
install(TARGETS vtkmTBB EXPORT ${VTKm_EXPORT_NAME})
|
||||
endif()
|
||||
|
||||
|
@ -485,7 +485,7 @@ struct RunTask
|
||||
}
|
||||
|
||||
template <typename ThreaderData = void*>
|
||||
void operator()(ThreaderData tData = nullptr)
|
||||
void operator()(ThreaderData tData = nullptr) const
|
||||
{
|
||||
size_t num_nodes_at_current_height = (size_t)pow(2, (double)binary_tree_height_);
|
||||
if (num_threads_ <= num_nodes_at_current_height)
|
||||
|
@ -44,7 +44,7 @@ struct RadixThreaderOpenMP
|
||||
}
|
||||
|
||||
template <typename TaskType>
|
||||
void RunParentTask(TaskType task)
|
||||
void RunParentTask(TaskType task) const
|
||||
{
|
||||
assert(!omp_in_parallel());
|
||||
#pragma omp parallel default(none) shared(task)
|
||||
@ -57,7 +57,7 @@ struct RadixThreaderOpenMP
|
||||
}
|
||||
|
||||
template <typename TaskType, typename ThreadData>
|
||||
void RunChildTasks(ThreadData, TaskType left, TaskType right)
|
||||
void RunChildTasks(ThreadData, TaskType left, TaskType right) const
|
||||
{
|
||||
assert(omp_in_parallel());
|
||||
#pragma omp task default(none) firstprivate(right)
|
||||
|
@ -25,7 +25,6 @@ endif()
|
||||
|
||||
vtkm_declare_headers(${headers}
|
||||
ParallelSortTBB.hxx
|
||||
parallel_sort.h
|
||||
)
|
||||
|
||||
#These sources need to always be built
|
||||
|
@ -38,15 +38,6 @@ VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
// correct settings so that we don't clobber any existing function
|
||||
#include <vtkm/internal/Windows.h>
|
||||
|
||||
#include <tbb/tbb_stddef.h>
|
||||
#if (TBB_VERSION_MAJOR == 4) && (TBB_VERSION_MINOR == 2)
|
||||
//we provide a patched implementation of tbb parallel_sort
|
||||
//that fixes ADL for std::swap. This patch has been submitted to Intel
|
||||
//and is fixed in TBB 4.2 update 2.
|
||||
#include <vtkm/cont/tbb/internal/parallel_sort.h>
|
||||
#else
|
||||
#include <tbb/parallel_sort.h>
|
||||
#endif
|
||||
|
||||
#include <numeric>
|
||||
#include <tbb/blocked_range.h>
|
||||
@ -54,6 +45,7 @@ VTKM_THIRDPARTY_PRE_INCLUDE
|
||||
#include <tbb/parallel_for.h>
|
||||
#include <tbb/parallel_reduce.h>
|
||||
#include <tbb/parallel_scan.h>
|
||||
#include <tbb/parallel_sort.h>
|
||||
#include <tbb/partitioner.h>
|
||||
#include <tbb/tick_count.h>
|
||||
|
||||
|
@ -53,7 +53,7 @@
|
||||
// correct settings so that we don't clobber any existing function
|
||||
#include <vtkm/internal/Windows.h>
|
||||
|
||||
#include <tbb/task.h>
|
||||
#include <tbb/tbb.h>
|
||||
#include <thread>
|
||||
|
||||
#if defined(VTKM_MSVC)
|
||||
@ -71,6 +71,7 @@ namespace sort
|
||||
|
||||
const size_t MAX_CORES = std::thread::hardware_concurrency();
|
||||
|
||||
#if TBB_VERSION_MAJOR < 2020
|
||||
// Simple TBB task wrapper around a generic functor.
|
||||
template <typename FunctorType>
|
||||
struct TaskWrapper : public ::tbb::task
|
||||
@ -94,7 +95,7 @@ struct RadixThreaderTBB
|
||||
size_t GetAvailableCores() const { return MAX_CORES; }
|
||||
|
||||
template <typename TaskType>
|
||||
void RunParentTask(TaskType task)
|
||||
void RunParentTask(TaskType task) const
|
||||
{
|
||||
using Task = TaskWrapper<TaskType>;
|
||||
Task& root = *new (::tbb::task::allocate_root()) Task(task);
|
||||
@ -102,7 +103,7 @@ struct RadixThreaderTBB
|
||||
}
|
||||
|
||||
template <typename TaskType>
|
||||
void RunChildTasks(TaskWrapper<TaskType>* wrapper, TaskType left, TaskType right)
|
||||
void RunChildTasks(TaskWrapper<TaskType>* wrapper, TaskType left, TaskType right) const
|
||||
{
|
||||
using Task = TaskWrapper<TaskType>;
|
||||
::tbb::empty_task& p = *new (wrapper->allocate_continuation())::tbb::empty_task();
|
||||
@ -115,6 +116,34 @@ struct RadixThreaderTBB
|
||||
}
|
||||
};
|
||||
|
||||
#else // TBB_VERSION_MAJOR >= 2020
|
||||
|
||||
// In TBB version 2020, the task class was deprecated. Instead, we use the simpler task_group.
|
||||
|
||||
struct RadixThreaderTBB
|
||||
{
|
||||
std::shared_ptr<::tbb::task_group> TaskGroup =
|
||||
std::shared_ptr<::tbb::task_group>(new ::tbb::task_group);
|
||||
|
||||
size_t GetAvailableCores() const { return MAX_CORES; }
|
||||
|
||||
template <typename TaskType>
|
||||
void RunParentTask(TaskType task) const
|
||||
{
|
||||
this->TaskGroup->run_and_wait(task);
|
||||
// All tasks should be complete at this point.
|
||||
}
|
||||
|
||||
template <typename TaskType>
|
||||
void RunChildTasks(void*, TaskType left, TaskType right) const
|
||||
{
|
||||
this->TaskGroup->run(left);
|
||||
this->TaskGroup->run(right);
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
VTKM_INSTANTIATE_RADIX_SORT_FOR_THREADER(RadixThreaderTBB)
|
||||
}
|
||||
}
|
||||
|
@ -1,273 +0,0 @@
|
||||
/*
|
||||
Copyright 2005-2013 Intel Corporation. All Rights Reserved.
|
||||
|
||||
This file is part of Threading Building Blocks.
|
||||
|
||||
Threading Building Blocks is free software; you can redistribute it
|
||||
and/or modify it under the terms of the GNU General Public License
|
||||
version 2 as published by the Free Software Foundation.
|
||||
|
||||
Threading Building Blocks is distributed in the hope that it will be
|
||||
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
|
||||
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with Threading Building Blocks; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
As a special exception, you may use this file as part of a free software
|
||||
library without restriction. Specifically, if other files instantiate
|
||||
templates or use macros or inline functions from this file, or you compile
|
||||
this file and link it with other files to produce an executable, this
|
||||
file does not by itself cause the resulting executable to be covered by
|
||||
the GNU General Public License. This exception does not however
|
||||
invalidate any other reasons why the executable file might be covered by
|
||||
the GNU General Public License.
|
||||
*/
|
||||
|
||||
#ifndef __TBB_parallel_sort_H
|
||||
#define __TBB_parallel_sort_H
|
||||
|
||||
#include <tbb/blocked_range.h>
|
||||
#include <tbb/parallel_for.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
|
||||
namespace tbb
|
||||
{
|
||||
|
||||
//! @cond INTERNAL
|
||||
namespace internal
|
||||
{
|
||||
|
||||
//! Range used in quicksort to split elements into subranges based on a value.
|
||||
/** The split operation selects a splitter and places all elements less than or equal
|
||||
to the value in the first range and the remaining elements in the second range.
|
||||
@ingroup algorithms */
|
||||
template <typename RandomAccessIterator, typename Compare>
|
||||
class quick_sort_range : private no_assign
|
||||
{
|
||||
|
||||
inline size_t median_of_three(const RandomAccessIterator& array,
|
||||
size_t l,
|
||||
size_t m,
|
||||
size_t r) const
|
||||
{
|
||||
return comp(array[l], array[m])
|
||||
? (comp(array[m], array[r]) ? m : (comp(array[l], array[r]) ? r : l))
|
||||
: (comp(array[r], array[m]) ? m : (comp(array[r], array[l]) ? r : l));
|
||||
}
|
||||
|
||||
inline size_t pseudo_median_of_nine(const RandomAccessIterator& array,
|
||||
const quick_sort_range& range) const
|
||||
{
|
||||
size_t offset = range.size / 8u;
|
||||
return median_of_three(array,
|
||||
median_of_three(array, 0, offset, offset * 2),
|
||||
median_of_three(array, offset * 3, offset * 4, offset * 5),
|
||||
median_of_three(array, offset * 6, offset * 7, range.size - 1));
|
||||
}
|
||||
|
||||
public:
|
||||
static const size_t grainsize = 500;
|
||||
const Compare& comp;
|
||||
RandomAccessIterator begin;
|
||||
size_t size;
|
||||
|
||||
quick_sort_range(RandomAccessIterator begin_, size_t size_, const Compare& comp_)
|
||||
: comp(comp_)
|
||||
, begin(begin_)
|
||||
, size(size_)
|
||||
{
|
||||
}
|
||||
|
||||
bool empty() const { return size == 0; }
|
||||
bool is_divisible() const { return size >= grainsize; }
|
||||
|
||||
quick_sort_range(quick_sort_range& range, split)
|
||||
: comp(range.comp)
|
||||
{
|
||||
using std::swap;
|
||||
RandomAccessIterator array = range.begin;
|
||||
RandomAccessIterator key0 = range.begin;
|
||||
size_t m = pseudo_median_of_nine(array, range);
|
||||
if (m)
|
||||
swap(array[0], array[m]);
|
||||
|
||||
size_t i = 0;
|
||||
size_t j = range.size;
|
||||
// Partition interval [i+1,j-1] with key *key0.
|
||||
for (;;)
|
||||
{
|
||||
__TBB_ASSERT(i < j, nullptr);
|
||||
// Loop must terminate since array[l]==*key0.
|
||||
do
|
||||
{
|
||||
--j;
|
||||
__TBB_ASSERT(i <= j, "bad ordering relation?");
|
||||
} while (comp(*key0, array[j]));
|
||||
do
|
||||
{
|
||||
__TBB_ASSERT(i <= j, nullptr);
|
||||
if (i == j)
|
||||
goto partition;
|
||||
++i;
|
||||
} while (comp(array[i], *key0));
|
||||
if (i == j)
|
||||
goto partition;
|
||||
swap(array[i], array[j]);
|
||||
}
|
||||
partition:
|
||||
// Put the partition key were it belongs
|
||||
swap(array[j], *key0);
|
||||
// array[l..j) is less or equal to key.
|
||||
// array(j..r) is greater or equal to key.
|
||||
// array[j] is equal to key
|
||||
i = j + 1;
|
||||
begin = array + i;
|
||||
size = range.size - i;
|
||||
range.size = j;
|
||||
}
|
||||
};
|
||||
|
||||
#if __TBB_TASK_GROUP_CONTEXT
|
||||
//! Body class used to test if elements in a range are presorted
|
||||
/** @ingroup algorithms */
|
||||
template <typename RandomAccessIterator, typename Compare>
|
||||
class quick_sort_pretest_body : internal::no_assign
|
||||
{
|
||||
const Compare& comp;
|
||||
|
||||
public:
|
||||
quick_sort_pretest_body(const Compare& _comp)
|
||||
: comp(_comp)
|
||||
{
|
||||
}
|
||||
|
||||
void operator()(const blocked_range<RandomAccessIterator>& range) const
|
||||
{
|
||||
task& my_task = task::self();
|
||||
RandomAccessIterator my_end = range.end();
|
||||
|
||||
int i = 0;
|
||||
for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i)
|
||||
{
|
||||
if (i % 64 == 0 && my_task.is_cancelled())
|
||||
break;
|
||||
|
||||
// The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
|
||||
if (comp(*(k), *(k - 1)))
|
||||
{
|
||||
my_task.cancel_group_execution();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
#endif /* __TBB_TASK_GROUP_CONTEXT */
|
||||
|
||||
//! Body class used to sort elements in a range that is smaller than the grainsize.
|
||||
/** @ingroup algorithms */
|
||||
template <typename RandomAccessIterator, typename Compare>
|
||||
struct quick_sort_body
|
||||
{
|
||||
void operator()(const quick_sort_range<RandomAccessIterator, Compare>& range) const
|
||||
{
|
||||
//SerialQuickSort( range.begin, range.size, range.comp );
|
||||
std::sort(range.begin, range.begin + range.size, range.comp);
|
||||
}
|
||||
};
|
||||
|
||||
//! Wrapper method to initiate the sort by calling parallel_for.
|
||||
/** @ingroup algorithms */
|
||||
template <typename RandomAccessIterator, typename Compare>
|
||||
void parallel_quick_sort(RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp)
|
||||
{
|
||||
#if __TBB_TASK_GROUP_CONTEXT
|
||||
task_group_context my_context;
|
||||
const int serial_cutoff = 9;
|
||||
|
||||
__TBB_ASSERT(begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?");
|
||||
RandomAccessIterator k;
|
||||
for (k = begin; k != begin + serial_cutoff; ++k)
|
||||
{
|
||||
if (comp(*(k + 1), *k))
|
||||
{
|
||||
goto do_parallel_quick_sort;
|
||||
}
|
||||
}
|
||||
|
||||
parallel_for(blocked_range<RandomAccessIterator>(k + 1, end),
|
||||
quick_sort_pretest_body<RandomAccessIterator, Compare>(comp),
|
||||
auto_partitioner(),
|
||||
my_context);
|
||||
|
||||
if (my_context.is_group_execution_cancelled())
|
||||
do_parallel_quick_sort:
|
||||
#endif /* __TBB_TASK_GROUP_CONTEXT */
|
||||
parallel_for(quick_sort_range<RandomAccessIterator, Compare>(begin, end - begin, comp),
|
||||
quick_sort_body<RandomAccessIterator, Compare>(),
|
||||
auto_partitioner());
|
||||
}
|
||||
|
||||
} // namespace internal
|
||||
//! @endcond
|
||||
|
||||
//! @cond INTERNAL
|
||||
/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
|
||||
Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
|
||||
- \code void swap( T& x, T& y ) \endcode Swaps \c x and \c y
|
||||
- \code bool Compare::operator()( const T& x, const T& y ) \endcode
|
||||
True if x comes before y;
|
||||
**/
|
||||
|
||||
/** \name parallel_sort
|
||||
See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
|
||||
//@{
|
||||
|
||||
//! Sorts the data in [begin,end) using the given comparator
|
||||
/** The compare function object is used for all comparisons between elements during sorting.
|
||||
The compare object must define a bool operator() function.
|
||||
@ingroup algorithms **/
|
||||
//! @endcond
|
||||
template <typename RandomAccessIterator, typename Compare>
|
||||
void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp)
|
||||
{
|
||||
const int min_parallel_size = 500;
|
||||
if (end > begin)
|
||||
{
|
||||
if (end - begin < min_parallel_size)
|
||||
{
|
||||
std::sort(begin, end, comp);
|
||||
}
|
||||
else
|
||||
{
|
||||
internal::parallel_quick_sort(begin, end, comp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//! Sorts the data in [begin,end) with \c std::less of the iterator's value type as comparator
/** @ingroup algorithms **/
template <typename RandomAccessIterator>
inline void parallel_sort(RandomAccessIterator begin, RandomAccessIterator end)
{
  using element_type = typename std::iterator_traits<RandomAccessIterator>::value_type;
  parallel_sort(begin, end, std::less<element_type>());
}
|
||||
|
||||
//! Sorts the pointer range \c [begin,end) using \c std::less<T> as comparator
/** @ingroup algorithms **/
template <typename T>
inline void parallel_sort(T* begin, T* end)
{
  const std::less<T> ascending = std::less<T>();
  parallel_sort(begin, end, ascending);
}
|
||||
//@}
|
||||
|
||||
} // namespace tbb
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user